get-gremium-members/get-gremium-members.mjs

100 lines
2.7 KiB
JavaScript

import puppeteer from 'puppeteer';
import fetch from 'node-fetch';
/**
 * Splits a display name into first and last name at the final whitespace.
 *
 * This is a heuristic used only when no vCard is available: everything before
 * the last space (titles, given names) becomes `firstName`, the last token
 * becomes `lastName`. A single-token name is returned as `lastName` only.
 *
 * @param {string} fullName - Name as scraped from the page heading.
 * @returns {{ firstName: string, lastName: string }}
 */
function splitFullName(fullName) {
  const normalized = fullName.trim().replace(/\s+/g, ' ');
  const lastSpace = normalized.lastIndexOf(' ');
  if (lastSpace === -1) {
    return { firstName: '', lastName: normalized };
  }
  return {
    firstName: normalized.slice(0, lastSpace),
    lastName: normalized.slice(lastSpace + 1),
  };
}

/**
 * Scrapes one member detail page and returns a CSV-ready record.
 *
 * Prefers the vCard linked from the page (`.smc-btn-vcard`); if there is none,
 * falls back to the page heading and the `.smcademail` span.
 *
 * @param {object} browser - Puppeteer browser used to open the detail page.
 * @param {{ href: string, party: string }} link - Detail page URL and party text.
 * @param {string} gremium - Committee name copied into every record.
 * @param {number} maxWaitTime - Upper bound (ms) of the random start delay.
 * @returns {Promise<{ gremium: string, firstName: string, lastName: string, party: string, email: string }>}
 */
async function scrapeMember(browser, { href, party }, gremium, maxWaitTime) {
  // Stagger the requests randomly so that not all detail pages are opened at once.
  await new Promise((resolve) => {
    setTimeout(resolve, Math.random() * maxWaitTime);
  });

  const localPage = await browser.newPage();
  let vcardLink;
  try {
    await localPage.goto(href, { timeout: 120 * 1000 });
    vcardLink = await localPage.evaluate(() => {
      return document.querySelector('.smc-btn-vcard')?.href;
    });

    if (!vcardLink) {
      // Presumably the heading of a member page is the member's name — verify against the site.
      const name = await localPage.evaluate(() => {
        return document.querySelector('.smc_h1')?.textContent || '';
      });
      const email = await localPage.evaluate(() => {
        const emailSpan = document.querySelector('.smcademail span');
        if (!emailSpan) {
          return '';
        }
        // A pseudo-element without generated content reports the keyword
        // `none`/`normal`; that keyword must not end up in the address.
        const pseudoText = (pseudo) => {
          const content = window.getComputedStyle(emailSpan, pseudo)?.content;
          return !content || content === 'none' || content === 'normal' ? '' : content;
        };
        // The address is assembled from ::before + text + ::after, stripped of
        // quotes and spaces, and stored reversed in the page.
        return (pseudoText('::before') + emailSpan.textContent + pseudoText('::after'))
          .split('')
          .filter((c) => c !== '"' && c !== ' ')
          .reverse()
          .join('');
      });
      return {
        gremium,
        ...splitFullName(name),
        party,
        email,
      };
    }
  } finally {
    // Always release the tab, even when navigation or evaluation failed.
    await localPage.close();
  }

  const vcardResponse = await fetch(vcardLink);
  if (!vcardResponse.ok) {
    // Warn on stderr so the CSV on stdout stays clean; parsing below then yields empty fields.
    console.warn(`vCard download failed (${vcardResponse.status}): ${vcardLink}`);
  }
  const vcard = await vcardResponse.text();
  const email = vcard.match(/^EMAIL:(.*)$/m)?.[1] || '';
  const [, lastName, firstName] = vcard.match(/^N:([^;]+);([^;]+);.*$/m) || ['', '', ''];
  return {
    gremium,
    firstName,
    lastName,
    party,
    email,
  };
}

/**
 * Entry point: opens the committee page given as the first CLI argument,
 * collects all member links with their party, scrapes every member and prints
 * a semicolon-separated CSV to stdout.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const urlToVisit = process.argv[2];
  if (!urlToVisit) {
    throw new Error('Usage: node get-gremium-members.mjs <gremium-url>');
  }

  const browser = await puppeteer.launch({
    headless: false,
    product: 'firefox',
    executablePath: process.env.FIREFOX_BIN,
  });
  try {
    const page = await browser.newPage();
    await page.goto(urlToVisit);

    // Committee name from the page heading.
    const gremium = await page.evaluate(() => {
      return document.querySelector('h1.smc_h1').textContent;
    });

    // Member detail links; the party is read from the cell following the name cell.
    const linksWithOrg = await page.evaluate(() => {
      return [...document.querySelectorAll('#smc_page_kp0040_contenttable1 .pename a.smce-a-u')].map((anchor) => {
        const party = anchor.parentElement.nextElementSibling.textContent;
        return {
          href: anchor.href,
          party,
        };
      });
    });

    // Spread the start of all member scrapes over roughly 1.5 s per member.
    const maxWaitTime = linksWithOrg.length * 1500;
    const members = await Promise.all(
      linksWithOrg.map((link) => scrapeMember(browser, link, gremium, maxWaitTime)),
    );

    console.log([
      'Vorname;Nachname;E-Mail-Adresse;Partei;Gremium;',
      ...members.map(({
        firstName,
        lastName,
        gremium: memberGremium,
        party,
        email,
      }) => `${firstName};${lastName};${email};${party};${memberGremium};`),
    ].join('\n'));
  } finally {
    // Close the browser on success and on failure so no Firefox process is left behind.
    await browser.close();
  }
}

main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});