import puppeteer from 'puppeteer';
import fetch from 'node-fetch';

/*
 * Command-line scraper.
 *
 * Usage: node <script> <url>
 *
 * Opens <url> in Firefox via Puppeteer, reads the page heading ("Gremium")
 * and the member table, visits every member's detail page and prints one
 * semicolon-separated line per member to stdout:
 *
 *   Vorname;Nachname;E-Mail-Adresse;Partei;Gremium;
 *
 * Name and e-mail address come from the member's vCard when the detail page
 * offers one; otherwise they are read from the detail page itself.
 */

/** Navigation timeout for a member's detail page, in milliseconds. */
const NAVIGATION_TIMEOUT_MS = 120 * 1000;

/** Per-member share of the random start-delay window, in milliseconds. */
const DELAY_PER_MEMBER_MS = 1500;

/**
 * Collapses every run of whitespace (including newlines) into one space and
 * trims the result, so a value scraped via `textContent` cannot break the
 * line-oriented output.
 *
 * @param {unknown} value - Raw value; `null`/`undefined` become ''.
 * @returns {string} The normalized text.
 */
function normalizeWhitespace(value) {
  return String(value ?? '').replace(/\s+/g, ' ').trim();
}

/**
 * Splits a display name at its last space into first and last name.
 * This is a heuristic: everything before the last space is treated as the
 * first name (including titles), the final word as the last name.
 *
 * @param {string} fullName - Name as shown on the page.
 * @returns {{ firstName: string, lastName: string }}
 */
function splitFullName(fullName) {
  const name = normalizeWhitespace(fullName);
  const lastSpace = name.lastIndexOf(' ');
  if (lastSpace === -1) {
    return { firstName: '', lastName: name };
  }
  return {
    firstName: name.slice(0, lastSpace),
    lastName: name.slice(lastSpace + 1),
  };
}

/**
 * Extracts name and e-mail address from vCard text.
 *
 * Both properties may carry parameters before the colon
 * (e.g. `EMAIL;TYPE=INTERNET:`), and components of `N` may be empty.
 *
 * @param {string} vcard - Raw vCard text.
 * @returns {{ firstName: string, lastName: string, email: string }}
 *   Fields that are not present in the vCard are ''.
 */
function parseVcard(vcard) {
  const email = vcard.match(/^EMAIL(?:;[^:\r\n]*)?:(.*)$/m)?.[1] ?? '';
  const nameMatch = vcard.match(/^N(?:;[^:\r\n]*)?:([^;\r\n]*);([^;\r\n]*)/m);
  const [, lastName = '', firstName = ''] = nameMatch ?? [];
  return { firstName, lastName, email };
}

/**
 * Resolves after the given delay.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}

/**
 * Loads one member's detail page and collects that member's data.
 *
 * @param {object} browser - Puppeteer browser instance.
 * @param {{ href: string, party: string }} link - Detail page URL and party.
 * @param {string} gremium - Heading of the overview page.
 * @param {number} maxDelayMs - Upper bound of the random start delay.
 * @returns {Promise<{ gremium: string, firstName: string, lastName: string,
 *   party: string, email: string }>}
 */
async function scrapeMember(browser, { href, party }, gremium, maxDelayMs) {
  // Start every member at a random point in the window so the detail pages
  // are not all requested at the same instant.
  await sleep(Math.random() * maxDelayMs);

  const localPage = await browser.newPage();
  let vcardLink;
  try {
    await localPage.goto(href, { timeout: NAVIGATION_TIMEOUT_MS });

    vcardLink = await localPage.evaluate(
      () => document.querySelector('.smc-btn-vcard')?.href,
    );

    if (!vcardLink) {
      // No vCard offered: fall back to what the detail page itself shows.
      const name = await localPage.evaluate(
        () => document.querySelector('.smc_h1')?.textContent || '',
      );

      const email = await localPage.evaluate(() => {
        const emailSpan = document.querySelector('.smcademail span');
        if (!emailSpan) {
          return '';
        }
        // The address is assembled from the ::before content, the span text
        // and the ::after content, and is stored reversed. An absent
        // pseudo-element reports `none`/`normal`, which must not become
        // part of the address.
        const pseudoText = (pseudo) => {
          const content = window.getComputedStyle(emailSpan, pseudo).content;
          return content === 'none' || content === 'normal' ? '' : content;
        };
        const obfuscated = `${pseudoText('::before')}${emailSpan.textContent}${pseudoText('::after')}`;
        return [...obfuscated]
          .filter((c) => c !== '"' && c !== ' ')
          .reverse()
          .join('');
      });

      return { gremium, ...splitFullName(name), party, email };
    }
  } finally {
    // Always release the tab, including when navigation or evaluation throws.
    await localPage.close();
  }

  const vcardResponse = await fetch(vcardLink);
  const vcard = await vcardResponse.text();
  return { gremium, ...parseVcard(vcard), party };
}

/**
 * Entry point: scrapes the page given as first CLI argument and prints the
 * member list to stdout.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const urlToVisit = process.argv[2];
  if (!urlToVisit) {
    console.error('Usage: node <script> <url>');
    process.exitCode = 1;
    return;
  }

  const browser = await puppeteer.launch({
    headless: false,
    product: 'firefox',
    executablePath: process.env.FIREFOX_BIN,
  });

  try {
    const page = await browser.newPage();
    await page.goto(urlToVisit);

    // Heading of the overview page; repeated in every output row.
    const gremium = await page.evaluate(
      () => document.querySelector('h1.smc_h1').textContent,
    );

    // One entry per member link; the party is read from the element that
    // follows the link's parent.
    const linksWithOrg = await page.evaluate(() => {
      const anchors = document.querySelectorAll(
        '#smc_page_kp0040_contenttable1 .pename a.smce-a-u',
      );
      return [...anchors].map((anchor) => ({
        href: anchor.href,
        // A row without a following cell must not abort the whole run.
        party: anchor.parentElement?.nextElementSibling?.textContent ?? '',
      }));
    });

    const maxWaitTime = linksWithOrg.length * DELAY_PER_MEMBER_MS;
    const members = await Promise.all(
      linksWithOrg.map((link) => scrapeMember(browser, link, gremium, maxWaitTime)),
    );

    const rows = members.map(({ firstName, lastName, email, party, gremium: memberGremium }) => {
      const fields = [firstName, lastName, email, party, memberGremium].map(normalizeWhitespace);
      return `${fields.join(';')};`;
    });

    console.log(['Vorname;Nachname;E-Mail-Adresse;Partei;Gremium;', ...rows].join('\n'));
  } finally {
    // Close the browser even when scraping fails part-way through.
    await browser.close();
  }
}

main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});