import * as wmill from "";
import puppeteer from "npm:puppeteer";
// Pause applied after each navigation in main() so the page can finish
// loading; presumably milliseconds, since it is passed to page.waitForTimeout.
const loadingTimeToWait = 2000;
// 71 most common translations of "contact"
// NOTE(review): only two entries and no closing bracket are present at this
// point in the file — the list appears truncated and should be restored.
const commonContactTranslations = [
"seikswa ya",
"igbesi aye",
/**
 * Naive way of locating a site's contact page.
 *
 * Looks on the currently loaded page for an anchor whose `href` contains one
 * of the known translations of "contact" and returns the first hit. This is
 * not robust: not every website follows the convention of exposing a
 * "contact" route.
 *
 * The lookup runs from the Node side through the Puppeteer `page` handle.
 * A callback passed to `page.evaluate` executes inside the browser, where the
 * Node-side `page` object does not exist, so it must not be used there.
 *
 * NOTE(review): the selector below (`a[href*="<translation>"]`) is an
 * assumption derived from the "contact route" convention described above —
 * confirm it against the intended matching rule.
 *
 * @param page Puppeteer page that has already navigated to the website.
 * @returns The resolved `href` of the first matching link, or `null` when no
 *   translation matches.
 */
async function findContactURL(page: puppeteer.Page): Promise<string | null> {
  for (const contactTranslation of commonContactTranslations) {
    // JSON.stringify yields a double-quoted string, which is also a valid CSS
    // attribute value for plain words.
    const selector = `a[href*=${JSON.stringify(contactTranslation)}]`;
    try {
      const contactLinkElement = await page.$(selector);
      if (!contactLinkElement) {
        continue;
      }
      // Reading the `href` property (not the attribute) gives the absolute URL.
      const hrefHandle = await contactLinkElement.getProperty("href");
      const href: unknown = await hrefHandle.jsonValue();
      if (typeof href === "string" && href.length > 0) {
        return href;
      }
    } catch {
      // Best effort: a selector the browser rejects or a detached node only
      // skips this translation, it does not abort the whole search.
      continue;
    }
  }
  return null;
}
/**
 * Collects every email-looking substring from a chunk of HTML/text.
 *
 * @param htmlContent Raw page content to scan.
 * @returns All matches in document order (duplicates are kept); an empty
 *   array when nothing matches.
 */
function extractEmail(htmlContent: string): string[] {
  // The top-level domain is letters only. The previous class `[A-Z|a-z]`
  // also accepted a literal "|", so "x@example.com|foo" was captured whole.
  const emailPattern = /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/g;
  return htmlContent.match(emailPattern) ?? [];
}
/**
 * Tells whether a string can be parsed as an absolute URL.
 *
 * Relies on the `URL` constructor, which throws on unparsable input; any
 * scheme the constructor accepts counts as valid.
 *
 * @param url Candidate URL string.
 * @returns `true` when `new URL(url)` succeeds, `false` otherwise.
 */
function isValidURL(url: string): boolean {
  try {
    new URL(url);
    return true;
  } catch {
    return false;
  }
}
/** Outcome returned by `main`; both fields are `null` when nothing was found. */
interface Result {
  /** First email address matched on the contact page, or `null`. */
  email: string | null;
  /** URL of the contact page the email was taken from, or `null`. */
  contact_page: string | null;
}
/**
 * Visits a website, tries to find its contact page and extracts the first
 * email address shown there.
 *
 * Steps: validate the URL, open it in a headless browser, locate a contact
 * link with `findContactURL`, open that link and scan its HTML with
 * `extractEmail`. Any navigation failure, a missing contact link or a contact
 * page without an email yields the empty result (both fields `null`).
 *
 * @param websiteUrl Absolute URL of the website to inspect.
 * @param googleSheetId Accepted for interface compatibility; not used by the
 *   code in this function.
 * @returns The contact page URL and first email found, or nulls.
 */
export async function main(websiteUrl: string, googleSheetId?: string): Promise<Result> {
  const emptyResult: Result = { contact_page: null, email: null };
  if (!isValidURL(websiteUrl)) {
    return emptyResult;
  }

  // Plain timer instead of page.waitForTimeout, which is deprecated and no
  // longer available in newer Puppeteer releases (the import is unpinned).
  const pause = (ms: number): Promise<void> =>
    new Promise<void>((resolve) => setTimeout(resolve, ms));

  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();

    try {
      await page.goto(websiteUrl);
    } catch {
      return emptyResult;
    }
    await pause(loadingTimeToWait);

    const contactURL = await findContactURL(page);
    // Naive approach failed: no link with a "contact" route was found.
    if (!contactURL) {
      return emptyResult;
    }

    try {
      await page.goto(contactURL);
    } catch {
      return emptyResult;
    }
    await pause(loadingTimeToWait);

    const contactHtmlContent = await page.content();
    const firstEmail = extractEmail(contactHtmlContent)[0];
    if (!firstEmail) {
      return emptyResult;
    }
    return { contact_page: contactURL, email: firstEmail };
  } finally {
    // Runs on every exit path, so early returns and unexpected errors no
    // longer leave a browser process behind.
    await browser.close();
  }
}
// Submitted by pesjak.matej93 625 days ago