diff options
Diffstat (limited to 'data/extensions/jid1-KtlZuoiikVfFew@jetpack/content/contactFinder.js')
-rw-r--r-- | data/extensions/jid1-KtlZuoiikVfFew@jetpack/content/contactFinder.js | 449 |
1 files changed, 191 insertions, 258 deletions
diff --git a/data/extensions/jid1-KtlZuoiikVfFew@jetpack/content/contactFinder.js b/data/extensions/jid1-KtlZuoiikVfFew@jetpack/content/contactFinder.js index c8689c0..439f1b1 100644 --- a/data/extensions/jid1-KtlZuoiikVfFew@jetpack/content/contactFinder.js +++ b/data/extensions/jid1-KtlZuoiikVfFew@jetpack/content/contactFinder.js @@ -3,6 +3,7 @@ * * * Copyright (C) 2017 Nathan Nichols, Loic J. Duros, Nik Nyby * Copyright (C) 2018 Giorgio Maone +* Copyright (C) 2022 Yuchen Pei * * This file is part of GNU LibreJS. * @@ -35,292 +36,224 @@ //********************************************************************************************* -//Regexes taken from "contact_regex.js" in the current LibreJS -//Copyright (C) 2011, 2012, 2014 Loic J. Duros -//Copyright (C) 2014, 2015 Nik Nyby - -function debug(format, ...args) { - console.debug(`LibreJS - ${format}`, ...args); -} - -var myPort; - -debug("Injecting contact finder in %s", document.URL); +(() => { + function debug(format, ...args) { + console.debug(`LibreJS - ${format}`, ...args); + } -// email address regexp -var reEmail = /^mailto\:(admin|feedback|webmaster|info|contact|support|comments|team|help)\@[a-z0-9.\-]+\.[a-z]{2,4}$/i; + debug("Injecting contact finder in %s", document.URL); -var reAnyEmail = /^mailto\:.*?\@[a-z0-9\.\-]+\.[a-z]{2,4}$/i; + /** + * contactSearchStrings + * Contains arrays of strings classified by language + * and by degree of certainty. + */ + const CONTACT_FRAGS = + [ + // de + { + 'certain': [ + '^[\\s]*Kontakt os[\\s]*$', + '^[\\s]*Email Os[\\s]*$', + '^[\\s]*Kontakt[\\s]*$' + ], + 'probable': ['^[\\s]Kontakt', '^[\\s]*Email'], + 'uncertain': [ + '^[\\s]*Om Us', + '^[\\s]*Om', + 'Hvem vi er' + ] + }, + // en + { + 'certain': [ + '^[\\s]*Contact Us[\\s]*$', + '^[\\s]*Email Us[\\s]*$', + '^[\\s]*Contact[\\s]*$', + '^[\\s]*Feedback[\\s]*$', + '^[\\s]*Web.?site Feedback[\\s]*$' + ], + 'probable': ['^[\\s]*Contact', '^[\\s]*Email'], + 'uncertain': [ + '^[\\s]*About Us', + '^[\\s]*About', + 'Who we are', + 'Who I am', + 'Company Info', + 'Customer Service' + ] + }, + // es + { + 'certain': [ + '^[\\s]*contáctenos[\\s]*$', + '^[\\s]*Email[\\s]*$' + ], + 'probable': ['^[\\s]contáctenos', '^[\\s]*Email'], + 'uncertain': [ + 'Acerca de nosotros' + ] + }, + // fr + { + 'certain': [ + '^[\\s]*Contactez nous[\\s]*$', + '^[\\s]*(Nous )?contacter[\\s]*$', + '^[\\s]*Email[\\s]*$', + '^[\\s]*Contact[\\s]*$', + '^[\\s]*Commentaires[\\s]*$' + ], + 'probable': ['^[\\s]Contact', '^[\\s]*Email'], + 'uncertain': [ + '^[\\s]*(A|À) propos', + 'Qui nous sommes', + 'Qui suis(-| )?je', + 'Info', + 'Service Client(e|è)le' + ] + } + ]; -// twitter address regexp -var reTwitter = /twitter\.com\/(\!?#\/)?[a-z0-9]*/i; + const CONTACT_LINK_LIMIT = 5; -// identi.ca address regexp -var reIdentiCa = /identi\.ca\/(?!notice\/)[a-z0-9]*/i; + // Taken from http://emailregex.com/ + const EMAIL_REGEX = + new RegExp(/(?:[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*|"(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21\x23-\x5b\x5d-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])*")@(?:(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?|\[(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?|[a-z0-9-]*[a-z0-9]:(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21-\x5a\x53-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])+)\])/g); + //********************************************************************************************* -/** - * contactSearchStrings - * Contains arrays of strings classified by language - * and by degree of certainty. - */ -var contactStr = { - 'da': { - 'certain': [ - '^[\\s]*Kontakt os[\\s]*$', - '^[\\s]*Email Os[\\s]*$', - '^[\\s]*Kontakt[\\s]*$' - ], - 'probable': ['^[\\s]Kontakt', '^[\\s]*Email'], - 'uncertain': [ - '^[\\s]*Om Us', - '^[\\s]*Om', - 'Hvem vi er' - ] - }, - 'en': { - 'certain': [ - '^[\\s]*Contact Us[\\s]*$', - '^[\\s]*Email Us[\\s]*$', - '^[\\s]*Contact[\\s]*$', - '^[\\s]*Feedback[\\s]*$', - '^[\\s]*Web.?site Feedback[\\s]*$' - ], - 'probable': ['^[\\s]Contact', '^[\\s]*Email'], - 'uncertain': [ - '^[\\s]*About Us', - '^[\\s]*About', - 'Who we are', - 'Who I am', - 'Company Info', - 'Customer Service' - ] - }, - 'es': { - 'certain': [ - '^[\\s]*contáctenos[\\s]*$', - '^[\\s]*Email[\\s]*$' - ], - 'probable': ['^[\\s]contáctenos', '^[\\s]*Email'], - 'uncertain': [ - 'Acerca de nosotros' - ] - }, - 'fr': { - 'certain': [ - '^[\\s]*Contactez nous[\\s]*$', - '^[\\s]*(Nous )?contacter[\\s]*$', - '^[\\s]*Email[\\s]*$', - '^[\\s]*Contact[\\s]*$', - '^[\\s]*Commentaires[\\s]*$' - ], - 'probable': ['^[\\s]Contact', '^[\\s]*Email'], - 'uncertain': [ - '^[\\s]*(A|À) propos', - 'Qui nous sommes', - 'Qui suis(-| )?je', - 'Info', - 'Service Client(e|è)le' - ] + function findMatch(link, frag) { + const result = (link.innerText.match(new RegExp(frag, "g")) || []).filter(x => typeof x == "string"); + if (result.length) return true; + return false; } -}; -var usaPhoneNumber = new RegExp(/(?:\+ ?1 ?)?\(?[2-9]{1}[0-9]{2}\)?(?:\-|\.| )?[0-9]{3}(?:\-|\.| )[0-9]{4}(?:[^0-9])/mg); -// Taken from http://emailregex.com/ -var email_regex = new RegExp(/(?:[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*|"(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21\x23-\x5b\x5d-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])*")@(?:(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?|\[(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?|[a-z0-9-]*[a-z0-9]:(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21-\x5a\x53-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])+)\])/g); -//********************************************************************************************* - -var prefs; - -/** -* returns input with all elements not of type string removed -*/ -function remove_not_str(a) { - var new_a = []; - for (var i in a) { - if (typeof (a[i]) == "string") { - new_a.push(a[i]) - } - } - return new_a; -} -/** -* Tests all links on the page for regexes under a certain certainty level. -* -* Will return either the first regex match from the selected certainty level or all regexes that -* match on that certainty level. -* -* certainty_lvl can be "certain" > "probable" > "uncertain" -*/ -function attempt(certainty_lvl, first = true) { - // There needs to be some kind of max so that people can't troll by for example leaving a comment with a bunch of emails - // to cause LibreJS users to slow down. - var fail_flag = true; - var flag; - var matches = []; - var result = []; - var str_under_test = ""; - for (var i in document.links) { - if (typeof (document.links[i].innerText) != "string" || typeof (document.links[i].href) != "string") { - continue; - } - str_under_test = document.links[i].innerText + " " + document.links[i].href; - flag = true; - for (var j in contactStr) { - for (var k in contactStr[j][certainty_lvl]) { - if (flag) { - result = []; - result = str_under_test.match(new RegExp(contactStr[j][certainty_lvl][k], "g")); - result = remove_not_str(result); - if (result !== undefined && typeof (result[0]) == "string") { - if (first) { - return { "fail": false, "result": document.links[i] }; - } else { - //console.log(document.links[i].href + " matched " + contactStr[j][certainty_lvl][k]); - matches.push(document.links[i]); - fail_flag = false; - flag = false; - } - } + /** + * Tests all links on the page for regexes under a certain certainty level. + * + * Will return either all regex matches from the selected certainty level, + * up to a limit. + * + * certainty can be "certain" > "probable" > "uncertain" + */ + function attempt(certainty, limit) { + // There needs to be some kind of max so that people can't troll by for example leaving a comment with a bunch of emails + // to cause LibreJS users to slow down. + const matches = []; + const links = Array.from(document.links).filter(link => (typeof (link.innerText) === "string" || typeof (link.href) === "string")); + for (const link of links) { + for (const byLevel of CONTACT_FRAGS) { + for (const frag of byLevel[certainty]) { + findMatch(link, frag) && matches.push(link); + if (matches.length >= limit) return { 'fail': false, 'result': [link] }; } } } + return { "fail": matches.length === 0, "result": matches }; } - return { "fail": fail_flag, "result": matches }; -} -/** -* "LibreJS detects contact pages, email addresses that are likely to be owned by the -* maintainer of the site, Twitter and identi.ca links, and phone numbers." -*/ -function find_contacts() { - var all = document.documentElement.innerText; - var phone_num = []; - var twitlinks = []; - var identi = []; - var contact_pages = []; - var res = attempt("certain"); - var flag = true; - var type = ""; - if (res["fail"] == false) { - type = "certain"; - res = res["result"]; - flag = false; - } - if (flag) { - res = attempt("probable"); - if (res["fail"] == false) { - type = "probable"; - res = res["result"]; - flag = false; - } - } - if (flag) { - res = attempt("uncertain"); - if (res["fail"] == false) { - type = "uncertain"; - res = res["result"]; - flag = false; + /** + * "LibreJS detects contact pages and email addresses that are likely to be owned by the + * maintainer of the site." + */ + function findContacts() { + for (const type of ["certain", "probable", "uncertain"]) { + const attempted = attempt(type, CONTACT_LINK_LIMIT); + if (!attempted["fail"]) { + return [type, attempted["result"]]; + } } + return null; } - if (flag) { - return res; + + + function createWidget(id, tag, parent = document.body) { + const oldWidget = document.getElementById(id); + if (oldWidget) oldWidget.remove(); + const widget = parent.appendChild(document.createElement(tag)); + widget.id = id; + return widget; } - return [type, res]; -} + /** + * + * Creates the contact finder / complain UI as a semi-transparent overlay + * + */ -function createWidget(id, tag, parent = document.body) { - let widget = document.getElementById(id); - if (widget) widget.remove(); - widget = parent.appendChild(document.createElement(tag)); - widget.id = id; - return widget; -} + function main() { + const overlay = createWidget("_LibreJS_overlay", "div"); + const frame = createWidget("_LibreJS_frame", "iframe"); -/** -* -* Creates the contact finder / complain UI as a semi-transparent overlay -* -*/ + const close = () => { + frame.remove(); + overlay.remove(); + }; -function main() { - let overlay = createWidget("_LibreJS_overlay", "div"); - let frame = createWidget("_LibreJS_frame", "iframe"); + // Clicking the "outer area" closes the dialog. + overlay.addEventListener("click", close); - let close = () => { - frame.remove(); - overlay.remove(); - }; + const initFrame = prefs => { + debug("initFrame"); + const contentDoc = frame.contentWindow.document; - let closeListener = e => { - let t = e.currentTarget; - if (t.href) { // link navigation - if (t.href !== document.URL) { - if (t.href.includes("#")) { - window.addEventListener("hashchange", close); - } - return; + const addText = (text, tag, wherein) => { + el = wherein.appendChild(contentDoc.createElement(tag)); + el.textContent = text; } - } - close(); - }; - let makeCloser = clickable => clickable.addEventListener("click", closeListener); - makeCloser(overlay); + // Header of the dialog + const { body } = contentDoc; + body.id = "_LibreJS_dialog"; + addText('LibreJS Complaint', 'h1', body); + const closeButton = body.appendChild(contentDoc.createElement('button')); + closeButton.classList.toggle('close', true); + closeButton.textContent = 'x'; + closeButton.addEventListener("click", close); - let initFrame = () => { - debug("initFrame"); - let res = find_contacts(); - let contentDoc = frame.contentWindow.document; - let { body } = contentDoc; - body.id = "_LibreJS_dialog"; - body.innerHTML = `<h1>LibreJS Complaint</h1><button class='close'>x</button>`; - contentDoc.documentElement.appendChild(contentDoc.createElement("base")).target = "_top"; - let content = body.appendChild(contentDoc.createElement("div")); - content.id = "content"; - let addHTML = s => content.insertAdjacentHTML("beforeend", s); - if ("fail" in res) { - content.classList.toggle("_LibreJS_fail", true) - addHTML("<div>Could not guess any contact page for this site.</div>"); - } else { - addHTML("<h3>Contact info guessed for this site</h3>"); - if (typeof (res[1]) === "string") { - let a = contentDoc.createElement("a"); - a.href = a.textContent = res[1]; - content.appendChild(a); - } else if (typeof (res[1]) === "object") { - addHTML(`${res[0]}: ${res[1].outerHTML}`); - } - } + const content = body.appendChild(contentDoc.createElement("div")); + content.id = "content"; - let emails = document.documentElement.textContent.match(email_regex); - if (emails && (emails = Array.filter(emails, e => !!e)).length) { - addHTML("<h5>Possible email addresses:</h5>"); - let list = contentDoc.createElement("ul"); - for (let i = 0, max = Math.min(emails.length, 10); i < max; i++) { - let recipient = emails[i]; - let a = contentDoc.createElement("a"); - a.href = `mailto:${recipient}?subject${encodeURIComponent(prefs["pref_subject"]) - }&body=${encodeURIComponent(prefs["pref_body"]) - }`; - a.textContent = recipient; - list.appendChild(contentDoc.createElement("li")).appendChild(a); + // Add list of contact links + const res = findContacts(); + if (!res) { + content.classList.toggle("_LibreJS_fail", true); + addText('Could not guess any contact page for this site.', 'div', content); + } else { + addText('Contact info guessed for this site', 'h3', content); + addText(res[0] + ':', 'span', content); + const list = content.appendChild(contentDoc.createElement("ul")); + for (const link of res[1]) { + const a = contentDoc.createElement("a"); + a.href = link.href; + a.textContent = link.textContent; + list.appendChild(contentDoc.createElement("li")).appendChild(a); + } } - content.appendChild(list); - } - contentDoc.querySelectorAll(".close, a").forEach(makeCloser); - debug("frame initialized"); - } + // Add list of emails + const emails = (document.documentElement.textContent.match(EMAIL_REGEX) || []).filter(e => !!e); + if (emails.length) { + addText("Possible email addresses:", 'h5', content); + const list = content.appendChild(contentDoc.createElement("ul")); + for (const recipient of emails.slice(0, 10)) { + const a = contentDoc.createElement("a"); + a.href = `mailto:${recipient}?subject=${encodeURIComponent(prefs["pref_subject"]) + }&body=${encodeURIComponent(prefs["pref_body"]) + }`; + a.textContent = recipient; + list.appendChild(contentDoc.createElement("li")).appendChild(a); + } + } + // contentDoc.querySelectorAll(".close, a").forEach(makeCloser); + debug("frame initialized"); + } - frame.addEventListener("load", e => { - debug("frame loaded"); - myPort = browser.runtime.connect({ name: "contact_finder" }).onMessage.addListener(m => { - prefs = m; - initFrame(); + frame.addEventListener("load", _ => { + debug("frame loaded"); + browser.runtime.connect({ name: "contact_finder" }).onMessage.addListener(initFrame); }); - }); -} + } -main(); + main(); +})(); |