summaryrefslogtreecommitdiff
path: root/data/extensions/jid1-KtlZuoiikVfFew@jetpack/content/contactFinder.js
diff options
context:
space:
mode:
Diffstat (limited to 'data/extensions/jid1-KtlZuoiikVfFew@jetpack/content/contactFinder.js')
-rw-r--r--data/extensions/jid1-KtlZuoiikVfFew@jetpack/content/contactFinder.js449
1 files changed, 191 insertions, 258 deletions
diff --git a/data/extensions/jid1-KtlZuoiikVfFew@jetpack/content/contactFinder.js b/data/extensions/jid1-KtlZuoiikVfFew@jetpack/content/contactFinder.js
index c8689c0..439f1b1 100644
--- a/data/extensions/jid1-KtlZuoiikVfFew@jetpack/content/contactFinder.js
+++ b/data/extensions/jid1-KtlZuoiikVfFew@jetpack/content/contactFinder.js
@@ -3,6 +3,7 @@
* *
* Copyright (C) 2017 Nathan Nichols, Loic J. Duros, Nik Nyby
* Copyright (C) 2018 Giorgio Maone
+* Copyright (C) 2022 Yuchen Pei
*
* This file is part of GNU LibreJS.
*
@@ -35,292 +36,224 @@
//*********************************************************************************************
-//Regexes taken from "contact_regex.js" in the current LibreJS
-//Copyright (C) 2011, 2012, 2014 Loic J. Duros
-//Copyright (C) 2014, 2015 Nik Nyby
-
-function debug(format, ...args) {
- console.debug(`LibreJS - ${format}`, ...args);
-}
-
-var myPort;
-
-debug("Injecting contact finder in %s", document.URL);
+(() => {
+  /**
+   * Writes a debug-level console message tagged with the "LibreJS - " prefix.
+   *
+   * @param {string} format - message text; it becomes the first argument of
+   *   console.debug, so console format specifiers such as %s may be used
+   * @param {...*} args - extra values forwarded unchanged to console.debug
+   */
+  function debug(format, ...args) {
+    const tagged = `LibreJS - ${format}`;
+    console.debug(tagged, ...args);
+  }
-// email address regexp
-var reEmail = /^mailto\:(admin|feedback|webmaster|info|contact|support|comments|team|help)\@[a-z0-9.\-]+\.[a-z]{2,4}$/i;
+ debug("Injecting contact finder in %s", document.URL);
-var reAnyEmail = /^mailto\:.*?\@[a-z0-9\.\-]+\.[a-z]{2,4}$/i;
+  /**
+   * CONTACT_FRAGS
+   * Regular-expression source strings used to recognise contact links,
+   * one entry per language, each split by degree of certainty
+   * ('certain' > 'probable' > 'uncertain').
+   *
+   * The 'probable' patterns all start with '^[\\s]*' so that leading
+   * whitespace is optional, exactly as in the 'certain' patterns.
+   */
+  const CONTACT_FRAGS =
+    [
+      // da
+      {
+        'certain': [
+          '^[\\s]*Kontakt os[\\s]*$',
+          '^[\\s]*Email Os[\\s]*$',
+          '^[\\s]*Kontakt[\\s]*$'
+        ],
+        'probable': ['^[\\s]*Kontakt', '^[\\s]*Email'],
+        'uncertain': [
+          '^[\\s]*Om Us',
+          '^[\\s]*Om',
+          'Hvem vi er'
+        ]
+      },
+      // en
+      {
+        'certain': [
+          '^[\\s]*Contact Us[\\s]*$',
+          '^[\\s]*Email Us[\\s]*$',
+          '^[\\s]*Contact[\\s]*$',
+          '^[\\s]*Feedback[\\s]*$',
+          '^[\\s]*Web.?site Feedback[\\s]*$'
+        ],
+        'probable': ['^[\\s]*Contact', '^[\\s]*Email'],
+        'uncertain': [
+          '^[\\s]*About Us',
+          '^[\\s]*About',
+          'Who we are',
+          'Who I am',
+          'Company Info',
+          'Customer Service'
+        ]
+      },
+      // es
+      {
+        'certain': [
+          '^[\\s]*contáctenos[\\s]*$',
+          '^[\\s]*Email[\\s]*$'
+        ],
+        'probable': ['^[\\s]*contáctenos', '^[\\s]*Email'],
+        'uncertain': [
+          'Acerca de nosotros'
+        ]
+      },
+      // fr
+      {
+        'certain': [
+          '^[\\s]*Contactez nous[\\s]*$',
+          '^[\\s]*(Nous )?contacter[\\s]*$',
+          '^[\\s]*Email[\\s]*$',
+          '^[\\s]*Contact[\\s]*$',
+          '^[\\s]*Commentaires[\\s]*$'
+        ],
+        'probable': ['^[\\s]*Contact', '^[\\s]*Email'],
+        'uncertain': [
+          '^[\\s]*(A|À) propos',
+          'Qui nous sommes',
+          'Qui suis(-| )?je',
+          'Info',
+          'Service Client(e|è)le'
+        ]
+      }
+    ];
-// twitter address regexp
-var reTwitter = /twitter\.com\/(\!?#\/)?[a-z0-9]*/i;
+ // Upper bound on the number of contact links collected for one certainty level; passed to attempt() as its limit.
+ const CONTACT_LINK_LIMIT = 5;
-// identi.ca address regexp
-var reIdentiCa = /identi\.ca\/(?!notice\/)[a-z0-9]*/i;
+ // Taken from http://emailregex.com/
+ // Global ("g") pattern, so String.prototype.match returns every address found in the searched text.
+ // NOTE(review): there is no "i" flag and the character classes are lowercase-only, so addresses
+ // containing uppercase letters are not matched as written - confirm whether that is intended.
+ const EMAIL_REGEX =
+ new RegExp(/(?:[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*|"(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21\x23-\x5b\x5d-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])*")@(?:(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?|\[(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?|[a-z0-9-]*[a-z0-9]:(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21-\x5a\x53-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])+)\])/g);
+ //*********************************************************************************************
-/**
- * contactSearchStrings
- * Contains arrays of strings classified by language
- * and by degree of certainty.
- */
-var contactStr = {
- 'da': {
- 'certain': [
- '^[\\s]*Kontakt os[\\s]*$',
- '^[\\s]*Email Os[\\s]*$',
- '^[\\s]*Kontakt[\\s]*$'
- ],
- 'probable': ['^[\\s]Kontakt', '^[\\s]*Email'],
- 'uncertain': [
- '^[\\s]*Om Us',
- '^[\\s]*Om',
- 'Hvem vi er'
- ]
- },
- 'en': {
- 'certain': [
- '^[\\s]*Contact Us[\\s]*$',
- '^[\\s]*Email Us[\\s]*$',
- '^[\\s]*Contact[\\s]*$',
- '^[\\s]*Feedback[\\s]*$',
- '^[\\s]*Web.?site Feedback[\\s]*$'
- ],
- 'probable': ['^[\\s]Contact', '^[\\s]*Email'],
- 'uncertain': [
- '^[\\s]*About Us',
- '^[\\s]*About',
- 'Who we are',
- 'Who I am',
- 'Company Info',
- 'Customer Service'
- ]
- },
- 'es': {
- 'certain': [
- '^[\\s]*contáctenos[\\s]*$',
- '^[\\s]*Email[\\s]*$'
- ],
- 'probable': ['^[\\s]contáctenos', '^[\\s]*Email'],
- 'uncertain': [
- 'Acerca de nosotros'
- ]
- },
- 'fr': {
- 'certain': [
- '^[\\s]*Contactez nous[\\s]*$',
- '^[\\s]*(Nous )?contacter[\\s]*$',
- '^[\\s]*Email[\\s]*$',
- '^[\\s]*Contact[\\s]*$',
- '^[\\s]*Commentaires[\\s]*$'
- ],
- 'probable': ['^[\\s]Contact', '^[\\s]*Email'],
- 'uncertain': [
- '^[\\s]*(A|À) propos',
- 'Qui nous sommes',
- 'Qui suis(-| )?je',
- 'Info',
- 'Service Client(e|è)le'
- ]
+  /**
+   * Reports whether the visible text of a link matches a regex fragment.
+   *
+   * @param {{innerText: string}} link - element whose innerText is tested
+   * @param {string} frag - regular-expression source, compiled with the "g" flag
+   * @returns {boolean} true when the text yields at least one string match
+   */
+  function findMatch(link, frag) {
+    const hits = link.innerText.match(new RegExp(frag, "g")) || [];
+    return hits.some(hit => typeof hit === "string");
   }
-};
-var usaPhoneNumber = new RegExp(/(?:\+ ?1 ?)?\(?[2-9]{1}[0-9]{2}\)?(?:\-|\.| )?[0-9]{3}(?:\-|\.| )[0-9]{4}(?:[^0-9])/mg);
-// Taken from http://emailregex.com/
-var email_regex = new RegExp(/(?:[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*|"(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21\x23-\x5b\x5d-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])*")@(?:(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?|\[(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?|[a-z0-9-]*[a-z0-9]:(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21-\x5a\x53-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])+)\])/g);
-//*********************************************************************************************
-
-var prefs;
-
-/**
-* returns input with all elements not of type string removed
-*/
-function remove_not_str(a) {
- var new_a = [];
- for (var i in a) {
- if (typeof (a[i]) == "string") {
- new_a.push(a[i])
- }
- }
- return new_a;
-}
-/**
-* Tests all links on the page for regexes under a certain certainty level.
-*
-* Will return either the first regex match from the selected certainty level or all regexes that
-* match on that certainty level.
-*
-* certainty_lvl can be "certain" > "probable" > "uncertain"
-*/
-function attempt(certainty_lvl, first = true) {
- // There needs to be some kind of max so that people can't troll by for example leaving a comment with a bunch of emails
- // to cause LibreJS users to slow down.
- var fail_flag = true;
- var flag;
- var matches = [];
- var result = [];
- var str_under_test = "";
- for (var i in document.links) {
- if (typeof (document.links[i].innerText) != "string" || typeof (document.links[i].href) != "string") {
- continue;
- }
- str_under_test = document.links[i].innerText + " " + document.links[i].href;
- flag = true;
- for (var j in contactStr) {
- for (var k in contactStr[j][certainty_lvl]) {
- if (flag) {
- result = [];
- result = str_under_test.match(new RegExp(contactStr[j][certainty_lvl][k], "g"));
- result = remove_not_str(result);
- if (result !== undefined && typeof (result[0]) == "string") {
- if (first) {
- return { "fail": false, "result": document.links[i] };
- } else {
- //console.log(document.links[i].href + " matched " + contactStr[j][certainty_lvl][k]);
- matches.push(document.links[i]);
- fail_flag = false;
- flag = false;
- }
- }
+  /**
+   * Tests all links on the page for regexes under a certain certainty level.
+   *
+   * Returns the links that match at least one regex of the selected
+   * certainty level, each link at most once, stopping as soon as `limit`
+   * links have been collected.
+   *
+   * certainty can be "certain" > "probable" > "uncertain"
+   *
+   * @param {string} certainty - which list of each CONTACT_FRAGS entry to test
+   * @param {number} limit - maximum number of links to return
+   * @returns {{fail: boolean, result: Array}} `fail` is true when nothing
+   *   matched; `result` holds the matching link elements
+   */
+  function attempt(certainty, limit) {
+    // There needs to be some kind of max so that people can't troll by for example leaving a comment with a bunch of emails
+    // to cause LibreJS users to slow down.
+    const matches = [];
+    // Skip any link unless both its text and its target are strings.
+    const links = Array.from(document.links).filter(link => (typeof (link.innerText) === "string" && typeof (link.href) === "string"));
+    for (const link of links) {
+      for (const byLevel of CONTACT_FRAGS) {
+        for (const frag of byLevel[certainty]) {
+          // A link can match several fragments or languages; list it only once.
+          if (!matches.includes(link) && findMatch(link, frag)) matches.push(link);
+          // Hand back everything collected so far, not just the last link.
+          if (matches.length >= limit) return { 'fail': matches.length === 0, 'result': matches };
         }
       }
     }
+    return { "fail": matches.length === 0, "result": matches };
   }
- return { "fail": fail_flag, "result": matches };
-}
-/**
-* "LibreJS detects contact pages, email addresses that are likely to be owned by the
-* maintainer of the site, Twitter and identi.ca links, and phone numbers."
-*/
-function find_contacts() {
- var all = document.documentElement.innerText;
- var phone_num = [];
- var twitlinks = [];
- var identi = [];
- var contact_pages = [];
- var res = attempt("certain");
- var flag = true;
- var type = "";
- if (res["fail"] == false) {
- type = "certain";
- res = res["result"];
- flag = false;
- }
- if (flag) {
- res = attempt("probable");
- if (res["fail"] == false) {
- type = "probable";
- res = res["result"];
- flag = false;
- }
- }
- if (flag) {
- res = attempt("uncertain");
- if (res["fail"] == false) {
- type = "uncertain";
- res = res["result"];
- flag = false;
+  /**
+   * Looks for contact links, trying the certainty levels from strongest to
+   * weakest and stopping at the first level that produces a match.
+   *
+   * @returns {?Array} `[level, links]` for the first level that did not
+   *   fail, or null when every level failed
+   */
+  function findContacts() {
+    const levels = ["certain", "probable", "uncertain"];
+    for (const level of levels) {
+      const { fail, result } = attempt(level, CONTACT_LINK_LIMIT);
+      if (fail) continue;
+      return [level, result];
     }
+    return null;
   }
- if (flag) {
- return res;
+
+
+  /**
+   * Creates a fresh element and attaches it to the page, replacing any
+   * element that already carries the same id.
+   *
+   * @param {string} id - id to give the new element; an existing element
+   *   with this id is removed first
+   * @param {string} tag - tag name of the element to create
+   * @param {Node} [parent=document.body] - node the element is appended to
+   * @returns {Element} the newly created element
+   */
+  function createWidget(id, tag, parent = document.body) {
+    const stale = document.getElementById(id);
+    if (stale !== null) stale.remove();
+    const widget = document.createElement(tag);
+    parent.appendChild(widget);
+    widget.id = id;
+    return widget;
   }
- return [type, res];
-}
+ /**
+ *
+ * Creates the contact finder / complain UI as a semi-transparent overlay
+ *
+ */
-function createWidget(id, tag, parent = document.body) {
- let widget = document.getElementById(id);
- if (widget) widget.remove();
- widget = parent.appendChild(document.createElement(tag));
- widget.id = id;
- return widget;
-}
+ function main() {
+ const overlay = createWidget("_LibreJS_overlay", "div");
+ const frame = createWidget("_LibreJS_frame", "iframe");
-/**
-*
-* Creates the contact finder / complain UI as a semi-transparent overlay
-*
-*/
+ const close = () => {
+ frame.remove();
+ overlay.remove();
+ };
-function main() {
- let overlay = createWidget("_LibreJS_overlay", "div");
- let frame = createWidget("_LibreJS_frame", "iframe");
+ // Clicking the "outer area" closes the dialog.
+ overlay.addEventListener("click", close);
- let close = () => {
- frame.remove();
- overlay.remove();
- };
+ const initFrame = prefs => {
+ debug("initFrame");
+ const contentDoc = frame.contentWindow.document;
- let closeListener = e => {
- let t = e.currentTarget;
- if (t.href) { // link navigation
- if (t.href !== document.URL) {
- if (t.href.includes("#")) {
- window.addEventListener("hashchange", close);
- }
- return;
+      // Appends a new <tag> element holding `text` (set via textContent, so it
+      // is never parsed as HTML) to `wherein`. `el` is declared locally so it
+      // does not become an implicit global.
+      const addText = (text, tag, wherein) => {
+        const el = wherein.appendChild(contentDoc.createElement(tag));
+        el.textContent = text;
       }
- }
- close();
- };
- let makeCloser = clickable => clickable.addEventListener("click", closeListener);
- makeCloser(overlay);
+ // Header of the dialog
+ const { body } = contentDoc;
+ body.id = "_LibreJS_dialog";
+ addText('LibreJS Complaint', 'h1', body);
+ const closeButton = body.appendChild(contentDoc.createElement('button'));
+ closeButton.classList.toggle('close', true);
+ closeButton.textContent = 'x';
+ closeButton.addEventListener("click", close);
- let initFrame = () => {
- debug("initFrame");
- let res = find_contacts();
- let contentDoc = frame.contentWindow.document;
- let { body } = contentDoc;
- body.id = "_LibreJS_dialog";
- body.innerHTML = `<h1>LibreJS Complaint</h1><button class='close'>x</button>`;
- contentDoc.documentElement.appendChild(contentDoc.createElement("base")).target = "_top";
- let content = body.appendChild(contentDoc.createElement("div"));
- content.id = "content";
- let addHTML = s => content.insertAdjacentHTML("beforeend", s);
- if ("fail" in res) {
- content.classList.toggle("_LibreJS_fail", true)
- addHTML("<div>Could not guess any contact page for this site.</div>");
- } else {
- addHTML("<h3>Contact info guessed for this site</h3>");
- if (typeof (res[1]) === "string") {
- let a = contentDoc.createElement("a");
- a.href = a.textContent = res[1];
- content.appendChild(a);
- } else if (typeof (res[1]) === "object") {
- addHTML(`${res[0]}: ${res[1].outerHTML}`);
- }
- }
+ const content = body.appendChild(contentDoc.createElement("div"));
+ content.id = "content";
- let emails = document.documentElement.textContent.match(email_regex);
- if (emails && (emails = Array.filter(emails, e => !!e)).length) {
- addHTML("<h5>Possible email addresses:</h5>");
- let list = contentDoc.createElement("ul");
- for (let i = 0, max = Math.min(emails.length, 10); i < max; i++) {
- let recipient = emails[i];
- let a = contentDoc.createElement("a");
- a.href = `mailto:${recipient}?subject${encodeURIComponent(prefs["pref_subject"])
- }&body=${encodeURIComponent(prefs["pref_body"])
- }`;
- a.textContent = recipient;
- list.appendChild(contentDoc.createElement("li")).appendChild(a);
+ // Add list of contact links
+ const res = findContacts();
+ if (!res) {
+ content.classList.toggle("_LibreJS_fail", true);
+ addText('Could not guess any contact page for this site.', 'div', content);
+ } else {
+ addText('Contact info guessed for this site', 'h3', content);
+ addText(res[0] + ':', 'span', content);
+ const list = content.appendChild(contentDoc.createElement("ul"));
+ for (const link of res[1]) {
+ const a = contentDoc.createElement("a");
+ a.href = link.href;
+ a.textContent = link.textContent;
+ list.appendChild(contentDoc.createElement("li")).appendChild(a);
+ }
}
- content.appendChild(list);
- }
- contentDoc.querySelectorAll(".close, a").forEach(makeCloser);
- debug("frame initialized");
- }
+ // Add list of emails
+ const emails = (document.documentElement.textContent.match(EMAIL_REGEX) || []).filter(e => !!e);
+ if (emails.length) {
+ addText("Possible email addresses:", 'h5', content);
+ const list = content.appendChild(contentDoc.createElement("ul"));
+ for (const recipient of emails.slice(0, 10)) {
+ const a = contentDoc.createElement("a");
+ a.href = `mailto:${recipient}?subject=${encodeURIComponent(prefs["pref_subject"])
+ }&body=${encodeURIComponent(prefs["pref_body"])
+ }`;
+ a.textContent = recipient;
+ list.appendChild(contentDoc.createElement("li")).appendChild(a);
+ }
+ }
+ // contentDoc.querySelectorAll(".close, a").forEach(makeCloser);
+ debug("frame initialized");
+ }
- frame.addEventListener("load", e => {
- debug("frame loaded");
- myPort = browser.runtime.connect({ name: "contact_finder" }).onMessage.addListener(m => {
- prefs = m;
- initFrame();
+ frame.addEventListener("load", _ => {
+ debug("frame loaded");
+ browser.runtime.connect({ name: "contact_finder" }).onMessage.addListener(initFrame);
});
- });
-}
+ }
-main();
+ main();
+})();