diff options
Diffstat (limited to 'data/extensions/uBlock0@raymondhill.net/js/html-filtering.js')
-rw-r--r-- | data/extensions/uBlock0@raymondhill.net/js/html-filtering.js | 442 |
1 files changed, 442 insertions, 0 deletions
/*******************************************************************************

    uBlock Origin - a comprehensive, efficient content blocker
    Copyright (C) 2017-present Raymond Hill

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program. If not, see {http://www.gnu.org/licenses/}.

    Home: https://github.com/gorhill/uBlock
*/

import { StaticExtFilteringHostnameDB } from './static-ext-filtering-db.js';
import { entityFromHostname } from './uri-utils.js';
import logger from './logger.js';
import { sessionFirewall } from './filtering-engines.js';
import µb from './background.js';

/******************************************************************************/

// Cache of instantiated procedural selectors, keyed by their JSON string.
const pselectors = new Map();
// Fingerprints of compiled filters already loaded, used to discard repeats.
const duplicates = new Set();

const filterDB = new StaticExtFilteringHostnameDB();

let acceptedCount = 0;
let discardedCount = 0;
// Document currently being filtered. Only set for the duration of
// htmlFilteringEngine.apply(); the selector tasks below read it.
let docRegister;

const htmlFilteringEngine = {
    get acceptedCount() {
        return acceptedCount;
    },
    get discardedCount() {
        return discardedCount;
    },
    getFilterCount() {
        return filterDB.size;
    },
};

/**
 * Build a regular expression from a filter argument.
 *
 * - An empty string yields `/^/`.
 * - A string of the form `/pattern/` or `/pattern/i` is used as a regex
 *   literal (may throw if `pattern` is not a valid regex).
 * - Anything else is escaped and matched literally, case-insensitively.
 *
 * @param {string} s - The raw argument.
 * @param {boolean} [exact=false] - When matching literally, anchor the
 *   pattern to the whole string.
 * @returns {RegExp}
 */
const regexFromString = (s, exact = false) => {
    if ( s === '' ) { return /^/; }
    const match = /^\/(.+)\/([i]?)$/.exec(s);
    if ( match !== null ) {
        return new RegExp(match[1], match[2] || undefined);
    }
    const reStr = s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    return new RegExp(exact ? `^${reStr}$` : reStr, 'i');
};

// Each task class below implements `transpose(node, output)`: given one input
// node, it appends zero or more resulting nodes to the `output` array.

// Fallback for operators which have no implementation in this file: reports
// the operator name once at construction time, and never outputs any node.
class PSelectorVoidTask {
    constructor(task) {
        console.info(`[uBO] HTML filtering: :${task[0]}() operator is not supported`);
    }
    transpose() {
    }
}

// Keeps a node when its text content matches the needle.
class PSelectorHasTextTask {
    constructor(task) {
        this.needle = regexFromString(task[1]);
    }
    transpose(node, output) {
        if ( this.needle.test(node.textContent) ) {
            output.push(node);
        }
    }
}

// Keeps a node when the nested procedural selector's test() result equals
// `target` (true here, false in the PSelectorIfNotTask subclass).
const PSelectorIfTask = class {
    constructor(task) {
        this.pselector = new PSelector(task[1]);
    }
    transpose(node, output) {
        if ( this.pselector.test(node) === this.target ) {
            output.push(node);
        }
    }
};
PSelectorIfTask.prototype.target = true;

class PSelectorIfNotTask extends PSelectorIfTask {
}
PSelectorIfNotTask.prototype.target = false;

// Keeps a node when its text content is at least `min` characters long.
class PSelectorMinTextLengthTask {
    constructor(task) {
        this.min = task[1];
    }
    transpose(node, output) {
        if ( node.textContent.length >= this.min ) {
            output.push(node);
        }
    }
}

// Replaces a node with the nodes matched by a CSS selector evaluated
// relatively to that node.
class PSelectorSpathTask {
    constructor(task) {
        this.spath = task[1];
        // Selectors starting with `+`, `~` or `:` are resolved from the
        // parent through qsa() rather than with node.querySelectorAll().
        this.nth = /^(?:\s*[+~]|:)/.test(this.spath);
        if ( this.nth ) { return; }
        if ( /^\s*>/.test(this.spath) ) {
            this.spath = `:scope ${this.spath.trim()}`;
        }
    }
    transpose(node, output) {
        const nodes = this.nth
            ? PSelectorSpathTask.qsa(node, this.spath)
            : node.querySelectorAll(this.spath);
        for ( const node of nodes ) {
            output.push(node);
        }
    }
    // Helper method for other operators.
    // Computes the 1-based position of `node` among its parent's element
    // children, then queries the parent with
    // `:scope > :nth-child(pos)` immediately followed by `selector`.
    // Returns an empty array when `node` has no parent element.
    static qsa(node, selector) {
        const parent = node.parentElement;
        if ( parent === null ) { return []; }
        let pos = 1;
        for (;;) {
            node = node.previousElementSibling;
            if ( node === null ) { break; }
            pos += 1;
        }
        return parent.querySelectorAll(
            `:scope > :nth-child(${pos})${selector}`
        );
    }
}

// Replaces a node with one of its ancestors: either the closest ancestor
// matching a selector (non-number argument), or the ancestor `i` levels up
// (number argument). Outputs nothing when no such ancestor exists.
class PSelectorUpwardTask {
    constructor(task) {
        const arg = task[1];
        if ( typeof arg === 'number' ) {
            this.i = arg;
        } else {
            this.s = arg;
        }
    }
    transpose(node, output) {
        if ( this.s !== '' ) {
            // Start from the parent so that the node itself never matches.
            const parent = node.parentElement;
            if ( parent === null ) { return; }
            node = parent.closest(this.s);
            if ( node === null ) { return; }
        } else {
            let nth = this.i;
            for (;;) {
                node = node.parentElement;
                if ( node === null ) { return; }
                nth -= 1;
                if ( nth === 0 ) { break; }
            }
        }
        output.push(node);
    }
}
PSelectorUpwardTask.prototype.i = 0;
PSelectorUpwardTask.prototype.s = '';

// Replaces a node with the element nodes resulting from evaluating an XPath
// expression with that node as context.
class PSelectorXpathTask {
    constructor(task) {
        this.xpe = task[1];
    }
    transpose(node, output) {
        const xpr = docRegister.evaluate(
            this.xpe,
            node,
            null,
            XPathResult.UNORDERED_NODE_SNAPSHOT_TYPE,
            null
        );
        let j = xpr.snapshotLength;
        while ( j-- ) {
            const node = xpr.snapshotItem(j);
            // Only element nodes are kept.
            if ( node.nodeType === 1 ) {
                output.push(node);
            }
        }
    }
}

// A procedural selector: a plain CSS selector (`selector`) followed by a
// sequence of tasks applied in order to the nodes it matched.
class PSelector {
    constructor(o) {
        this.raw = o.raw;
        this.selector = o.selector;
        this.tasks = [];
        if ( !o.tasks ) { return; }
        for ( const task of o.tasks ) {
            // Unknown operators are mapped to the no-op task.
            const ctor = this.operatorToTaskMap.get(task[0]) || PSelectorVoidTask;
            const pselector = new ctor(task);
            this.tasks.push(pselector);
        }
    }
    // Returns the initial set of nodes: those matching `selector` under
    // `input`, or under the current document when `input` is not provided.
    prime(input) {
        const root = input || docRegister;
        if ( this.selector === '' ) { return [ root ]; }
        if ( input !== docRegister && /^ ?[>+~]/.test(this.selector) ) {
            return Array.from(PSelectorSpathTask.qsa(input, this.selector));
        }
        return Array.from(root.querySelectorAll(this.selector));
    }
    // Returns the nodes left once all tasks have been applied in sequence.
    exec(input) {
        let nodes = this.prime(input);
        for ( const task of this.tasks ) {
            if ( nodes.length === 0 ) { break; }
            const transposed = [];
            for ( const node of nodes ) {
                task.transpose(node, transposed);
            }
            nodes = transposed;
        }
        return nodes;
    }
    // Returns true as soon as one primed node survives all the tasks.
    test(input) {
        const nodes = this.prime(input);
        for ( const node of nodes ) {
            let output = [ node ];
            for ( const task of this.tasks ) {
                const transposed = [];
                for ( const node of output ) {
                    task.transpose(node, transposed);
                }
                output = transposed;
                if ( output.length === 0 ) { break; }
            }
            if ( output.length !== 0 ) { return true; }
        }
        return false;
    }
}
PSelector.prototype.operatorToTaskMap = new Map([
    [ 'has', PSelectorIfTask ],
    [ 'has-text', PSelectorHasTextTask ],
    [ 'if', PSelectorIfTask ],
    [ 'if-not', PSelectorIfNotTask ],
    [ 'min-text-length', PSelectorMinTextLengthTask ],
    [ 'not', PSelectorIfNotTask ],
    [ 'nth-ancestor', PSelectorUpwardTask ],
    [ 'spath', PSelectorSpathTask ],
    [ 'upward', PSelectorUpwardTask ],
    [ 'xpath', PSelectorXpathTask ],
]);

// Sends one entry to the logger for an HTML filter. `details` must expose
// `tabId` and `url`. `exception` is 0 for a regular filter (reported as
// `##^selector`), anything else for an exception filter (`#@#^selector`).
function logOne(details, exception, selector) {
    µb.filteringContext
        .duplicate()
        .fromTabId(details.tabId)
        .setRealm('extended')
        .setType('dom')
        .setURL(details.url)
        .setDocOriginFromURL(details.url)
        .setFilter({
            source: 'extended',
            raw: `${exception === 0 ? '##' : '#@#'}^${selector}`,
        })
        .toLogger();
}

// Removes from the current document all nodes matched by a procedural
// selector given as a JSON string. Returns true if at least one node was
// removed. Throws if `selector` is not valid JSON.
function applyProceduralSelector(details, selector) {
    let pselector = pselectors.get(selector);
    if ( pselector === undefined ) {
        pselector = new PSelector(JSON.parse(selector));
        pselectors.set(selector, pselector);
    }
    const nodes = pselector.exec();
    let modified = false;
    for ( const node of nodes ) {
        node.remove();
        modified = true;
    }
    if ( modified && logger.enabled ) {
        logOne(details, 0, pselector.raw);
    }
    return modified;
}

// Removes from the current document all nodes matched by a plain CSS
// selector. Returns true if at least one node was removed.
function applyCSSSelector(details, selector) {
    const nodes = docRegister.querySelectorAll(selector);
    let modified = false;
    for ( const node of nodes ) {
        node.remove();
        modified = true;
    }
    if ( modified && logger.enabled ) {
        logOne(details, 0, selector);
    }
    return modified;
}

// Writes an error message to the logger, with `{who}` replaced by the
// writer's `name` property, or `?` when that property is not set.
function logError(writer, msg) {
    logger.writeOne({
        realm: 'message',
        type: 'error',
        text: msg.replace('{who}', writer.properties.get('name') || '?')
    });
}

// Discards all filters, cached selectors and counters.
htmlFilteringEngine.reset = function() {
    filterDB.clear();
    pselectors.clear();
    duplicates.clear();
    acceptedCount = 0;
    discardedCount = 0;
};

// To be called once all filters are loaded: the duplicate-detection set is
// no longer needed past this point.
htmlFilteringEngine.freeze = function() {
    duplicates.clear();
    filterDB.collectGarbage();
};

// Compiles the HTML filter currently held by `parser` into `writer`, under
// the 'HTML_FILTERS' section. Every record has the shape
// [ 64, hostname, prefixed-selector ], where the prefix is `+` for a filter
// to apply and `-` for an exception.
htmlFilteringEngine.compile = function(parser, writer) {
    const isException = parser.isException();
    const { raw, compiled } = parser.result;
    if ( compiled === undefined ) {
        return logError(writer, `Invalid HTML filter in {who}: ##${raw}`);
    }

    writer.select('HTML_FILTERS');

    // Only exception filters are allowed to be global.
    if ( parser.hasOptions() === false ) {
        if ( isException ) {
            // Same record shape as hostname-specific filters below:
            // fromCompiledContent() stores args[2] as the selector, and
            // retrieve() identifies exceptions by their leading `-`.
            writer.push([ 64, '', `-${compiled}` ]);
        }
        return;
    }

    const compiledFilters = [];
    let hasOnlyNegated = true;
    for ( const { hn, not, bad } of parser.getExtFilterDomainIterator() ) {
        if ( bad ) { continue; }
        // A negated hostname flips the meaning of the filter.
        const prefix = ((isException ? 1 : 0) ^ (not ? 1 : 0)) ? '-' : '+';
        if ( not === false ) {
            hasOnlyNegated = false;
        }
        compiledFilters.push([ 64, hn, `${prefix}${compiled}` ]);
    }

    // Not allowed since it's equivalent to forbidden generic HTML filters
    if ( isException === false && hasOnlyNegated ) {
        return logError(writer, `Invalid HTML filter in {who}: ##${raw}`);
    }

    writer.pushMany(compiledFilters);
};

// Loads the records of the 'HTML_FILTERS' section of `reader` into the
// filter database, skipping records already seen.
htmlFilteringEngine.fromCompiledContent = function(reader) {
    // Don't bother loading filters if stream filtering is not supported.
    if ( µb.canFilterResponseData === false ) { return; }

    reader.select('HTML_FILTERS');

    while ( reader.next() ) {
        acceptedCount += 1;
        const fingerprint = reader.fingerprint();
        if ( duplicates.has(fingerprint) ) {
            discardedCount += 1;
            continue;
        }
        duplicates.add(fingerprint);
        const args = reader.args();
        filterDB.store(args[1], args[2]);
    }
};

// Collects the selectors to apply for the document described by `fctxt`.
// Returns `{ plains, procedurals }` (two Sets of selector strings), or
// undefined when there is nothing to apply.
htmlFilteringEngine.retrieve = function(fctxt) {
    const all = new Set();
    const hostname = fctxt.getHostname();
    filterDB.retrieveSpecifics(all, hostname);
    const entity = entityFromHostname(hostname, fctxt.getDomain());
    filterDB.retrieveSpecifics(all, entity);
    filterDB.retrieveSpecificsByRegex(all, hostname, fctxt.url);
    filterDB.retrieveGenerics(all);
    if ( all.size === 0 ) { return; }

    // https://github.com/gorhill/uBlock/issues/2835
    // Do not filter if the site is under an `allow` rule.
    if ( µb.userSettings.advancedUserEnabled ) {
        if ( sessionFirewall.evaluateCellZY(hostname, hostname, '*') === 2 ) { return; }
    }

    // Split filters in different groups
    const plains = new Set();
    const procedurals = new Set();
    for ( const s of all ) {
        // Exceptions are only looked up, through the all.has() call below.
        if ( s.charCodeAt(0) === 0x2D /* - */ ) { continue; }
        const selector = s.slice(1);
        // Procedural selectors are stored as JSON objects.
        const isProcedural = selector.startsWith('{');
        if ( all.has(`-${selector}`) ) {
            logOne(fctxt, 1, isProcedural ? JSON.parse(selector).raw : selector);
        } else if ( isProcedural ) {
            procedurals.add(selector);
        } else {
            plains.add(selector);
        }
    }

    if ( plains.size === 0 && procedurals.size === 0 ) { return; }

    return { plains, procedurals };
};

// Applies `selectors` (as returned by retrieve()) to `doc`, removing every
// matching node. Returns true if the document was modified.
htmlFilteringEngine.apply = function(doc, details, selectors) {
    docRegister = doc;
    let modified = false;
    try {
        for ( const selector of selectors.plains ) {
            if ( applyCSSSelector(details, selector) ) {
                modified = true;
            }
        }
        for ( const selector of selectors.procedurals ) {
            if ( applyProceduralSelector(details, selector) ) {
                modified = true;
            }
        }
    } finally {
        // Always release the document, including when a selector throws,
        // so that this module never keeps a reference to it.
        docRegister = undefined;
    }
    return modified;
};

htmlFilteringEngine.toSelfie = function() {
    return filterDB.toSelfie();
};

htmlFilteringEngine.fromSelfie = function(selfie) {
    filterDB.fromSelfie(selfie);
    // Cached selectors may not match the restored filters.
    pselectors.clear();
};

/******************************************************************************/

export default htmlFilteringEngine;

/******************************************************************************/