diff options
Diffstat (limited to 'data/extensions/uBlock0@raymondhill.net/js/static-dnr-filtering.js')
-rw-r--r-- | data/extensions/uBlock0@raymondhill.net/js/static-dnr-filtering.js | 509 |
1 files changed, 509 insertions, 0 deletions
/*******************************************************************************

    uBlock Origin - a comprehensive, efficient content blocker
    Copyright (C) 2014-present Raymond Hill

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see {http://www.gnu.org/licenses/}.

    Home: https://github.com/gorhill/uBlock
*/

import * as sfp from './static-filtering-parser.js';

import {
    CompiledListReader,
    CompiledListWriter,
} from './static-filtering-io.js';

import { LineIterator } from './text-utils.js';
import staticNetFilteringEngine from './static-net-filtering.js';

/******************************************************************************/

// A hostname entry containing a slash is treated as a regex- or path-based
// entry; such entries are skipped by the code below.
const isRegexOrPath = hn => hn.includes('/');

/******************************************************************************/

// Copied from cosmetic-filter.js for the time being to avoid unwanted
// dependencies

const rePlainSelector = /^[#.][\w\\-]+/;
const rePlainSelectorEx = /^[^#.[(]+([#.][\w-]+)|([#.][\w-]+)$/;
const rePlainSelectorEscaped = /^[#.](?:\\[0-9A-Fa-f]+ |\\.|\w|-)+/;
const reEscapeSequence = /\\([0-9A-Fa-f]+ |.)/g;

/**
 * Decode the hexadecimal part of a CSS escape sequence into a string.
 *
 * `String.fromCodePoint` is used rather than `String.fromCharCode` so that
 * code points above U+FFFF are not truncated to 16 bits. Values beyond the
 * Unicode range map to U+FFFD instead of throwing a RangeError.
 *
 * @param {string} hex - Hex digits, optionally followed by a single space.
 * @returns {string} The decoded character.
 */
const decodeHexEscape = hex => {
    const codePoint = parseInt(hex, 16);
    if ( codePoint > 0x10FFFF ) { return '\uFFFD'; }
    return String.fromCodePoint(codePoint);
};

/**
 * Extract the leading (or trailing) plain `#id` / `.class` token from a
 * selector, with CSS escape sequences resolved.
 *
 * @param {string} selector - A CSS selector.
 * @returns {string|undefined} The key, including its `#` or `.` prefix, or
 *   `undefined` when no plain id/class token could be extracted.
 */
const keyFromSelector = selector => {
    let key = '';
    let matches = rePlainSelector.exec(selector);
    if ( matches ) {
        key = matches[0];
    } else {
        matches = rePlainSelectorEx.exec(selector);
        if ( matches === null ) { return; }
        key = matches[1] || matches[2];
    }
    // No backslash: nothing to unescape.
    if ( key.indexOf('\\') === -1 ) { return key; }
    matches = rePlainSelectorEscaped.exec(selector);
    if ( matches === null ) { return; }
    key = '';
    const escaped = matches[0];
    let beg = 0;
    // The regex is global and shared: reset its state before reuse.
    reEscapeSequence.lastIndex = 0;
    for (;;) {
        matches = reEscapeSequence.exec(escaped);
        if ( matches === null ) {
            return key + escaped.slice(beg);
        }
        key += escaped.slice(beg, matches.index);
        beg = reEscapeSequence.lastIndex;
        if ( matches[1].length === 1 ) {
            // Single escaped character: taken literally.
            key += matches[1];
        } else {
            // Hex digits followed by a space: a code point.
            key += decodeHexEscape(matches[1]);
        }
    }
};

/******************************************************************************/

/**
 * Record a generic cosmetic filter, or an exception to one, in `context`.
 *
 * Selectors which are undefined, at most one character long, or which start
 * with `{` are ignored.
 *
 * @param {object} context - Receives `genericCosmeticFilters` or
 *   `genericCosmeticExceptions`, arrays of `{ key, selector }`, created on
 *   first use.
 * @param {string|undefined} selector - The compiled selector.
 * @param {boolean} isException - Whether the filter is an exception.
 */
function addGenericCosmeticFilter(context, selector, isException) {
    if ( selector === undefined ) { return; }
    if ( selector.length <= 1 ) { return; }
    if ( selector.charCodeAt(0) === 0x7B /* '{' */ ) { return; }
    const key = keyFromSelector(selector);
    if ( isException ) {
        if ( context.genericCosmeticExceptions === undefined ) {
            context.genericCosmeticExceptions = [];
        }
        context.genericCosmeticExceptions.push({ key, selector });
        return;
    }
    if ( context.genericCosmeticFilters === undefined ) {
        context.genericCosmeticFilters = [];
    }
    context.genericCosmeticFilters.push({ key, selector });
}

/******************************************************************************/

/**
 * Collect the extended filter currently held by `parser` into `context`.
 *
 * Depending on the kind of filter, one of these is updated:
 * - `context.scriptletFilters` (Map keyed by the JSON-serialized arguments)
 * - `context.responseHeaderRules` (array of `modifyHeaders` rules)
 * - `context.genericCosmeticFilters` / `context.genericCosmeticExceptions`
 * - `context.specificCosmeticFilters` (Map keyed by compiled selector)
 *
 * HTML filters are ignored.
 *
 * @param {object} context - The conversion context, mutated in place.
 * @param {object} parser - A parser which has just parsed a filter.
 * @returns {false|undefined} `false` when the filter is not an extended
 *   filter, `undefined` otherwise.
 */
function addExtendedToDNR(context, parser) {
    if ( parser.isExtendedFilter() === false ) { return false; }

    // Scriptlet injection
    if ( parser.isScriptletFilter() ) {
        if ( parser.hasOptions() === false ) { return; }
        if ( context.scriptletFilters === undefined ) {
            context.scriptletFilters = new Map();
        }
        const exception = parser.isException();
        const args = parser.getScriptletArgs();
        const argsToken = JSON.stringify(args);
        for ( const { hn, not, bad } of parser.getExtFilterDomainIterator() ) {
            if ( bad ) { continue; }
            // Scriptlet exceptions are not collected.
            if ( exception ) { continue; }
            if ( isRegexOrPath(hn) ) { continue; }
            let details = context.scriptletFilters.get(argsToken);
            if ( details === undefined ) {
                context.scriptletFilters.set(argsToken, details = { args });
                if ( context.trustedSource ) {
                    details.trustedSource = true;
                }
            }
            if ( not ) {
                if ( details.excludeMatches === undefined ) {
                    details.excludeMatches = [];
                }
                details.excludeMatches.push(hn);
                continue;
            }
            if ( details.matches === undefined ) {
                details.matches = [];
            }
            // Once '*' is present, more specific hostnames are redundant.
            if ( details.matches.includes('*') ) { continue; }
            if ( hn === '*' ) {
                details.matches = [ '*' ];
                continue;
            }
            details.matches.push(hn);
        }
        return;
    }

    // Response header filtering
    if ( parser.isResponseheaderFilter() ) {
        if ( parser.hasError() ) { return; }
        if ( parser.hasOptions() === false ) { return; }
        if ( parser.isException() ) { return; }
        const node = parser.getBranchFromType(sfp.NODE_TYPE_EXT_PATTERN_RESPONSEHEADER);
        if ( node === 0 ) { return; }
        const header = parser.getNodeString(node);
        if ( context.responseHeaderRules === undefined ) {
            context.responseHeaderRules = [];
        }
        const rule = {
            action: {
                responseHeaders: [
                    {
                        header,
                        operation: 'remove',
                    }
                ],
                type: 'modifyHeaders'
            },
            condition: {
                resourceTypes: [
                    'main_frame',
                    'sub_frame'
                ]
            },
        };
        for ( const { hn, not, bad } of parser.getExtFilterDomainIterator() ) {
            if ( bad ) { continue; }
            if ( isRegexOrPath(hn) ) { continue; }
            if ( not ) {
                if ( rule.condition.excludedInitiatorDomains === undefined ) {
                    rule.condition.excludedInitiatorDomains = [];
                }
                rule.condition.excludedInitiatorDomains.push(hn);
                continue;
            }
            if ( hn === '*' ) {
                // Wildcard: drop any initiator domains collected so far.
                if ( rule.condition.initiatorDomains !== undefined ) {
                    rule.condition.initiatorDomains = undefined;
                }
                continue;
            }
            if ( rule.condition.initiatorDomains === undefined ) {
                rule.condition.initiatorDomains = [];
            }
            rule.condition.initiatorDomains.push(hn);
        }
        context.responseHeaderRules.push(rule);
        return;
    }

    // HTML filtering
    if ( (parser.flavorBits & parser.BITFlavorExtHTML) !== 0 ) {
        return;
    }

    // Cosmetic filtering

    // Generic cosmetic filtering
    if ( parser.hasOptions() === false ) {
        const { compiled, exception } = parser.result;
        addGenericCosmeticFilter(context, compiled, exception);
        return;
    }

    // Specific cosmetic filtering
    // https://github.com/chrisaljoudi/uBlock/issues/151
    //   Negated hostname means the filter applies to all non-negated hostnames
    //   of same filter OR globally if there is no non-negated hostnames.
    if ( context.specificCosmeticFilters === undefined ) {
        context.specificCosmeticFilters = new Map();
    }
    const { compiled, exception, raw } = parser.result;
    if ( compiled === undefined ) {
        context.specificCosmeticFilters.set(`Invalid filter: ...##${raw}`, {
            rejected: true
        });
        return;
    }
    let details = context.specificCosmeticFilters.get(compiled);
    // Stays true only when every usable entry is a negated hostname of an
    // exception filter, or when there is no usable entry at all.
    let isGeneric = true;
    for ( const { hn, not, bad } of parser.getExtFilterDomainIterator() ) {
        if ( bad ) { continue; }
        if ( not && exception ) { continue; }
        isGeneric = false;
        // TODO: Support regex- and path-based entries
        if ( isRegexOrPath(hn) ) { continue; }
        if ( details === undefined ) {
            context.specificCosmeticFilters.set(compiled, details = {});
        }
        if ( compiled.startsWith('{') === false ) {
            details.key = keyFromSelector(compiled);
        }
        if ( exception || not ) {
            if ( details.excludeMatches === undefined ) {
                details.excludeMatches = [];
            }
            details.excludeMatches.push(hn);
            continue;
        }
        if ( details.matches === undefined ) {
            details.matches = [];
        }
        if ( details.matches.includes('*') ) { continue; }
        if ( hn === '*' ) {
            details.matches = [ '*' ];
            continue;
        }
        details.matches.push(hn);
    }
    if ( details === undefined ) { return; }
    if ( exception ) { return; }
    if ( compiled.startsWith('{') ) { return; }
    if ( isGeneric ) {
        addGenericCosmeticFilter(context, compiled, false);
    }
}

/******************************************************************************/

/**
 * Parse one raw filter list and feed its content into `context`.
 *
 * Network filters are compiled then handed over to the static network
 * filtering engine; extended filters go through `addExtendedToDNR()`.
 * Filters which fail to parse or compile are reported in `context.invalid`.
 *
 * @param {object} context - The conversion context, mutated in place. Reads
 *   `env` and `secret`; writes `trustedSource` and `invalid`.
 * @param {object} list - The list to convert; `text`, `name` and
 *   `trustedSource` are read.
 */
function addToDNR(context, list) {
    const env = context.env || [];
    const writer = new CompiledListWriter();
    const lineIter = new LineIterator(
        sfp.utils.preparser.prune(list.text, env)
    );
    const parser = new sfp.AstFilterParser({
        toDNR: true,
        nativeCssHas: env.includes('native_css_has'),
        badTypes: [ sfp.NODE_TYPE_NET_OPTION_NAME_REDIRECTRULE ],
        trustedSource: list.trustedSource || undefined,
    });
    const compiler = staticNetFilteringEngine.createCompiler();

    writer.properties.set('name', list.name);
    compiler.start(writer);

    while ( lineIter.eot() === false ) {
        let line = lineIter.next();
        // Line continuation: a line ending with ' \' is joined with the next
        // line, provided that next line starts with four spaces. The peeked
        // text must be compared against a four-character string: a shorter
        // literal could never match the result of peek(4).
        // NOTE(review): assumes peek(4) returns the next 4 characters --
        // confirm against LineIterator in text-utils.js.
        while ( line.endsWith(' \\') ) {
            if ( lineIter.peek(4) !== '    ' ) { break; }
            line = line.slice(0, -2).trim() + lineIter.next().trim();
        }

        parser.parse(line);

        if ( parser.isComment() ) {
            // Trust directives are honored only when they carry the secret
            // held by the context.
            if ( line === `!#trusted on ${context.secret}` ) {
                parser.options.trustedSource = true;
                context.trustedSource = true;
            } else if ( line === `!#trusted off ${context.secret}` ) {
                parser.options.trustedSource = false;
                context.trustedSource = false;
            }
            continue;
        }

        if ( parser.isFilter() === false ) { continue; }
        if ( parser.hasError() ) {
            if ( parser.astError === sfp.AST_ERROR_OPTION_EXCLUDED ) {
                context.invalid.add(`Incompatible with DNR: ${line}`);
            } else {
                context.invalid.add(`Rejected filter: ${line}`);
            }
            continue;
        }

        if ( parser.isExtendedFilter() ) {
            addExtendedToDNR(context, parser);
            continue;
        }
        if ( parser.isNetworkFilter() === false ) { continue; }

        if ( compiler.compile(parser, writer) ) { continue; }

        if ( compiler.error !== undefined ) {
            context.invalid.add(compiler.error);
        }
    }

    compiler.finish(writer);

    staticNetFilteringEngine.dnrFromCompiled(
        'add',
        context,
        new CompiledListReader(writer.toString())
    );
}

/******************************************************************************/

// Merge rules where possible by merging arrays of a specific property.
//
// https://github.com/uBlockOrigin/uBOL-home/issues/10#issuecomment-1304822579
//   Do not merge rules which have errors.

/**
 * Merge rules which are identical except for the array stored under the
 * `mergeTarget` property. The first rule of each group receives the sorted
 * union of the arrays; the other rules are deleted from `rulesetMap`.
 *
 * If any rule of a group has no `mergeTarget` array, the merged rule ends up
 * with `mergeTarget` set to `undefined`.
 *
 * Side effect: arrays of strings found in the rules are sorted in place
 * while the rules are being hashed.
 *
 * @param {Map<number, object>} rulesetMap - Rules keyed by id, mutated in
 *   place.
 * @param {string} mergeTarget - Name of the array property to merge.
 */
function mergeRules(rulesetMap, mergeTarget) {
    // Normalize values so that equivalent rules serialize identically.
    const sorter = (_, v) => {
        if ( Array.isArray(v) ) {
            return typeof v[0] === 'string' ? v.sort() : v;
        }
        if ( v instanceof Object ) {
            const sorted = {};
            for ( const kk of Object.keys(v).sort() ) {
                sorted[kk] = v[kk];
            }
            return sorted;
        }
        return v;
    };
    // Serialize a rule, leaving out `_`-prefixed properties and the target.
    const ruleHasher = (rule, target) => {
        return JSON.stringify(rule, (k, v) => {
            if ( k.startsWith('_') ) { return; }
            if ( k === target ) { return; }
            return sorter(k, v);
        });
    };
    // Depth-first search for the first array stored under `target`.
    const extractTargetValue = (obj, target) => {
        for ( const [ k, v ] of Object.entries(obj) ) {
            if ( Array.isArray(v) && k === target ) { return v; }
            if ( v instanceof Object ) {
                const r = extractTargetValue(v, target);
                if ( r !== undefined ) { return r; }
            }
        }
    };
    // Same search, but returns the object holding the array.
    const extractTargetOwner = (obj, target) => {
        for ( const [ k, v ] of Object.entries(obj) ) {
            if ( Array.isArray(v) && k === target ) { return obj; }
            if ( v instanceof Object ) {
                const r = extractTargetOwner(v, target);
                if ( r !== undefined ) { return r; }
            }
        }
    };
    const mergeMap = new Map();
    for ( const [ id, rule ] of rulesetMap ) {
        if ( rule._error !== undefined ) { continue; }
        const hash = ruleHasher(rule, mergeTarget);
        if ( mergeMap.has(hash) === false ) {
            mergeMap.set(hash, []);
        }
        mergeMap.get(hash).push(id);
    }
    for ( const ids of mergeMap.values() ) {
        if ( ids.length === 1 ) { continue; }
        const leftHand = rulesetMap.get(ids[0]);
        const leftHandSet = new Set(
            extractTargetValue(leftHand, mergeTarget) || []
        );
        for ( let i = 1; i < ids.length; i++ ) {
            const rightHandId = ids[i];
            const rightHand = rulesetMap.get(rightHandId);
            const rightHandArray = extractTargetValue(rightHand, mergeTarget);
            if ( rightHandArray !== undefined ) {
                // An empty set means an earlier rule had no target array:
                // the set is left empty from then on.
                if ( leftHandSet.size !== 0 ) {
                    for ( const item of rightHandArray ) {
                        leftHandSet.add(item);
                    }
                }
            } else {
                leftHandSet.clear();
            }
            rulesetMap.delete(rightHandId);
        }
        const leftHandOwner = extractTargetOwner(leftHand, mergeTarget);
        if ( leftHandSet.size > 1 ) {
            // The set can only be populated when the left-hand rule had a
            // target array to start with, hence the owner exists here.
            leftHandOwner[mergeTarget] = Array.from(leftHandSet).sort();
        } else if ( leftHandSet.size === 0 ) {
            if ( leftHandOwner !== undefined ) {
                leftHandOwner[mergeTarget] = undefined;
            }
        }
    }
}

/******************************************************************************/

/**
 * Merge mergeable rules, assign final rule ids, and append the invalid
 * filters collected in `context.invalid` as `_error` entries.
 *
 * @param {object} context - The conversion context; `invalid` is read.
 * @param {object} network - Its `ruleset` array is replaced with the
 *   finalized ruleset.
 */
function finalizeRuleset(context, network) {
    const ruleset = network.ruleset;

    // Assign rule ids
    const rulesetMap = new Map();
    {
        let ruleId = 1;
        for ( const rule of ruleset ) {
            rulesetMap.set(ruleId++, rule);
        }
    }
    mergeRules(rulesetMap, 'resourceTypes');
    mergeRules(rulesetMap, 'removeParams');
    mergeRules(rulesetMap, 'initiatorDomains');
    mergeRules(rulesetMap, 'requestDomains');
    mergeRules(rulesetMap, 'responseHeaders');

    // Convert back single-entry requestDomains into pattern-based filters
    // https://github.com/uBlockOrigin/uBOL-home/issues/327
    // TODO: Remove when (if) Safari is changed to interpret requestDomains as
    //       in other browsers.
    for ( const rule of rulesetMap.values() ) {
        const { condition } = rule;
        if ( condition?.requestDomains === undefined ) { continue; }
        if ( condition.requestDomains.length !== 1 ) { continue; }
        if ( condition.urlFilter !== undefined ) { continue; }
        if ( condition.regexFilter !== undefined ) { continue; }
        condition.urlFilter = `||${condition.requestDomains[0]}^`;
        condition.requestDomains = undefined;
    }

    // Patch id
    const rulesetFinal = [];
    {
        let ruleId = 1;
        for ( const rule of rulesetMap.values() ) {
            // Rules with an error get id 0; the others are numbered from 1.
            if ( rule._error === undefined ) {
                rule.id = ruleId++;
            } else {
                rule.id = 0;
            }
            rulesetFinal.push(rule);
        }
        for ( const invalid of context.invalid ) {
            rulesetFinal.push({ _error: [ invalid ] });
        }
    }

    network.ruleset = rulesetFinal;
}

/******************************************************************************/

/**
 * Convert raw filter lists into a DNR ruleset plus collections of cosmetic
 * and scriptlet filters.
 *
 * @param {Array<object|Promise<object>>} lists - Filter lists, or promises
 *   resolving to filter lists.
 * @param {object} [options] - Copied into the conversion context;
 *   `networkBad` is exposed as `context.bad`, and `extensionPaths` is
 *   converted to a Map.
 * @returns {Promise<object>} An object with the properties `network`,
 *   `networkBad`, `genericCosmeticFilters`, `genericCosmeticExceptions`,
 *   `specificCosmetic` and `scriptlet`.
 */
async function dnrRulesetFromRawLists(lists, options = {}) {
    const context = Object.assign({}, options);
    context.bad = options.networkBad;
    staticNetFilteringEngine.dnrFromCompiled('begin', context);
    context.extensionPaths = new Map(context.extensionPaths || []);
    const toLoad = [];
    const toDNR = (context, list) => addToDNR(context, list);
    for ( const list of lists ) {
        if ( list instanceof Promise ) {
            toLoad.push(list.then(list => toDNR(context, list)));
        } else {
            toLoad.push(toDNR(context, list));
        }
    }
    await Promise.all(toLoad);
    const result = {
        network: staticNetFilteringEngine.dnrFromCompiled('end', context),
        networkBad: context.bad,
        genericCosmeticFilters: context.genericCosmeticFilters,
        genericCosmeticExceptions: context.genericCosmeticExceptions,
        specificCosmetic: context.specificCosmeticFilters,
        scriptlet: context.scriptletFilters,
    };
    // Response header rules are collected separately from the engine's
    // ruleset: append them before finalizing.
    if ( context.responseHeaderRules ) {
        result.network.ruleset.push(...context.responseHeaderRules);
    }
    finalizeRuleset(context, result.network);
    return result;
}

/******************************************************************************/

export { dnrRulesetFromRawLists, mergeRules };