/******************************************************************************* uBlock Origin - a comprehensive, efficient content blocker Copyright (C) 2014-present Raymond Hill This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see {http://www.gnu.org/licenses/}. Home: https://github.com/gorhill/uBlock */ import * as sfp from './static-filtering-parser.js'; import { domainFromHostname, hostnameFromNetworkURL } from './uri-utils.js'; import { dropTask, queueTask } from './tasks.js'; import { isRE2, toHeaderPattern, tokenizableStrFromRegex } from './regex-analyzer.js'; import BidiTrieContainer from './biditrie.js'; import { CompiledListReader } from './static-filtering-io.js'; import { FilteringContext } from './filtering-context.js'; import HNTrieContainer from './hntrie.js'; import { urlSkip } from './urlskip.js'; /******************************************************************************/ // Access to a key-val store is optional and useful only for optimal // initialization at module load time. Probably could re-arrange code // to export an init() function with optimization parameters which would // need to be called by module clients. For now, I want modularizing with // minimal amount of changes. const keyvalStore = typeof vAPI !== 'undefined' ? vAPI.localStorage : { getItem() { return null; }, setItem() {}, removeItem() {} }; /******************************************************************************/ // 10fedcba9876543210 // |||||||| | || | // |||||||| | || | // |||||||| | || | // |||||||| | || | // |||||||| | || +---- bit 0- 1: block=0, allow=1, block important=2 // |||||||| | |+------ bit 2: unused // |||||||| | +------- bit 3- 4: party [0-3] // |||||||| +--------- bit 5- 9: type [0-31] // |||||||+-------------- bit 10: headers-based filters // ||||||+--------------- bit 11: redirect filters // |||||+---------------- bit 12: removeparam filters // ||||+----------------- bit 13: csp filters // |||+------------------ bit 14: permissions filters // ||+------------------- bit 15: uritransform filters // |+-------------------- bit 16: replace filters // +--------------------- bit 17: urlskip filters // TODO: bit 11-17 could be converted into 3-bit value, as these options are not // meant to be combined. const BLOCK_REALM = 0b0000_0000_0000_0000_0000; const ALLOW_REALM = 0b0000_0000_0000_0000_0001; const IMPORTANT_REALM = 0b0000_0000_0000_0000_0010; const BLOCKALLOW_REALM = BLOCK_REALM | ALLOW_REALM | IMPORTANT_REALM; const BLOCKIMPORTANT_REALM = BLOCK_REALM | IMPORTANT_REALM; const ANYPARTY_REALM = 0b0000_0000_0000_0000_0000; const FIRSTPARTY_REALM = 0b0000_0000_0000_0000_1000; const THIRDPARTY_REALM = 0b0000_0000_0000_0001_0000; const ALLPARTIES_REALM = FIRSTPARTY_REALM | THIRDPARTY_REALM; const TYPE_REALM = 0b0000_0000_0011_1110_0000; const HEADERS_REALM = 0b0000_0000_0100_0000_0000; const REDIRECT_REALM = 0b0000_0000_1000_0000_0000; const REMOVEPARAM_REALM = 0b0000_0001_0000_0000_0000; const CSP_REALM = 0b0000_0010_0000_0000_0000; const PERMISSIONS_REALM = 0b0000_0100_0000_0000_0000; const URLTRANSFORM_REALM = 0b0000_1000_0000_0000_0000; const REPLACE_REALM = 0b0001_0000_0000_0000_0000; const URLSKIP_REALM = 0b0010_0000_0000_0000_0000; const MODIFY_REALMS = REDIRECT_REALM | CSP_REALM | REMOVEPARAM_REALM | PERMISSIONS_REALM | URLTRANSFORM_REALM | REPLACE_REALM | URLSKIP_REALM; const TYPE_REALM_OFFSET = 5; const typeNameToTypeValue = { 'no_type': 0 << TYPE_REALM_OFFSET, 'stylesheet': 1 << TYPE_REALM_OFFSET, 'image': 2 << TYPE_REALM_OFFSET, 'object': 3 << TYPE_REALM_OFFSET, 'object_subrequest': 3 << TYPE_REALM_OFFSET, 'script': 4 << TYPE_REALM_OFFSET, 'fetch': 5 << TYPE_REALM_OFFSET, 'xmlhttprequest': 5 << TYPE_REALM_OFFSET, 'sub_frame': 6 << TYPE_REALM_OFFSET, 'font': 7 << TYPE_REALM_OFFSET, 'media': 8 << TYPE_REALM_OFFSET, 'websocket': 9 << TYPE_REALM_OFFSET, 'beacon': 10 << TYPE_REALM_OFFSET, 'ping': 10 << TYPE_REALM_OFFSET, 'other': 11 << TYPE_REALM_OFFSET, 'popup': 12 << TYPE_REALM_OFFSET, // start of behavioral filtering 'popunder': 13 << TYPE_REALM_OFFSET, 'main_frame': 14 << TYPE_REALM_OFFSET, // start of 1p behavioral filtering 'generichide': 15 << TYPE_REALM_OFFSET, 'specifichide': 16 << TYPE_REALM_OFFSET, 'inline-font': 17 << TYPE_REALM_OFFSET, 'inline-script': 18 << TYPE_REALM_OFFSET, 'cname': 19 << TYPE_REALM_OFFSET, 'webrtc': 20 << TYPE_REALM_OFFSET, 'unsupported': 21 << TYPE_REALM_OFFSET, }; const otherTypeBitValue = typeNameToTypeValue.other; const bitFromType = type => 1 << ((typeNameToTypeValue[type] >>> TYPE_REALM_OFFSET) - 1); // All network request types to bitmap // bring origin to 0 (from TYPE_REALM_OFFSET -- see typeNameToTypeValue) // left-shift 1 by the above-calculated value // subtract 1 to set all type bits const allNetworkTypesBits = (1 << (otherTypeBitValue >>> TYPE_REALM_OFFSET)) - 1; const allTypesBits = allNetworkTypesBits | 1 << (typeNameToTypeValue['popup'] >>> TYPE_REALM_OFFSET) - 1 | 1 << (typeNameToTypeValue['main_frame'] >>> TYPE_REALM_OFFSET) - 1 | 1 << (typeNameToTypeValue['inline-font'] >>> TYPE_REALM_OFFSET) - 1 | 1 << (typeNameToTypeValue['inline-script'] >>> TYPE_REALM_OFFSET) - 1; const unsupportedTypeBit = 1 << (typeNameToTypeValue['unsupported'] >>> TYPE_REALM_OFFSET) - 1; const typeValueToTypeName = [ '', 'stylesheet', 'image', 'object', 'script', 'xhr', 'frame', 'font', 'media', 'websocket', 'ping', 'other', 'popup', 'popunder', 'document', 'generichide', 'specifichide', 'inline-font', 'inline-script', 'cname', '', '', 'webrtc', 'unsupported', ]; const typeValueToDNRTypeName = [ '', 'stylesheet', 'image', 'object', 'script', 'xmlhttprequest', 'sub_frame', 'font', 'media', 'websocket', 'ping', 'other', ]; // Do not change order. Compiled filter lists rely on this order being // consistent across sessions. const MODIFIER_TYPE_REDIRECT = 1; const MODIFIER_TYPE_REDIRECTRULE = 2; const MODIFIER_TYPE_REMOVEPARAM = 3; const MODIFIER_TYPE_CSP = 4; const MODIFIER_TYPE_PERMISSIONS = 5; const MODIFIER_TYPE_URLTRANSFORM = 6; const MODIFIER_TYPE_REPLACE = 7; const MODIFIER_TYPE_URLSKIP = 8; const modifierBitsFromType = new Map([ [ MODIFIER_TYPE_REDIRECT, REDIRECT_REALM ], [ MODIFIER_TYPE_REDIRECTRULE, REDIRECT_REALM ], [ MODIFIER_TYPE_REMOVEPARAM, REMOVEPARAM_REALM ], [ MODIFIER_TYPE_CSP, CSP_REALM ], [ MODIFIER_TYPE_PERMISSIONS, PERMISSIONS_REALM ], [ MODIFIER_TYPE_URLTRANSFORM, URLTRANSFORM_REALM ], [ MODIFIER_TYPE_REPLACE, REPLACE_REALM ], [ MODIFIER_TYPE_URLSKIP, URLSKIP_REALM ], ]); const modifierTypeFromName = new Map([ [ 'redirect', MODIFIER_TYPE_REDIRECT ], [ 'redirect-rule', MODIFIER_TYPE_REDIRECTRULE ], [ 'removeparam', MODIFIER_TYPE_REMOVEPARAM ], [ 'csp', MODIFIER_TYPE_CSP ], [ 'permissions', MODIFIER_TYPE_PERMISSIONS ], [ 'uritransform', MODIFIER_TYPE_URLTRANSFORM ], [ 'replace', MODIFIER_TYPE_REPLACE ], [ 'urlskip', MODIFIER_TYPE_URLSKIP ], ]); const modifierNameFromType = new Map([ [ MODIFIER_TYPE_REDIRECT, 'redirect' ], [ MODIFIER_TYPE_REDIRECTRULE, 'redirect-rule' ], [ MODIFIER_TYPE_REMOVEPARAM, 'removeparam' ], [ MODIFIER_TYPE_CSP, 'csp' ], [ MODIFIER_TYPE_PERMISSIONS, 'permissions' ], [ MODIFIER_TYPE_URLTRANSFORM, 'uritransform' ], [ MODIFIER_TYPE_REPLACE, 'replace' ], [ MODIFIER_TYPE_URLSKIP, 'urlskip' ], ]); //const typeValueFromCatBits = catBits => (catBits >>> TYPE_REALM_OFFSET) & 0b11111; const MAX_TOKEN_LENGTH = 7; // Four upper bits of token hash are reserved for built-in predefined // token hashes, which should never end up being used when tokenizing // any arbitrary string. const NO_TOKEN_HASH = 0x5000_0000; const DOT_TOKEN_HASH = 0x1000_0000; const ANY_TOKEN_HASH = 0x2000_0000; const ANY_HTTPS_TOKEN_HASH = 0x3000_0000; const ANY_HTTP_TOKEN_HASH = 0x4000_0000; const EMPTY_TOKEN_HASH = 0xF000_0000; const INVALID_TOKEN_HASH = 0xFFFF_FFFF; /******************************************************************************/ // See the following as short-lived registers, used during evaluation. They are // valid until the next evaluation. let $requestMethodBit = 0; let $requestTypeValue = 0; let $requestURL = ''; let $requestURLRaw = ''; let $requestHostname = ''; let $requestAddress = ''; let $docHostname = ''; let $docDomain = ''; let $tokenBeg = 0; let $patternMatchLeft = 0; let $patternMatchRight = 0; let $isBlockImportant = false; const $docEntity = { entity: '', last: '', compute() { if ( this.last !== $docHostname ) { this.last = $docHostname; const pos = $docDomain.indexOf('.'); this.entity = pos !== -1 ? `${$docHostname.slice(0, pos - $docDomain.length)}.*` : ''; } return this.entity; }, }; const $requestEntity = { entity: '', last: '', compute() { if ( this.last !== $requestHostname ) { this.last = $requestHostname; const requestDomain = domainFromHostname($requestHostname); const pos = requestDomain.indexOf('.'); this.entity = pos !== -1 ? `${$requestHostname.slice(0, pos - requestDomain.length)}.*` : ''; } return this.entity; }, }; const $httpHeaders = { init(headers) { this.headers = headers; this.parsed.clear(); }, reset() { this.headers = []; this.parsed.clear(); }, lookup(name) { if ( this.parsed.size === 0 ) { for ( const { name, value } of this.headers ) { this.parsed.set(name.toLowerCase(), value); } } return this.parsed.get(name); }, headers: [], parsed: new Map(), }; /******************************************************************************/ // Local helpers const restrSeparator = '(?:[^%.0-9a-z_-]|$)'; // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions const reEscape = /[.*+?^${}()|[\]\\]/g; // Convert a plain string (devoid of special characters) into a regex. const restrFromPlainPattern = s => s.replace(reEscape, '\\$&'); const restrFromGenericPattern = function(s, anchor = 0) { let reStr = s.replace(restrFromGenericPattern.rePlainChars, '\\$&') .replace(restrFromGenericPattern.reSeparators, restrSeparator) .replace(restrFromGenericPattern.reDanglingAsterisks, '') .replace(restrFromGenericPattern.reAsterisks, '\\S*?'); if ( anchor & 0b100 ) { reStr = ( reStr.startsWith('\\.') ? restrFromGenericPattern.restrHostnameAnchor2 : restrFromGenericPattern.restrHostnameAnchor1 ) + reStr; } else if ( anchor & 0b010 ) { reStr = '^' + reStr; } if ( anchor & 0b001 ) { reStr += '$'; } return reStr; }; restrFromGenericPattern.rePlainChars = /[.+?${}()|[\]\\]/g; restrFromGenericPattern.reSeparators = /\^/g; restrFromGenericPattern.reDanglingAsterisks = /^\*+|\*+$/g; restrFromGenericPattern.reAsterisks = /\*+/g; restrFromGenericPattern.restrHostnameAnchor1 = '^[a-z-]+://(?:[^/?#]+\\.)?'; restrFromGenericPattern.restrHostnameAnchor2 = '^[a-z-]+://(?:[^/?#]+)?'; /******************************************************************************/ class LogData { constructor(categoryBits, tokenHash, iunit) { this.result = 0; this.source = 'static'; this.tokenHash = tokenHash; if ( iunit === 0 ) { this.raw = this.regex = ''; return; } this.result = (categoryBits & ALLOW_REALM) === 0 ? 1 : 2; const pattern = []; const regex = []; const options = []; const denyallow = []; const fromDomains = []; const toDomains = []; const logData = { pattern, regex, denyallow, fromDomains, toDomains, options, isRegex: false, }; filterLogData(iunit, logData); if ( (categoryBits & THIRDPARTY_REALM) !== 0 ) { logData.options.unshift('3p'); } else if ( (categoryBits & FIRSTPARTY_REALM) !== 0 ) { logData.options.unshift('1p'); } const type = categoryBits & TYPE_REALM; if ( type !== 0 ) { logData.options.unshift(typeValueToTypeName[type >>> TYPE_REALM_OFFSET]); } let raw = logData.pattern.join(''); if ( logData.isRegex === false && raw.charCodeAt(0) === 0x2F /* '/' */ && raw.charCodeAt(raw.length - 1) === 0x2F /* '/' */ ) { raw += '*'; } if ( (categoryBits & ALLOW_REALM) !== 0 ) { raw = '@@' + raw; } if ( denyallow.length !== 0 ) { options.push(`denyallow=${denyallow.join('|')}`); } if ( fromDomains.length !== 0 ) { options.push(`from=${fromDomains.join('|')}`); } if ( toDomains.length !== 0 ) { options.push(`to=${toDomains.join('|')}`); } if ( options.length !== 0 ) { raw += '$' + options.join(','); } this.raw = raw; this.regex = logData.regex.join(''); if ( logData.reason ) { this.reason = logData.reason; } } isUntokenized() { return this.tokenHash === NO_TOKEN_HASH; } isPureHostname() { return this.tokenHash === DOT_TOKEN_HASH; } static requote(s) { if ( /^\$|^(["'`]).*\1$|,/.test(s) === false ) { return s; } if ( s.includes("'") === false ) { return `'${s}'`; } if ( s.includes('"') === false ) { return `"${s}"`; } if ( s.includes('`') === false ) { return `\`${s}\``; } return `'${s.replace(/'/g, "\\'")}'`; } } /******************************************************************************/ const charClassMap = new Uint32Array(128); const CHAR_CLASS_SEPARATOR = 0b00000001; { const reSeparators = /[^\w%.-]/; for ( let i = 0; i < 128; i++ ) { if ( reSeparators.test(String.fromCharCode(i)) ) { charClassMap[i] |= CHAR_CLASS_SEPARATOR; } } } const isSeparatorChar = c => (charClassMap[c] & CHAR_CLASS_SEPARATOR) !== 0; /******************************************************************************/ const FILTER_DATA_PAGE_SIZE = 65536; const roundToFilterDataPageSize = len => (len + FILTER_DATA_PAGE_SIZE-1) & ~(FILTER_DATA_PAGE_SIZE-1); let filterData = new Int32Array(FILTER_DATA_PAGE_SIZE * 5); let filterDataWritePtr = 2; const filterDataGrow = len => { if ( len <= filterData.length ) { return; } const newLen = roundToFilterDataPageSize(len); const newBuf = new Int32Array(newLen); newBuf.set(filterData); filterData = newBuf; }; const filterDataShrink = ( ) => { const newLen = Math.max( roundToFilterDataPageSize(filterDataWritePtr), FILTER_DATA_PAGE_SIZE ); if ( newLen >= filterData.length ) { return; } const newBuf = new Int32Array(newLen); newBuf.set(filterData.subarray(0, filterDataWritePtr)); filterData = newBuf; }; const filterDataAlloc = (...args) => { const len = args.length; const idata = filterDataAllocLen(len); for ( let i = 0; i < len; i++ ) { filterData[idata+i] = args[i]; } return idata; }; const filterDataAllocLen = len => { const idata = filterDataWritePtr; filterDataWritePtr += len; if ( filterDataWritePtr > filterData.length ) { filterDataGrow(filterDataWritePtr); } return idata; }; const filterSequenceAdd = (a, b) => { const iseq = filterDataAllocLen(2); filterData[iseq+0] = a; filterData[iseq+1] = b; return iseq; }; const filterDataReset = ( ) => { filterData.fill(0); filterDataWritePtr = 2; }; const filterDataToSelfie = ( ) => filterData.subarray(0, filterDataWritePtr); const filterDataFromSelfie = selfie => { if ( selfie instanceof Int32Array === false ) { return false; } filterData = selfie; filterDataWritePtr = selfie.length; return true; }; const filterRefs = [ null ]; let filterRefsWritePtr = 1; const filterRefAdd = ref => { const i = filterRefsWritePtr; filterRefs[i] = ref; filterRefsWritePtr += 1; return i; }; const filterRefsReset = ( ) => { filterRefs.fill(null); filterRefsWritePtr = 1; }; const filterRefsToSelfie = ( ) => filterRefs.slice(0, filterRefsWritePtr); const filterRefsFromSelfie = selfie => { if ( Array.isArray(selfie) === false ) { return false; } for ( let i = 0, n = selfie.length; i < n; i++ ) { filterRefs[i] = selfie[i]; } filterRefsWritePtr = selfie.length; return true; }; /******************************************************************************/ const origHNTrieContainer = new HNTrieContainer(); const destHNTrieContainer = new HNTrieContainer(); /******************************************************************************/ const bidiTrieMatchExtra = (l, r, ix) => { for (;;) { $patternMatchLeft = l; $patternMatchRight = r; const iu = filterData[ix+0]; if ( filterMatch(iu) ) { return iu; } ix = filterData[ix+1]; if ( ix === 0 ) { break; } } return 0; }; const bidiTrie = new BidiTrieContainer(bidiTrieMatchExtra); /******************************************************************************* Each filter class will register itself in the map. IMPORTANT: any change which modifies the mapping will have to be reflected with µBlock.systemSettings.compiledMagic. */ const filterClasses = []; const filterArgsToUnit = new Map(); let filterClassIdGenerator = 0; const registerFilterClass = fc => { const fid = filterClassIdGenerator++; fc.fid = fid; fc.fidstr = `${fid}`; filterClasses[fid] = fc; }; const filterFromCompiled = args => { const fc = filterClasses[args[0]]; const keygen = fc.keyFromArgs; if ( keygen === undefined ) { return fc.fromCompiled(args); } const key = `${fc.fidstr} ${(keygen(args) || '')}`; let idata = filterArgsToUnit.get(key); if ( idata !== undefined ) { return idata; } idata = fc.fromCompiled(args); filterArgsToUnit.set(key, idata); return idata; }; const filterGetClass = idata => { return filterClasses[filterData[idata+0]]; }; const filterMatch = idata => filterClasses[filterData[idata+0]].match(idata); const filterHasOriginHit = idata => { const fc = filterClasses[filterData[idata+0]]; return fc.hasOriginHit !== undefined && fc.hasOriginHit(idata); }; const filterGetDomainOpt = (idata, out) => { const fc = filterClasses[filterData[idata+0]]; if ( fc.getDomainOpt === undefined ) { return; } const fromOpt = fc.getDomainOpt(idata); if ( out === undefined ) { return fromOpt; } out.push(fromOpt); }; const filterGetRegexPattern = (idata, out) => { const fc = filterClasses[filterData[idata+0]]; if ( fc.hasRegexPattern === undefined ) { return; } const reStr = fc.getRegexPattern(idata); if ( out === undefined ) { return reStr; } out.push(reStr); }; const filterIsBidiTrieable = idata => { const fc = filterClasses[filterData[idata+0]]; if ( fc.isBidiTrieable === undefined ) { return false; } return fc.isBidiTrieable(idata) === true; }; const filterToBidiTrie = idata => { const fc = filterClasses[filterData[idata+0]]; if ( fc.toBidiTrie === undefined ) { return; } return fc.toBidiTrie(idata); }; const filterMatchAndFetchModifiers = (idata, env) => { const fc = filterClasses[filterData[idata+0]]; if ( fc.matchAndFetchModifiers === undefined ) { return; } return fc.matchAndFetchModifiers(idata, env); }; const filterGetModifierType = idata => { const fc = filterClasses[filterData[idata+0]]; if ( fc.getModifierType === undefined ) { return; } return fc.getModifierType(idata); }; const filterLogData = (idata, details) => { const fc = filterClasses[filterData[idata+0]]; if ( fc.logData === undefined ) { return; } fc.logData(idata, details); }; const filterDumpInfo = (idata) => { const fc = filterGetClass(idata); if ( fc.dumpInfo === undefined ) { return; } return fc.dumpInfo(idata); }; const dnrRuleFromCompiled = (args, rule) => { const fc = filterClasses[args[0]]; if ( fc.dnrFromCompiled === undefined ) { return false; } fc.dnrFromCompiled(args, rule); return true; }; const dnrAddRuleError = (rule, msg) => { rule._error = rule._error || []; rule._error.push(msg); }; const dnrAddRuleWarning = (rule, msg) => { rule._warning = rule._warning || []; rule._warning.push(msg); }; /******************************************************************************* Filter classes Pattern: FilterPatternAny FilterPatternPlain FilterPatternPlain1 FilterPatternPlainX FilterPatternGeneric FilterRegex FilterPlainTrie FilterHostnameDict Pattern modifiers: FilterAnchorHnLeft FilterAnchorHn FilterAnchorRight FilterAnchorLeft FilterTrailingSeparator Context, immediate: FilterOriginHit FilterOriginMiss FilterOriginEntityMiss FilterOriginEntityHit FilterOriginHitSet FilterOriginMissSet FilterJustOrigin FilterHTTPJustOrigin FilterHTTPSJustOrigin Other options: FilterDenyAllow FilterImportant FilterNotType FilterStrictParty FilterModifier FilterOnHeaders FilterIPAddress Collection: FilterCollection FilterCompositeAll FilterBucket FilterBucketIf FilterBucketIfOriginHits FilterBucketIfRegexHits FilterDomainHitAny A single filter can be made of many parts, in which case FilterCompositeAll is used to hold all the parts, and where all the parts must be a match in order for the filter to be a match. **/ /******************************************************************************/ class FilterPatternAny { static match() { return true; } static compile() { return [ FilterPatternAny.fid ]; } static fromCompiled(args) { return filterDataAlloc(args[0]); } static keyFromArgs() { } static logData(idata, details) { details.pattern.push('*'); details.regex.push('^'); } } registerFilterClass(FilterPatternAny); /******************************************************************************/ class FilterImportant { static match() { return ($isBlockImportant = true); } static compile() { return [ FilterImportant.fid ]; } static fromCompiled(args) { return filterDataAlloc(args[0]); } static dnrFromCompiled(args, rule) { rule.__important = true; } static keyFromArgs() { } static logData(idata, details) { details.options.unshift('important'); } } registerFilterClass(FilterImportant); /******************************************************************************/ class FilterPatternPlain { static isBidiTrieable(idata) { return filterData[idata+2] <= 255; } static toBidiTrie(idata) { return { i: filterData[idata+1], n: filterData[idata+2], itok: filterData[idata+3], }; } static match(idata) { const left = $tokenBeg; const n = filterData[idata+2]; if ( bidiTrie.startsWith( left, bidiTrie.getHaystackLen(), filterData[idata+1], n ) === 0 ) { return false; } $patternMatchLeft = left; $patternMatchRight = left + n; return true; } static compile(details) { const { tokenBeg } = details; if ( tokenBeg === 0 ) { return [ FilterPatternPlain.fid, details.pattern, 0 ]; } if ( tokenBeg === 1 ) { return [ FilterPatternPlain1.fid, details.pattern, 1 ]; } return [ FilterPatternPlainX.fid, details.pattern, tokenBeg ]; } static fromCompiled(args) { const idata = filterDataAllocLen(4); filterData[idata+0] = args[0]; // fid filterData[idata+1] = bidiTrie.storeString(args[1]); // i filterData[idata+2] = args[1].length; // n filterData[idata+3] = args[2]; // tokenBeg return idata; } static dnrFromCompiled(args, rule) { if ( rule.condition === undefined ) { rule.condition = {}; } else if ( rule.condition.urlFilter !== undefined ) { dnrAddRuleError(rule, `urlFilter already defined: ${rule.condition.urlFilter}`); } rule.condition.urlFilter = args[1]; } static logData(idata, details) { const s = bidiTrie.extractString( filterData[idata+1], filterData[idata+2] ); details.pattern.push(s); details.regex.push(restrFromPlainPattern(s)); // https://github.com/gorhill/uBlock/issues/3037 // Make sure the logger reflects accurately internal match, taking // into account MAX_TOKEN_LENGTH. if ( /^[0-9a-z%]{1,6}$/i.exec(s.slice(filterData[idata+3])) !== null ) { details.regex.push('(?![0-9A-Za-z%])'); } } static dumpInfo(idata) { const pattern = bidiTrie.extractString( filterData[idata+1], filterData[idata+2] ); return `${pattern} ${filterData[idata+3]}`; } } FilterPatternPlain.isPatternPlain = true; registerFilterClass(FilterPatternPlain); class FilterPatternPlain1 extends FilterPatternPlain { static match(idata) { const left = $tokenBeg - 1; const n = filterData[idata+2]; if ( bidiTrie.startsWith( left, bidiTrie.getHaystackLen(), filterData[idata+1], n ) === 0 ) { return false; } $patternMatchLeft = left; $patternMatchRight = left + n; return true; } } registerFilterClass(FilterPatternPlain1); class FilterPatternPlainX extends FilterPatternPlain { static match(idata) { const left = $tokenBeg - filterData[idata+3]; const n = filterData[idata+2]; if ( bidiTrie.startsWith( left, bidiTrie.getHaystackLen(), filterData[idata+1], n ) === 0 ) { return false; } $patternMatchLeft = left; $patternMatchRight = left + n; return true; } } registerFilterClass(FilterPatternPlainX); /******************************************************************************/ class FilterPatternGeneric { static hasRegexPattern() { return true; } static getRegexPattern(idata) { return restrFromGenericPattern( bidiTrie.extractString( filterData[idata+1], filterData[idata+2] ), filterData[idata+3] ); } static match(idata) { const refs = filterRefs[filterData[idata+4]]; if ( refs.$re === null ) { refs.$re = new RegExp(this.getRegexPattern(idata)); } return refs.$re.test($requestURL); } static compile(details) { const out = [ FilterPatternGeneric.fid, details.pattern, details.anchor, ]; details.anchor = 0; return out; } static fromCompiled(args) { const idata = filterDataAllocLen(5); filterData[idata+0] = args[0]; // fid filterData[idata+1] = bidiTrie.storeString(args[1]); // i filterData[idata+2] = args[1].length; // n filterData[idata+3] = args[2]; // anchor filterData[idata+4] = filterRefAdd({ $re: null }); return idata; } static dnrFromCompiled(args, rule) { if ( rule.condition === undefined ) { rule.condition = {}; } else if ( rule.condition.urlFilter !== undefined ) { dnrAddRuleError(rule, `urlFilter already defined: ${rule.condition.urlFilter}`); } let pattern = args[1]; if ( args[2] & 0b100 ) { if ( pattern.startsWith('.') ) { pattern = `*${pattern}`; } pattern = `||${pattern}`; } else if ( args[2] & 0b010 ) { pattern = `|${pattern}`; } if ( args[2] & 0b001 ) { pattern += '|'; } rule.condition.urlFilter = pattern; } static keyFromArgs(args) { return `${args[1]}\t${args[2]}`; } static logData(idata, details) { details.pattern.length = 0; const anchor = filterData[idata+3]; if ( (anchor & 0b100) !== 0 ) { details.pattern.push('||'); } else if ( (anchor & 0b010) !== 0 ) { details.pattern.push('|'); } const s = bidiTrie.extractString( filterData[idata+1], filterData[idata+2] ); details.pattern.push(s); if ( (anchor & 0b001) !== 0 ) { details.pattern.push('|'); } details.regex.length = 0; details.regex.push(restrFromGenericPattern(s, anchor & ~0b100)); } static dumpInfo(idata) { return bidiTrie.extractString( filterData[idata+1], filterData[idata+2] ); } } FilterPatternGeneric.isSlow = true; registerFilterClass(FilterPatternGeneric); /******************************************************************************/ class FilterAnchorHnLeft { static match(idata) { const len = $requestHostname.length; const haystackCodes = bidiTrie.haystack; let lastBeg = filterData[idata+2]; let lastEnd = filterData[idata+3]; if ( len !== filterData[idata+1] || lastBeg === -1 || haystackCodes[lastBeg-3] !== 0x3A /* ':' */ || haystackCodes[lastBeg-2] !== 0x2F /* '/' */ || haystackCodes[lastBeg-1] !== 0x2F /* '/' */ ) { lastBeg = len !== 0 ? haystackCodes.indexOf(0x3A) : -1; if ( lastBeg !== -1 ) { if ( lastBeg >= bidiTrie.getHaystackLen() || haystackCodes[lastBeg+1] !== 0x2F || haystackCodes[lastBeg+2] !== 0x2F ) { lastBeg = -1; } } if ( lastBeg !== -1 ) { lastBeg += 3; lastEnd = lastBeg + len; } else { lastEnd = -1; } filterData[idata+1] = len; filterData[idata+2] = lastBeg; filterData[idata+3] = lastEnd; } const left = $patternMatchLeft; return left < lastEnd && ( left === lastBeg || left > lastBeg && haystackCodes[left-1] === 0x2E /* '.' */ ); } static compile() { return [ FilterAnchorHnLeft.fid ]; } static fromCompiled(args) { const idata = filterDataAllocLen(4); filterData[idata+0] = args[0]; // fid filterData[idata+1] = 0; // lastLen filterData[idata+2] = -1; // lastBeg filterData[idata+3] = -1; // lastEnd return idata; } static dnrFromCompiled(args, rule) { rule.condition.urlFilter = `||${rule.condition.urlFilter}`; } static keyFromArgs() { } static logData(idata, details) { details.pattern.unshift('||'); } } registerFilterClass(FilterAnchorHnLeft); /******************************************************************************/ class FilterAnchorHn extends FilterAnchorHnLeft { static match(idata) { return super.match(idata) && filterData[idata+3] === $patternMatchRight; } static compile() { return [ FilterAnchorHn.fid ]; } static dnrFromCompiled(args, rule) { rule.condition.requestDomains = [ rule.condition.urlFilter ]; rule.condition.urlFilter = undefined; } static keyFromArgs() { } static logData(idata, details) { super.logData(idata, details); details.pattern.push('^'); details.regex.push('\\.?', restrSeparator); } } registerFilterClass(FilterAnchorHn); /******************************************************************************/ class FilterAnchorLeft { static match() { return $patternMatchLeft === 0; } static compile() { return [ FilterAnchorLeft.fid ]; } static fromCompiled(args) { return filterDataAlloc(args[0]); } static dnrFromCompiled(args, rule) { rule.condition.urlFilter = `|${rule.condition.urlFilter}`; } static keyFromArgs() { } static logData(idata, details) { details.pattern.unshift('|'); details.regex.unshift('^'); } } registerFilterClass(FilterAnchorLeft); /******************************************************************************/ class FilterAnchorRight { static match() { return $patternMatchRight === $requestURL.length; } static compile() { return [ FilterAnchorRight.fid ]; } static fromCompiled(args) { return filterDataAlloc(args[0]); } static dnrFromCompiled(args, rule) { rule.condition.urlFilter = `${rule.condition.urlFilter}|`; } static keyFromArgs() { } static logData(idata, details) { details.pattern.push('|'); details.regex.push('$'); } } registerFilterClass(FilterAnchorRight); /******************************************************************************/ class FilterTrailingSeparator { static match() { if ( $patternMatchRight === $requestURL.length ) { return true; } if ( isSeparatorChar(bidiTrie.haystack[$patternMatchRight]) ) { $patternMatchRight += 1; return true; } return false; } static compile() { return [ FilterTrailingSeparator.fid ]; } static fromCompiled(args) { return filterDataAlloc(args[0]); } static dnrFromCompiled(args, rule) { rule.condition.urlFilter = `${rule.condition.urlFilter}^`; } static keyFromArgs() { } static logData(idata, details) { details.pattern.push('^'); details.regex.push(restrSeparator); } } registerFilterClass(FilterTrailingSeparator); /******************************************************************************/ class FilterRegex { static hasRegexPattern() { return true; } static getRegexPattern(idata) { return bidiTrie.extractString( filterData[idata+1], filterData[idata+2] ); } static match(idata) { const refs = filterRefs[filterData[idata+4]]; if ( refs.$re === null ) { refs.$re = new RegExp( this.getRegexPattern(idata), filterData[idata+3] === 0 ? 'i' : '' ); } if ( refs.$re.test($requestURLRaw) === false ) { return false; } $patternMatchLeft = $requestURLRaw.search(refs.$re); return true; } static compile(details) { return [ FilterRegex.fid, details.pattern, details.optionValues.has('match-case') ? 1 : 0, ]; } static fromCompiled(args) { const idata = filterDataAllocLen(5); filterData[idata+0] = args[0]; // fid filterData[idata+1] = bidiTrie.storeString(args[1]); // i filterData[idata+2] = args[1].length; // n filterData[idata+3] = args[2]; // match-case filterData[idata+4] = filterRefAdd({ $re: null }); return idata; } static dnrFromCompiled(args, rule) { if ( rule.condition === undefined ) { rule.condition = {}; } if ( isRE2(args[1]) === false ) { dnrAddRuleError(rule, `regexFilter is not RE2-compatible: ${args[1]}`); } rule.condition.regexFilter = args[1]; if ( args[2] === 1 ) { rule.condition.isUrlFilterCaseSensitive = true; } } static keyFromArgs(args) { return `${args[1]}\t${args[2]}`; } static logData(idata, details) { const s = bidiTrie.extractString( filterData[idata+1], filterData[idata+2] ); details.pattern.push('/', s, '/'); details.regex.push(s); details.isRegex = true; if ( filterData[idata+3] !== 0 ) { details.options.push('match-case'); } } static dumpInfo(idata) { return [ '/', bidiTrie.extractString( filterData[idata+1], filterData[idata+2] ), '/', filterData[idata+3] !== 0 ? ' (match-case)' : '', ].join(''); } } FilterRegex.isSlow = true; registerFilterClass(FilterRegex); /******************************************************************************/ class FilterMethod { static match(idata) { if ( $requestMethodBit === 0 ) { return false; } const methodBits = filterData[idata+1]; const notMethodBits = filterData[idata+2]; return (methodBits !== 0 && ($requestMethodBit & methodBits) !== 0) || (notMethodBits !== 0 && ($requestMethodBit & notMethodBits) === 0); } static compile(details) { return [ FilterMethod.fid, details.methodBits, details.notMethodBits ]; } static fromCompiled(args) { const idata = filterDataAllocLen(3); filterData[idata+0] = args[0]; // fid filterData[idata+1] = args[1]; // methodBits filterData[idata+2] = args[2]; // notMethodBits return idata; } static dnrFromCompiled(args, rule) { rule.condition = rule.condition || {}; const rc = rule.condition; let methodBits = args[1]; let notMethodBits = args[2]; if ( methodBits !== 0 && rc.requestMethods === undefined ) { rc.requestMethods = []; } if ( notMethodBits !== 0 && rc.excludedRequestMethods === undefined ) { rc.excludedRequestMethods = []; } for ( let i = 1; methodBits !== 0 || notMethodBits !== 0; i++ ) { const bit = 1 << i; const methodName = FilteringContext.getMethodName(bit); if ( (methodBits & bit) !== 0 ) { methodBits &= ~bit; rc.requestMethods.push(methodName); } else if ( (notMethodBits & bit) !== 0 ) { notMethodBits &= ~bit; rc.excludedRequestMethods.push(methodName); } } } static keyFromArgs(args) { return `${args[1]} ${args[2]}`; } static logData(idata, details) { const methods = []; let methodBits = filterData[idata+1]; let notMethodBits = filterData[idata+2]; for ( let i = 0; methodBits !== 0 || notMethodBits !== 0; i++ ) { const bit = 1 << i; const methodName = FilteringContext.getMethodName(bit); if ( (methodBits & bit) !== 0 ) { methodBits &= ~bit; methods.push(methodName); } else if ( (notMethodBits & bit) !== 0 ) { notMethodBits &= ~bit; methods.push(`~${methodName}`); } } details.options.push(`method=${methods.join('|')}`); } static dumpInfo(idata) { return `0b${filterData[idata+1].toString(2)} 0b${filterData[idata+2].toString(2)}`; } } registerFilterClass(FilterMethod); /******************************************************************************/ // stylesheet: 1 => bit 0 // image: 2 => bit 1 // object: 3 => bit 2 // script: 4 => bit 3 // ... class FilterNotType { static match(idata) { return $requestTypeValue !== 0 && (filterData[idata+1] & (1 << ($requestTypeValue - 1))) === 0; } static compile(details) { return [ FilterNotType.fid, details.notTypeBits ]; } static fromCompiled(args) { const idata = filterDataAllocLen(2); filterData[idata+0] = args[0]; // fid filterData[idata+1] = args[1]; // notTypeBits return idata; } static dnrFromCompiled(args, rule) { rule.condition = rule.condition || {}; const rc = rule.condition; if ( rc.excludedResourceTypes === undefined ) { rc.excludedResourceTypes = [ 'main_frame' ]; } let bits = args[1]; for ( let i = 1; bits !== 0 && i < typeValueToDNRTypeName.length; i++ ) { const bit = 1 << (i - 1); if ( (bits & bit) === 0 ) { continue; } bits &= ~bit; const type = typeValueToDNRTypeName[i]; if ( type === undefined ) { continue; } if ( rc.excludedResourceTypes.includes(type) ) { continue; } rc.excludedResourceTypes.push(type); } } static keyFromArgs(args) { return `${args[1]}`; } static logData(idata, details) { let bits = filterData[idata+1]; for ( let i = 1; bits !== 0 && i < typeValueToTypeName.length; i++ ) { const bit = 1 << (i - 1); if ( (bits & bit) === 0 ) { continue; } bits &= ~bit; details.options.push(`~${typeValueToTypeName[i]}`); } } static dumpInfo(idata) { return `0b${filterData[idata+1].toString(2)}`; } } registerFilterClass(FilterNotType); /******************************************************************************/ // A helper class to parse `domain=` option. class DomainOptIterator { constructor(domainOpt) { this.reset(domainOpt); } reset(domainOpt) { this.domainOpt = domainOpt; this.i = 0; this.value = undefined; this.done = false; return this; } next() { if ( this.i === -1 ) { this.domainOpt = ''; this.value = undefined; this.done = true; return this; } const pos = this.domainOpt.indexOf('|', this.i); if ( pos !== -1 ) { this.value = this.domainOpt.slice(this.i, pos); this.i = pos + 1; } else { this.value = this.domainOpt.slice(this.i); this.i = -1; } return this; } [Symbol.iterator]() { return this; } } // A helper instance to reuse throughout const domainOptIterator = new DomainOptIterator(''); /******************************************************************************/ // The optimal class is picked according to the content of the `from=` // filter option. const compileDomainOpt = (ctors, iterable, prepend, units) => { const hostnameHits = []; const hostnameMisses = []; const entityHits = []; const entityMisses = []; const regexHits = []; const regexMisses = []; for ( const s of iterable ) { const len = s.length; const beg = len > 1 && s.charCodeAt(0) === 0x7E /* '~' */ ? 1 : 0; if ( len <= beg ) { continue; } if ( s.charCodeAt(beg) === 0x2F /* / */ ) { if ( beg === 0 ) { regexHits.push(s); continue; } regexMisses.push(s.slice(1)); continue; } if ( s.endsWith('.*') === false ) { if ( beg === 0 ) { hostnameHits.push(s); continue; } hostnameMisses.push(s.slice(1)); continue; } if ( beg === 0 ) { entityHits.push(s); continue; } entityMisses.push(s.slice(1)); continue; } const toTrie = []; let trieWhich = 0b00; if ( hostnameHits.length > 1 ) { toTrie.push(...hostnameHits); hostnameHits.length = 0; trieWhich |= 0b01; } if ( entityHits.length > 1 ) { toTrie.push(...entityHits); entityHits.length = 0; trieWhich |= 0b10; } const compiledHit = []; if ( toTrie.length !== 0 ) { compiledHit.push( ctors[2].compile(toTrie.sort(), trieWhich) ); } for ( const hn of hostnameHits ) { compiledHit.push(ctors[0].compile(hn)); } for ( const hn of entityHits ) { compiledHit.push(ctors[1].compile(hn)); } for ( const hn of regexHits ) { compiledHit.push(ctors[3].compile(hn)); } if ( compiledHit.length > 1 ) { compiledHit[0] = FilterDomainHitAny.compile(compiledHit.slice()); compiledHit.length = 1; } toTrie.length = trieWhich = 0; if ( hostnameMisses.length > 1 ) { toTrie.push(...hostnameMisses); hostnameMisses.length = 0; trieWhich |= 0b01; } if ( entityMisses.length > 1 ) { toTrie.push(...entityMisses); entityMisses.length = 0; trieWhich |= 0b10; } const compiledMiss = []; if ( toTrie.length !== 0 ) { compiledMiss.push( ctors[6].compile(toTrie.sort(), trieWhich) ); } for ( const hn of hostnameMisses ) { compiledMiss.push(ctors[4].compile(hn)); } for ( const hn of entityMisses ) { compiledMiss.push(ctors[5].compile(hn)); } for ( const hn of regexMisses ) { compiledMiss.push(ctors[7].compile(hn)); } if ( prepend ) { if ( compiledHit.length !== 0 ) { units.unshift(compiledHit[0]); } if ( compiledMiss.length !== 0 ) { units.unshift(...compiledMiss); } } else { if ( compiledMiss.length !== 0 ) { units.push(...compiledMiss); } if ( compiledHit.length !== 0 ) { units.push(compiledHit[0]); } } }; /******************************************************************************/ class FilterDomainHit { static getDomainOpt(idata) { return this.hntrieContainer.extractHostname( filterData[idata+1], filterData[idata+2] ); } static match(idata) { return this.hntrieContainer.matchesHostname( this.getMatchTarget(), filterData[idata+1], filterData[idata+2] ); } static compile(hostname) { return [ this.fid, hostname ]; } static fromCompiled(args) { const idata = filterDataAllocLen(3); filterData[idata+0] = args[0]; // fid filterData[idata+1] = this.hntrieContainer.storeHostname(args[1]); // i filterData[idata+2] = args[1].length; // n return idata; } static dnrFromCompiled(args, rule) { rule.condition = rule.condition || {}; const prop = this.dnrConditionName; if ( rule.condition[prop] === undefined ) { rule.condition[prop] = []; } rule.condition[prop].push(args[1]); } static dumpInfo(idata) { return this.getDomainOpt(idata); } } /******************************************************************************/ class FilterDomainHitSet { static getDomainOpt(idata) { return this.hntrieContainer.extractDomainOpt( filterData[idata+1], filterData[idata+2] ); } static getTrieCount(idata) { const itrie = filterData[idata+4]; if ( itrie === 0 ) { return 0; } return Array.from( this.hntrieContainer.trieIterator(filterData[idata+4]) ).length; } static getLastResult(idata) { return filterData[idata+5]; } static getMatchedHostname(idata) { const lastResult = filterData[idata+5]; if ( lastResult === -1 ) { return ''; } return this.getMatchTarget(lastResult >>> 8).slice(lastResult & 0xFF); } static match(idata) { const refs = filterRefs[filterData[idata+6]]; const docHostname = this.getMatchTarget(0b01); if ( docHostname === refs.$last ) { return filterData[idata+5] !== -1; } refs.$last = docHostname; const which = filterData[idata+3]; const itrie = filterData[idata+4] || this.toTrie(idata); if ( itrie === 0 ) { return false; } if ( (which & 0b01) !== 0 ) { const pos = this.hntrieContainer .setNeedle(docHostname) .matches(itrie); if ( pos !== -1 ) { filterData[idata+5] = 0b01 << 8 | pos; return true; } } if ( (which & 0b10) !== 0 ) { const pos = this.hntrieContainer .setNeedle(this.getMatchTarget(0b10)) .matches(itrie); if ( pos !== -1 ) { filterData[idata+5] = 0b10 << 8 | pos; return true; } } filterData[idata+5] = -1; return false; } static add(idata, hn) { this.hntrieContainer.setNeedle(hn).add(filterData[idata+4]); filterData[idata+3] |= hn.charCodeAt(hn.length - 1) !== 0x2A /* '*' */ ? 0b01 : 0b10; filterData[idata+5] = -1; filterRefs[filterData[idata+6]].$last = ''; } static create(fid = -1) { const idata = filterDataAllocLen(7); filterData[idata+0] = fid !== -1 ? fid : this.fid; filterData[idata+1] = 0; filterData[idata+2] = 0; filterData[idata+3] = 0; filterData[idata+4] = this.hntrieContainer.createTrie(); filterData[idata+5] = -1; // $lastResult filterData[idata+6] = filterRefAdd({ $last: '' }); return idata; } static compile(hostnames, which) { const stringified = Array.isArray(hostnames) ? hostnames.join('|') : hostnames; return [ this.fid, stringified, which ]; } static fromCompiled(args) { const idata = filterDataAllocLen(7); filterData[idata+0] = args[0]; // fid filterData[idata+1] = this.hntrieContainer.storeDomainOpt(args[1]); filterData[idata+2] = args[1].length; filterData[idata+3] = args[2]; // which filterData[idata+4] = 0; // itrie filterData[idata+5] = -1; // $lastResult filterData[idata+6] = filterRefAdd({ $last: '' }); return idata; } static dnrFromCompiled(args, rule) { rule.condition = rule.condition || {}; const prop = this.dnrConditionName; if ( rule.condition[prop] === undefined ) { rule.condition[prop] = []; } rule.condition[prop].push(...args[1].split('|')); } static toTrie(idata) { if ( filterData[idata+2] === 0 ) { return 0; } const itrie = filterData[idata+4] = this.hntrieContainer.createTrieFromStoredDomainOpt( filterData[idata+1], filterData[idata+2] ); return itrie; } static keyFromArgs(args) { return args[1]; } static dumpInfo(idata) { return `0b${filterData[idata+3].toString(2)} ${this.getDomainOpt(idata)}`; } } /******************************************************************************/ class FilterDomainRegexHit { static getDomainOpt(idata) { const ref = filterRefs[filterData[idata+1]]; return ref.restr; } static match(idata) { const ref = filterRefs[filterData[idata+1]]; if ( ref.$re === null ) { ref.$re = new RegExp(ref.restr.slice(1,-1)); } return ref.$re.test(this.getMatchTarget()); } static compile(restr) { return [ this.fid, restr ]; } static fromCompiled(args) { const idata = filterDataAllocLen(2); filterData[idata+0] = args[0]; // fid filterData[idata+1] = filterRefAdd({ restr: args[1], $re: null }); return idata; } static dnrFromCompiled(args, rule) { rule.condition = rule.condition || {}; const prop = this.dnrConditionName; if ( rule.condition[prop] === undefined ) { rule.condition[prop] = []; } rule.condition[prop].push(args[1]); } static dumpInfo(idata) { return this.getDomainOpt(idata); } } /******************************************************************************/ // Implement the following filter option: // - domain= // - from= class FilterFromDomainHit extends FilterDomainHit { static hasOriginHit() { return true; } static getMatchTarget() { return $docHostname; } static get dnrConditionName() { return 'initiatorDomains'; } static logData(idata, details) { details.fromDomains.push(this.getDomainOpt(idata)); } } Object.defineProperty(FilterFromDomainHit, 'hntrieContainer', { value: origHNTrieContainer }); class FilterFromDomainMiss extends FilterFromDomainHit { static hasOriginHit() { return false; } static get dnrConditionName() { return 'excludedInitiatorDomains'; } static match(idata) { return super.match(idata) === false; } static logData(idata, details) { details.fromDomains.push(`~${this.getDomainOpt(idata)}`); } } class FilterFromEntityHit extends FilterFromDomainHit { static getMatchTarget() { return $docEntity.compute(); } } class FilterFromEntityMiss extends FilterFromDomainMiss { static getMatchTarget() { return $docEntity.compute(); } } class FilterFromDomainHitSet extends FilterDomainHitSet { static hasOriginHit() { return true; } static getMatchTarget(which) { return (which & 0b01) !== 0 ? $docHostname : $docEntity.compute(); } static get dnrConditionName() { return 'initiatorDomains'; } static logData(idata, details) { details.fromDomains.push(this.getDomainOpt(idata)); } } Object.defineProperty(FilterFromDomainHitSet, 'hntrieContainer', { value: origHNTrieContainer }); class FilterFromDomainMissSet extends FilterFromDomainHitSet { static hasOriginHit() { return false; } static match(idata) { return super.match(idata) === false; } static get dnrConditionName() { return 'excludedInitiatorDomains'; } static logData(idata, details) { details.fromDomains.push('~' + this.getDomainOpt(idata).replace(/\|/g, '|~')); } } class FilterFromRegexHit extends FilterDomainRegexHit { static getMatchTarget() { return $docHostname; } static get dnrConditionName() { return 'initiatorDomains'; } static logData(idata, details) { details.fromDomains.push(`${this.getDomainOpt(idata)}`); } } class FilterFromRegexMiss extends FilterFromRegexHit { static match(idata) { return super.match(idata) === false; } static get dnrConditionName() { return 'excludedInitiatorDomains'; } static logData(idata, details) { details.fromDomains.push(`~${this.getDomainOpt(idata)}`); } } registerFilterClass(FilterFromDomainHit); registerFilterClass(FilterFromDomainMiss); registerFilterClass(FilterFromEntityHit); registerFilterClass(FilterFromEntityMiss); registerFilterClass(FilterFromDomainHitSet); registerFilterClass(FilterFromDomainMissSet); registerFilterClass(FilterFromRegexHit); registerFilterClass(FilterFromRegexMiss); const fromOptClasses = [ FilterFromDomainHit, FilterFromEntityHit, FilterFromDomainHitSet, FilterFromRegexHit, FilterFromDomainMiss, FilterFromEntityMiss, FilterFromDomainMissSet, FilterFromRegexMiss, ]; const compileFromDomainOpt = (...args) => { return compileDomainOpt(fromOptClasses, ...args); }; /******************************************************************************/ // Implement the following filter option: // - to= class FilterToDomainHit extends FilterDomainHit { static getMatchTarget() { return $requestHostname; } static get dnrConditionName() { return 'requestDomains'; } static logData(idata, details) { details.toDomains.push(this.getDomainOpt(idata)); } } Object.defineProperty(FilterToDomainHit, 'hntrieContainer', { value: destHNTrieContainer }); class FilterToDomainMiss extends FilterToDomainHit { static get dnrConditionName() { return 'excludedRequestDomains'; } static match(idata) { return super.match(idata) === false; } static logData(idata, details) { details.toDomains.push(`~${this.getDomainOpt(idata)}`); } } class FilterToEntityHit extends FilterToDomainHit { static getMatchTarget() { return $requestEntity.compute(); } } class FilterToEntityMiss extends FilterToDomainMiss { static getMatchTarget() { return $requestEntity.compute(); } } class FilterToDomainHitSet extends FilterDomainHitSet { static getMatchTarget(which) { return (which & 0b01) !== 0 ? $requestHostname : $requestEntity.compute(); } static get dnrConditionName() { return 'requestDomains'; } static logData(idata, details) { details.toDomains.push(this.getDomainOpt(idata)); } } Object.defineProperty(FilterToDomainHitSet, 'hntrieContainer', { value: destHNTrieContainer }); class FilterToDomainMissSet extends FilterToDomainHitSet { static match(idata) { return super.match(idata) === false; } static get dnrConditionName() { return 'excludedRequestDomains'; } static logData(idata, details) { details.toDomains.push('~' + this.getDomainOpt(idata).replace(/\|/g, '|~')); } } class FilterToRegexHit extends FilterDomainRegexHit { static getMatchTarget() { return $requestHostname; } static get dnrConditionName() { return 'requestDomains'; } static logData(idata, details) { details.toDomains.push(`${this.getDomainOpt(idata)}`); } } class FilterToRegexMiss extends FilterToRegexHit { static match(idata) { return super.match(idata) === false; } static get dnrConditionName() { return 'excludedRequestDomains'; } static logData(idata, details) { details.toDomains.push(`~${this.getDomainOpt(idata)}`); } } registerFilterClass(FilterToDomainHit); registerFilterClass(FilterToDomainMiss); registerFilterClass(FilterToEntityHit); registerFilterClass(FilterToEntityMiss); registerFilterClass(FilterToDomainHitSet); registerFilterClass(FilterToDomainMissSet); registerFilterClass(FilterToRegexHit); registerFilterClass(FilterToRegexMiss); const toOptClasses = [ FilterToDomainHit, FilterToEntityHit, FilterToDomainHitSet, FilterToRegexHit, FilterToDomainMiss, FilterToEntityMiss, FilterToDomainMissSet, FilterToRegexMiss, ]; const compileToDomainOpt = (...args) => { return compileDomainOpt(toOptClasses, ...args); }; /******************************************************************************/ class FilterDenyAllow extends FilterToDomainMissSet { static compile(details) { return super.compile(details.optionValues.get('denyallow'), 0b01); } static logData(idata, details) { details.denyallow.push(this.getDomainOpt(idata)); } } registerFilterClass(FilterDenyAllow); /******************************************************************************/ class FilterModifier { static getModifierType(idata) { return filterData[idata+2]; } static match() { return true; } static matchAndFetchModifiers(idata, env) { if ( this.getModifierType(idata) !== env.type ) { return; } env.results.push(new FilterModifierResult(idata, env)); } static compile(details) { return [ FilterModifier.fid, details.action, details.modifyType, details.modifyValue || '', ]; } static fromCompiled(args) { const idata = filterDataAllocLen(4); filterData[idata+0] = args[0]; // fid filterData[idata+1] = args[1]; // actionBits filterData[idata+2] = args[2]; // type filterData[idata+3] = filterRefAdd({ value: args[3], $cache: null, }); return idata; } static dnrFromCompiled(args, rule) { rule.__modifierAction = args[1]; rule.__modifierType = modifierNameFromType.get(args[2]); rule.__modifierValue = args[3]; } static keyFromArgs(args) { return `${args[1]}\t${args[2]}\t${args[3]}`; } static logData(idata, details) { let opt = modifierNameFromType.get(filterData[idata+2]); const refs = filterRefs[filterData[idata+3]]; if ( refs.value !== '' ) { opt += `=${LogData.requote(refs.value)}`; } details.options.push(opt); } static dumpInfo(idata) { const s = modifierNameFromType.get(filterData[idata+2]); const refs = filterRefs[filterData[idata+3]]; if ( refs.value === '' ) { return s; } return `${s}=${refs.value}`; } } registerFilterClass(FilterModifier); // Helper class for storing instances of FilterModifier which were found to // be a match. class FilterModifierResult { constructor(imodifierunit, env) { this.imodifierunit = imodifierunit; this.refs = filterRefs[filterData[imodifierunit+3]]; this.ireportedunit = env.iunit; this.th = env.th; this.bits = (env.bits & ~BLOCKALLOW_REALM) | filterData[imodifierunit+1]; } get result() { return (this.bits & ALLOW_REALM) === 0 ? 1 : 2; } get value() { return this.refs.value; } get cache() { return this.refs.$cache; } set cache(a) { this.refs.$cache = a; } logData() { const r = new LogData(this.bits, this.th, this.ireportedunit); r.result = this.result; r.modifier = true; return r; } } /******************************************************************************/ class FilterCollection { static getCount(idata) { let n = 0; this.forEach(idata, ( ) => { n += 1; }); return n; } static forEach(idata, fn) { let i = filterData[idata+1]; if ( i === 0 ) { return; } do { const iunit = filterData[i+0]; const r = fn(iunit); if ( r !== undefined ) { return r; } i = filterData[i+1]; } while ( i !== 0 ); } static unshift(idata, iunit) { filterData[idata+1] = filterSequenceAdd(iunit, filterData[idata+1]); } static shift(idata) { filterData[idata+1] = filterData[filterData[idata+1]+1]; } static create(fid = -1) { return filterDataAlloc( fid !== -1 ? fid : FilterCollection.fid, 0 ); } static compile(fc, fdata) { return [ fc.fid, fdata ]; } static fromCompiled(args) { const units = args[1]; const n = units.length; let iunit, inext = 0; let i = n; while ( i-- ) { iunit = filterFromCompiled(units[i]); inext = filterSequenceAdd(iunit, inext); } const idata = filterDataAllocLen(2); filterData[idata+0] = args[0]; // fid filterData[idata+1] = inext; // i return idata; } static dnrFromCompiled(args, rule) { for ( const unit of args[1] ) { dnrRuleFromCompiled(unit, rule); } } static logData(idata, details) { this.forEach(idata, iunit => { filterLogData(iunit, details); }); } static dumpInfo(idata) { return this.getCount(idata); } } registerFilterClass(FilterCollection); /******************************************************************************/ class FilterDomainHitAny extends FilterCollection { static getDomainOpt(idata) { const domainOpts = []; this.forEach(idata, iunit => { if ( filterHasOriginHit(iunit) !== true ) { return; } filterGetDomainOpt(iunit, domainOpts); }); return domainOpts.join('|'); } static hasOriginHit(idata) { this.forEach(idata, iunit => { if ( filterHasOriginHit(iunit) ) { return true; } }); return false; } static match(idata) { let i = filterData[idata+1]; while ( i !== 0 ) { if ( filterMatch(filterData[i+0]) ) { return true; } i = filterData[i+1]; } return false; } static compile(fdata) { return super.compile(FilterDomainHitAny, fdata); } static fromCompiled(args) { return super.fromCompiled(args); } } registerFilterClass(FilterDomainHitAny); /******************************************************************************/ class FilterCompositeAll extends FilterCollection { // FilterPatternPlain is assumed to be first filter in sequence. This can // be revisited if needed. static isBidiTrieable(idata) { return filterIsBidiTrieable(filterData[filterData[idata+1]+0]); } static toBidiTrie(idata) { const iseq = filterData[idata+1]; const details = filterToBidiTrie(filterData[iseq+0]); this.shift(idata); return details; } static getDomainOpt(idata) { return this.forEach(idata, iunit => { if ( filterHasOriginHit(iunit) !== true ) { return; } return filterGetDomainOpt(iunit); }); } static hasOriginHit(idata) { return this.forEach(idata, iunit => { if ( filterHasOriginHit(iunit) === true ) { return true; } }) || false; } static hasRegexPattern(idata) { return this.forEach(idata, iunit => { const fc = filterGetClass(iunit); if ( fc.hasRegexPattern === undefined ) { return; } if ( fc.hasRegexPattern(iunit) === true ) { return true; } }) || false; } static getRegexPattern(idata) { return this.forEach(idata, iunit => { const fc = filterGetClass(iunit); if ( fc.getRegexPattern === undefined ) { return; } return fc.getRegexPattern(iunit); }); } static match(idata) { let i = filterData[idata+1]; while ( i !== 0 ) { if ( filterMatch(filterData[i+0]) !== true ) { return false; } i = filterData[i+1]; } return true; } // IMPORTANT: the modifier filter unit is assumed to be ALWAYS the // first unit in the sequence. This requirement ensures that we do // not have to traverse the sequence to find the modifier filter // unit. static getModifierType(idata) { const iseq = filterData[idata+1]; const iunit = filterData[iseq+0]; return filterGetModifierType(iunit); } static matchAndFetchModifiers(idata, env) { const iseq = filterData[idata+1]; const iunit = filterData[iseq+0]; if ( filterGetModifierType(iunit) === env.type && this.match(idata) ) { filterMatchAndFetchModifiers(iunit, env); } } static compile(fdata) { return super.compile(FilterCompositeAll, fdata); } static fromCompiled(args) { return super.fromCompiled(args); } } registerFilterClass(FilterCompositeAll); /******************************************************************************/ // Dictionary of hostnames class FilterHostnameDict { static getCount(idata) { const itrie = filterData[idata+1]; if ( itrie !== 0 ) { return Array.from(destHNTrieContainer.trieIterator(itrie)).length; } return filterRefs[filterData[idata+3]].length; } static match(idata) { const itrie = filterData[idata+1] || this.optimize(idata); return ( filterData[idata+2] = destHNTrieContainer .setNeedle($requestHostname) .matches(itrie) ) !== -1; } static add(idata, hn) { const itrie = filterData[idata+1]; if ( itrie === 0 ) { filterRefs[filterData[idata+3]].push(hn); } else { destHNTrieContainer.setNeedle(hn).add(itrie); } } static optimize(idata) { const itrie = filterData[idata+1]; if ( itrie !== 0 ) { return itrie; } const hostnames = filterRefs[filterData[idata+3]]; filterData[idata+1] = destHNTrieContainer.createTrieFromIterable(hostnames); filterRefs[filterData[idata+3]] = null; return filterData[idata+1]; } static create() { const idata = filterDataAllocLen(4); filterData[idata+0] = FilterHostnameDict.fid; // fid filterData[idata+1] = 0; // itrie filterData[idata+2] = -1; // lastResult filterData[idata+3] = filterRefAdd([]); // []: hostnames return idata; } static logData(idata, details) { const hostname = $requestHostname.slice(filterData[idata+2]); details.pattern.push('||', hostname, '^'); details.regex.push( restrFromPlainPattern(hostname), '\\.?', restrSeparator ); } static dumpInfo(idata) { return this.getCount(idata); } } registerFilterClass(FilterHostnameDict); /******************************************************************************/ // Dictionary of hostnames for filters which only purpose is to match // the document origin. class FilterJustOrigin extends FilterFromDomainHitSet { static create(fid = -1) { return super.create(fid !== -1 ? fid : FilterJustOrigin.fid); } static logPattern(idata, details) { details.pattern.push('*'); details.regex.push('^'); } static logData(idata, details) { this.logPattern(idata, details); details.fromDomains.push(this.getMatchedHostname(idata)); } static dumpInfo(idata) { return this.getTrieCount(idata); } } registerFilterClass(FilterJustOrigin); /******************************************************************************/ class FilterHTTPSJustOrigin extends FilterJustOrigin { static match(idata) { return $requestURL.startsWith('https://') && super.match(idata); } static create() { return super.create(FilterHTTPSJustOrigin.fid); } static logPattern(idata, details) { details.pattern.push('|https://'); details.regex.push('^https://'); } } registerFilterClass(FilterHTTPSJustOrigin); /******************************************************************************/ class FilterHTTPJustOrigin extends FilterJustOrigin { static match(idata) { return $requestURL.startsWith('http://') && super.match(idata); } static create() { return super.create(FilterHTTPJustOrigin.fid); } static logPattern(idata, details) { details.pattern.push('|http://'); details.regex.push('^http://'); } } registerFilterClass(FilterHTTPJustOrigin); /******************************************************************************/ class FilterPlainTrie { static match(idata) { if ( bidiTrie.matches(filterData[idata+1], $tokenBeg) !== 0 ) { filterData[idata+2] = bidiTrie.$iu; return true; } return false; } static create() { const idata = filterDataAllocLen(3); filterData[idata+0] = FilterPlainTrie.fid; // fid filterData[idata+1] = bidiTrie.createTrie(); // itrie filterData[idata+2] = 0; // matchedUnit return idata; } static addUnitToTrie(idata, iunit) { const trieDetails = filterToBidiTrie(iunit); const itrie = filterData[idata+1]; const id = bidiTrie.add( itrie, trieDetails.i, trieDetails.n, trieDetails.itok ); // No point storing a pattern with conditions if the bidi-trie already // contain a pattern with no conditions. const ix = bidiTrie.getExtra(id); if ( ix === 1 ) { return; } // If the newly stored pattern has no condition, short-circuit existing // ones since they will always be short-circuited by the condition-less // pattern. const fc = filterGetClass(iunit); if ( fc.isPatternPlain ) { bidiTrie.setExtra(id, 1); return; } // FilterCompositeAll is assumed here, i.e. with conditions. if ( fc === FilterCompositeAll && fc.getCount(iunit) === 1 ) { iunit = filterData[filterData[iunit+1]+0]; } bidiTrie.setExtra(id, filterSequenceAdd(iunit, ix)); } static logData(idata, details) { const s = $requestURL.slice(bidiTrie.$l, bidiTrie.$r); details.pattern.push(s); details.regex.push(restrFromPlainPattern(s)); if ( filterData[idata+2] !== -1 ) { filterLogData(filterData[idata+2], details); } } static dumpInfo(idata) { return `${Array.from(bidiTrie.trieIterator(filterData[idata+1])).length}`; } } registerFilterClass(FilterPlainTrie); /******************************************************************************/ class FilterBucket extends FilterCollection { static getCount(idata) { return filterData[idata+2]; } static forEach(idata, fn) { return super.forEach(filterData[idata+1], fn); } static match(idata) { const icollection = filterData[idata+1]; let iseq = filterData[icollection+1]; while ( iseq !== 0 ) { const iunit = filterData[iseq+0]; if ( filterMatch(iunit) ) { filterData[idata+3] = iunit; return true; } iseq = filterData[iseq+1]; } return false; } static matchAndFetchModifiers(idata, env) { const icollection = filterData[idata+1]; let iseq = filterData[icollection+1]; while ( iseq !== 0 ) { const iunit = filterData[iseq+0]; env.iunit = iunit; filterMatchAndFetchModifiers(iunit, env); iseq = filterData[iseq+1]; } } static unshift(idata, iunit) { super.unshift(filterData[idata+1], iunit); filterData[idata+2] += 1; } static shift(idata) { super.shift(filterData[idata+1]); filterData[idata+2] -= 1; } static create() { const idata = filterDataAllocLen(4); filterData[idata+0] = FilterBucket.fid; // fid filterData[idata+1] = FilterCollection.create(); // icollection filterData[idata+2] = 0; // n filterData[idata+3] = 0; // $matchedUnit return idata; } static logData(idata, details) { filterLogData(filterData[idata+3], details); } static optimize(idata, optimizeBits = 0b11) { if ( (optimizeBits & 0b01) !== 0 ) { if ( filterData[idata+2] >= 3 ) { const iplaintrie = this.optimizePatternTests(idata); if ( iplaintrie !== 0 ) { const icollection = filterData[idata+1]; const i = filterData[icollection+1]; if ( i === 0 ) { return iplaintrie; } this.unshift(idata, iplaintrie); } } } if ( (optimizeBits & 0b10) !== 0 ) { if ( filterData[idata+2] >= 5 ) { const ioptimized = this.optimizeMatch( idata, FilterBucketIfOriginHits, 5 ); if ( ioptimized !== 0 ) { const icollection = filterData[idata+1]; const i = filterData[icollection+1]; if ( i === 0 ) { return ioptimized; } this.unshift(idata, ioptimized); } } if ( filterData[idata+2] >= 5 ) { const ioptimized = this.optimizeMatch( idata, FilterBucketIfRegexHits, 5 ); if ( ioptimized !== 0 ) { const icollection = filterData[idata+1]; const i = filterData[icollection+1]; if ( i === 0 ) { return ioptimized; } this.unshift(idata, ioptimized); } } } return 0; } static optimizePatternTests(idata) { const isrccollection = filterData[idata+1]; let n = 0; let iseq = filterData[isrccollection+1]; do { if ( filterIsBidiTrieable(filterData[iseq+0]) ) { n += 1; } iseq = filterData[iseq+1]; } while ( iseq !== 0 && n < 3 ); if ( n < 3 ) { return 0; } const iplaintrie = FilterPlainTrie.create(); iseq = filterData[isrccollection+1]; let iprev = 0; for (;;) { const iunit = filterData[iseq+0]; const inext = filterData[iseq+1]; if ( filterIsBidiTrieable(iunit) ) { FilterPlainTrie.addUnitToTrie(iplaintrie, iunit); if ( iprev !== 0 ) { filterData[iprev+1] = inext; } else { filterData[isrccollection+1] = inext; } filterData[idata+2] -= 1; } else { iprev = iseq; } if ( inext === 0 ) { break; } iseq = inext; } return iplaintrie; } static optimizeMatch(idata, fc, min) { const isrccollection = filterData[idata+1]; const candidates = []; this.forEach(idata, iunit => { if ( fc.canCoalesce(iunit) === false ) { return; } candidates.push(iunit); }); if ( candidates.length < min ) { return 0; } const idesbucket = FilterBucket.create(); const idescollection = filterData[idesbucket+1]; let coalesced; let isrcseq = filterData[isrccollection+1]; let iprev = 0; for (;;) { const iunit = filterData[isrcseq+0]; const inext = filterData[isrcseq+1]; if ( candidates.includes(iunit) ) { coalesced = fc.coalesce(iunit, coalesced); // move the sequence slot to new bucket filterData[isrcseq+1] = filterData[idescollection+1]; filterData[idescollection+1] = isrcseq; filterData[idesbucket+2] += 1; if ( iprev !== 0 ) { filterData[iprev+1] = inext; } else { filterData[isrccollection+1] = inext; } filterData[idata+2] -= 1; } else { iprev = isrcseq; } if ( inext === 0 ) { break; } isrcseq = inext; } return fc.create(coalesced, idesbucket); } static dumpInfo(idata) { return this.getCount(idata); } } registerFilterClass(FilterBucket); /******************************************************************************/ // Filter bucket objects which have a pre-test method before being treated // as a plain filter bucket -- the pre-test method should be fast as it is // used to avoid having to iterate through the content of the filter bucket. class FilterBucketIf extends FilterBucket { static getCount(idata) { return super.getCount(filterData[idata+1]); } static forEach(idata, fn) { return super.forEach(filterData[idata+1], fn); } static match(idata) { return this.preTest(idata) && super.match(filterData[idata+1]); } static matchAndFetchModifiers(idata, env) { if ( this.preTest(idata) ) { super.matchAndFetchModifiers(filterData[idata+1], env); } } static create(fid, ibucket, itest) { const idata = filterDataAllocLen(3); filterData[idata+0] = fid; filterData[idata+1] = ibucket; filterData[idata+2] = itest; return idata; } static logData(idata, details) { filterLogData(filterData[idata+1], details); } } registerFilterClass(FilterBucketIf); /******************************************************************************/ class FilterBucketIfOriginHits extends FilterBucketIf { static preTest(idata) { return filterMatch(filterData[idata+2]); } static canCoalesce(iunit) { return filterHasOriginHit(iunit); } static coalesce(iunit, coalesced) { if ( coalesced === undefined ) { coalesced = new Set(); } const domainOpt = filterGetDomainOpt(iunit); if ( domainOpt.includes('|') ) { for ( const hn of domainOptIterator.reset(domainOpt) ) { coalesced.add(hn); } } else { coalesced.add(domainOpt); } return coalesced; } static create(coalesced, ibucket) { const units = []; compileFromDomainOpt(coalesced, false, units); const ihittest = filterFromCompiled(units[0]); const ipretest = super.create( FilterBucketIfOriginHits.fid, ibucket, ihittest ); return ipretest; } } registerFilterClass(FilterBucketIfOriginHits); /******************************************************************************/ class FilterBucketIfRegexHits extends FilterBucketIf { static preTest(idata) { return filterRefs[filterData[idata+2]].test($requestURLRaw); } static canCoalesce(iunit) { const fc = filterGetClass(iunit); if ( fc.hasRegexPattern === undefined ) { return false; } if ( fc.hasRegexPattern(iunit) !== true ) { return false; } return true; } static coalesce(iunit, coalesced) { if ( coalesced === undefined ) { coalesced = new Set(); } coalesced.add(filterGetRegexPattern(iunit)); return coalesced; } static create(coalesced, ibucket) { const reString = Array.from(coalesced).join('|'); return super.create( FilterBucketIfRegexHits.fid, ibucket, filterRefAdd(new RegExp(reString, 'i')) ); } static dumpInfo(idata) { return filterRefs[filterData[idata+2]].source; } } registerFilterClass(FilterBucketIfRegexHits); /******************************************************************************/ class FilterStrictParty { // TODO: disregard `www.`? static match(idata) { return ($requestHostname === $docHostname) === (filterData[idata+1] === 0); } static compile(details) { return [ FilterStrictParty.fid, details.strictParty > 0 ? 0 : 1 ]; } static fromCompiled(args) { return filterDataAlloc( args[0], // fid args[1] ); } static dnrFromCompiled(args, rule) { const partyness = args[1] === 0 ? 1 : 3; dnrAddRuleError(rule, `strict${partyness}p not supported`); } static keyFromArgs(args) { return `${args[1]}`; } static logData(idata, details) { details.options.push( filterData[idata+1] === 0 ? 'strict1p' : 'strict3p' ); } } registerFilterClass(FilterStrictParty); /******************************************************************************/ class FilterOnHeaders { static match(idata) { const refs = filterRefs[filterData[idata+1]]; if ( refs.$parsed === null ) { refs.$parsed = sfp.parseHeaderValue(refs.headerOpt); } const { bad, name, not, value } = refs.$parsed; if ( bad ) { return false; } const headerValue = $httpHeaders.lookup(name); if ( headerValue === undefined ) { return false; } if ( value === '' ) { return true; } let { re } = refs.$parsed; if ( re === undefined ) { re = new RegExp(refs.$parsed.reStr, refs.$parsed.reFlags); refs.$parsed.re = re; } return re.test(headerValue) !== not; } static compile(details) { const parsed = sfp.parseHeaderValue(details.optionValues.get('header')); let normalized = parsed.name; if ( parsed.value !== '' ) { normalized += `:${parsed.value}`; } return [ FilterOnHeaders.fid, normalized ]; } static fromCompiled(args) { return filterDataAlloc( args[0], // fid filterRefAdd({ headerOpt: args[1], $parsed: null, }) ); } static dnrFromCompiled(args, rule) { rule.condition ||= {}; const parsed = sfp.parseHeaderValue(args[1]); if ( parsed.bad !== true ) { const value = parsed.isRegex ? toHeaderPattern(parsed.reStr) : parsed.value; if ( value !== undefined ) { const prop = parsed.not ? 'excludedResponseHeaders' : 'responseHeaders'; rule.condition[prop] ||= []; const details = { header: parsed.name, }; if ( value !== '' ) { details.values = [ value ]; } rule.condition[prop].push(details); return; } } dnrAddRuleError(rule, `header="${args[1]}" not supported`); } static logData(idata, details) { const irefs = filterData[idata+1]; const headerOpt = filterRefs[irefs].headerOpt; let opt = 'header'; if ( headerOpt !== '' ) { opt += `=${LogData.requote(headerOpt)}`; } details.options.push(opt); } } registerFilterClass(FilterOnHeaders); /******************************************************************************/ class FilterIPAddress { static TYPE_UNKNOWN = 0; static TYPE_EQUAL = 1; static TYPE_STARTSWITH = 2; static TYPE_LAN = 3; static TYPE_LOOPBACK = 4; static TYPE_RE = 5; static reIPv6IPv4lan = /^::ffff:(7f\w{2}|a\w{2}|a9fe|c0a8):\w+$/; static reIPv6local = /^f[cd]\w{2}:/; static match(idata) { const ipaddr = $requestAddress; if ( ipaddr === '' ) { return false; } const details = filterRefs[filterData[idata+1]]; switch ( details.$type || this.TYPE_UNKNOWN ) { case this.TYPE_LAN: return this.isLAN(ipaddr); case this.TYPE_LOOPBACK: return this.isLoopback(ipaddr); case this.TYPE_EQUAL: case this.TYPE_STARTSWITH: case this.TYPE_RE: return details.$pattern.test(ipaddr); default: break; } const { pattern } = details; if ( pattern === 'lan' ) { details.$type = this.TYPE_LAN; } else if ( pattern === 'loopback' ) { details.$type = this.TYPE_LOOPBACK; } else if ( pattern.startsWith('/') && pattern.endsWith('/') ) { details.$type = this.TYPE_RE; details.$pattern = new RegExp(pattern.slice(1, -1), 'm'); } else if ( pattern.endsWith('*') ) { details.$type = this.TYPE_STARTSWITH; details.$pattern = new RegExp(`^${restrFromPlainPattern(pattern.slice(0, -1))}`, 'm'); } else { details.$type = this.TYPE_EQUAL; details.$pattern = new RegExp(`^${restrFromPlainPattern(pattern)}$`, 'm'); } return this.match(idata); } // https://github.com/uBlockOrigin/uAssets/blob/master/filters/lan-block.txt // https://en.wikipedia.org/wiki/Reserved_IP_addresses // `ipaddr` is assumed well-formed static isLAN(ipaddr) { const c0 = ipaddr.charCodeAt(0); // ipv4 if ( c0 === 0x30 /* 0 */ ) { return ipaddr.startsWith('0.'); } if ( c0 === 0x31 /* 1 */ ) { if ( ipaddr.startsWith('10.') ) { return true; } if ( ipaddr.startsWith('127.') ) { return true; } if ( ipaddr.startsWith('169.254.') ) { return true; } if ( ipaddr.startsWith('172.') ) { const v = parseInt(ipaddr.slice(4), 10); return v >= 16 && v <= 31; } return ipaddr.startsWith('192.168.'); } // ipv6 if ( c0 === 0x3A /* : */ ) { if ( ipaddr.startsWith('::') === false ) { return false; } if ( ipaddr === '::' || ipaddr === '::1' ) { return true; } if ( ipaddr.startsWith('::ffff:') === false ) { return false; } return this.reIPv6IPv4lan.test(ipaddr); } if ( c0 === 0x66 /* f */ ) { return this.reIPv6local.test(ipaddr); } return false; } static isLoopback(ipaddr) { return ipaddr === '127.0.0.1' || ipaddr === '::1'; } static compile(details) { return [ FilterIPAddress.fid, details.optionValues.get('ipaddress') ]; } static fromCompiled(args) { const pattern = args[1]; const details = { pattern }; return filterDataAlloc(args[0], filterRefAdd(details)); } static dnrFromCompiled(args, rule) { dnrAddRuleError(rule, `"ipaddress=${args[1]}" not supported`); } static logData(idata, details) { const irefs = filterData[idata+1]; details.options.push(`ipaddress=${LogData.requote(filterRefs[irefs].pattern)}`); } } registerFilterClass(FilterIPAddress); /******************************************************************************/ class FilterMessage { static match() { return true; } static compile(details) { return [ FilterMessage.fid, encodeURIComponent(details.optionValues.get('reason')), ]; } static fromCompiled(args) { const msg = args[1]; return filterDataAlloc(args[0], bidiTrie.storeString(msg), msg.length); } static keyFromArgs(args) { return `${args[1]}`; } static logData(idata, details) { const reason = decodeURIComponent( bidiTrie.extractString(filterData[idata+1], filterData[idata+2]) ); details.reason = reason; details.options.push(`reason=${reason}`); } } registerFilterClass(FilterMessage); /******************************************************************************/ /******************************************************************************/ // https://github.com/gorhill/uBlock/issues/2630 // Slice input URL into a list of safe-integer token values, instead of a list // of substrings. The assumption is that with dealing only with numeric // values, less underlying memory allocations, and also as a consequence // less work for the garbage collector down the road. // Another assumption is that using a numeric-based key value for Map() is // more efficient than string-based key value (but that is something I would // have to benchmark). // Benchmark for string-based tokens vs. safe-integer token values: // https://gorhill.github.io/obj-vs-set-vs-map/tokenize-to-str-vs-to-int.html // http://www.cse.yorku.ca/~oz/hash.html#djb2 // Use above algorithm to generate token hash. const urlTokenizer = new (class { constructor() { this._chars = '0123456789%abcdefghijklmnopqrstuvwxyz'; this._validTokenChars = new Uint8Array(128); for ( let i = 0, n = this._chars.length; i < n; i++ ) { this._validTokenChars[this._chars.charCodeAt(i)] = i + 1; } this._urlIn = ''; this._urlOut = ''; this._tokenized = false; this._hasQuery = 0; // https://www.reddit.com/r/uBlockOrigin/comments/dzw57l/ // Remember: 1 token needs two slots this._tokens = new Uint32Array(bidiTrie.haystack.length + 16); this.knownTokens = new Uint8Array(65536); this.resetKnownTokens(); } setURL(url) { if ( url !== this._urlIn ) { this._urlIn = url; this._urlOut = url.toLowerCase(); this._hasQuery = 0; this._tokenized = false; } return this._urlOut; } resetKnownTokens() { this.knownTokens.fill(0); this.addKnownToken(DOT_TOKEN_HASH); this.addKnownToken(ANY_TOKEN_HASH); this.addKnownToken(ANY_HTTPS_TOKEN_HASH); this.addKnownToken(ANY_HTTP_TOKEN_HASH); this.addKnownToken(NO_TOKEN_HASH); } addKnownToken(th) { this.knownTokens[th & 0xFFFF] = 1; } // Tokenize on demand. getTokens() { if ( this._tokenized ) { return this._tokens; } let i = this._tokenize(); this._tokens[i+0] = ANY_TOKEN_HASH; this._tokens[i+1] = 0; i += 2; if ( this._urlOut.startsWith('https://') ) { this._tokens[i+0] = ANY_HTTPS_TOKEN_HASH; this._tokens[i+1] = 0; i += 2; } else if ( this._urlOut.startsWith('http://') ) { this._tokens[i+0] = ANY_HTTP_TOKEN_HASH; this._tokens[i+1] = 0; i += 2; } this._tokens[i+0] = NO_TOKEN_HASH; this._tokens[i+1] = 0; this._tokens[i+2] = INVALID_TOKEN_HASH; this._tokenized = true; return this._tokens; } hasQuery() { if ( this._hasQuery === 0 ) { const i = this._urlOut.indexOf('?'); this._hasQuery = i !== -1 ? i + 1 : -1; } return this._hasQuery > 0; } // http://www.cse.yorku.ca/~oz/hash.html#djb2 tokenHashFromString(s) { const l = s.length; if ( l === 0 ) { return EMPTY_TOKEN_HASH; } const vtc = this._validTokenChars; let th = vtc[s.charCodeAt(0)]; for ( let i = 1; i !== 7 /* MAX_TOKEN_LENGTH */ && i !== l; i++ ) { th = (th << 5) + th ^ vtc[s.charCodeAt(i)]; } return th & 0xFFFFFFF; } stringFromTokenHash(th) { if ( th === 0 ) { return ''; } return th.toString(16); } toSelfie() { return this.knownTokens; } fromSelfie(selfie) { this.knownTokens = selfie; } // https://github.com/chrisaljoudi/uBlock/issues/1118 // We limit to a maximum number of tokens. _tokenize() { const tokens = this._tokens; const url = this._urlOut; const l = bidiTrie.setHaystackLen(url.length); if ( l === 0 ) { return 0; } let j = 0; let hasq = -1; mainLoop: { const knownTokens = this.knownTokens; const vtc = this._validTokenChars; const charCodes = bidiTrie.haystack; let i = 0, n = 0, ti = 0, th = 0; for (;;) { for (;;) { if ( i === l ) { break mainLoop; } const cc = url.charCodeAt(i); charCodes[i] = cc; i += 1; th = vtc[cc]; if ( th !== 0 ) { break; } if ( cc === 0x3F /* '?' */ ) { hasq = i; } } ti = i - 1; n = 1; for (;;) { if ( i === l ) { break; } const cc = url.charCodeAt(i); charCodes[i] = cc; i += 1; const v = vtc[cc]; if ( v === 0 ) { if ( cc === 0x3F /* '?' */ ) { hasq = i; } break; } if ( n === 7 /* MAX_TOKEN_LENGTH */ ) { continue; } th = (th << 5) + th ^ v; n += 1; } if ( knownTokens[th & 0xFFFF] !== 0 ) { tokens[j+0] = th & 0xFFFFFFF; tokens[j+1] = ti; j += 2; } } } this._hasQuery = hasq; return j; } })(); /******************************************************************************/ /******************************************************************************/ class FilterCompiler { constructor(other = undefined) { if ( other !== undefined ) { return Object.assign(this, other); } this.reTokens = /[%0-9A-Za-z]+/g; this.optionValues = new Map(); this.tokenIdToNormalizedType = new Map([ [ sfp.NODE_TYPE_NET_OPTION_NAME_CNAME, bitFromType('cname') ], [ sfp.NODE_TYPE_NET_OPTION_NAME_CSS, bitFromType('stylesheet') ], [ sfp.NODE_TYPE_NET_OPTION_NAME_DOC, bitFromType('main_frame') ], [ sfp.NODE_TYPE_NET_OPTION_NAME_FONT, bitFromType('font') ], [ sfp.NODE_TYPE_NET_OPTION_NAME_FRAME, bitFromType('sub_frame') ], [ sfp.NODE_TYPE_NET_OPTION_NAME_GENERICBLOCK, bitFromType('unsupported') ], [ sfp.NODE_TYPE_NET_OPTION_NAME_GHIDE, bitFromType('generichide') ], [ sfp.NODE_TYPE_NET_OPTION_NAME_IMAGE, bitFromType('image') ], [ sfp.NODE_TYPE_NET_OPTION_NAME_INLINEFONT, bitFromType('inline-font') ], [ sfp.NODE_TYPE_NET_OPTION_NAME_INLINESCRIPT, bitFromType('inline-script') ], [ sfp.NODE_TYPE_NET_OPTION_NAME_MEDIA, bitFromType('media') ], [ sfp.NODE_TYPE_NET_OPTION_NAME_OBJECT, bitFromType('object') ], [ sfp.NODE_TYPE_NET_OPTION_NAME_OTHER, bitFromType('other') ], [ sfp.NODE_TYPE_NET_OPTION_NAME_PING, bitFromType('ping') ], [ sfp.NODE_TYPE_NET_OPTION_NAME_POPUNDER, bitFromType('popunder') ], [ sfp.NODE_TYPE_NET_OPTION_NAME_POPUP, bitFromType('popup') ], [ sfp.NODE_TYPE_NET_OPTION_NAME_SCRIPT, bitFromType('script') ], [ sfp.NODE_TYPE_NET_OPTION_NAME_SHIDE, bitFromType('specifichide') ], [ sfp.NODE_TYPE_NET_OPTION_NAME_XHR, bitFromType('xmlhttprequest') ], [ sfp.NODE_TYPE_NET_OPTION_NAME_WEBRTC, bitFromType('unsupported') ], [ sfp.NODE_TYPE_NET_OPTION_NAME_WEBSOCKET, bitFromType('websocket') ], ]); this.modifierIdToNormalizedId = new Map([ [ sfp.NODE_TYPE_NET_OPTION_NAME_CSP, MODIFIER_TYPE_CSP ], [ sfp.NODE_TYPE_NET_OPTION_NAME_PERMISSIONS, MODIFIER_TYPE_PERMISSIONS ], [ sfp.NODE_TYPE_NET_OPTION_NAME_REDIRECT, MODIFIER_TYPE_REDIRECT ], [ sfp.NODE_TYPE_NET_OPTION_NAME_REDIRECTRULE, MODIFIER_TYPE_REDIRECTRULE ], [ sfp.NODE_TYPE_NET_OPTION_NAME_REMOVEPARAM, MODIFIER_TYPE_REMOVEPARAM ], [ sfp.NODE_TYPE_NET_OPTION_NAME_URLTRANSFORM, MODIFIER_TYPE_URLTRANSFORM ], [ sfp.NODE_TYPE_NET_OPTION_NAME_REPLACE, MODIFIER_TYPE_REPLACE ], [ sfp.NODE_TYPE_NET_OPTION_NAME_URLSKIP, MODIFIER_TYPE_URLSKIP ], ]); // These top 100 "bad tokens" are collated using the "miss" histogram // from tokenHistograms(). The "score" is their occurrence among the // 200K+ URLs used in the benchmark and executed against default // filter lists. this.badTokens = new Map([ [ 'https',123617 ], [ 'com',76987 ], [ 'js',43620 ], [ 'www',33129 ], [ 'jpg',32221 ], [ 'images',31812 ], [ 'css',19715 ], [ 'png',19140 ], [ 'static',15724 ], [ 'net',15239 ], [ 'de',13155 ], [ 'img',11109 ], [ 'assets',10746 ], [ 'min',7807 ], [ 'cdn',7568 ], [ 'content',6900 ], [ 'wp',6444 ], [ 'fonts',6095 ], [ 'svg',5976 ], [ 'http',5813 ], [ 'ssl',5735 ], [ 'amazon',5440 ], [ 'ru',5427 ], [ 'fr',5199 ], [ 'facebook',5178 ], [ 'en',5146 ], [ 'image',5028 ], [ 'html',4837 ], [ 'media',4833 ], [ 'co',4783 ], [ 'php',3972 ], [ '2019',3943 ], [ 'org',3924 ], [ 'jquery',3531 ], [ '02',3438 ], [ 'api',3382 ], [ 'gif',3350 ], [ 'eu',3322 ], [ 'prod',3289 ], [ 'woff2',3200 ], [ 'logo',3194 ], [ 'themes',3107 ], [ 'icon',3048 ], [ 'google',3026 ], [ 'v1',3019 ], [ 'uploads',2963 ], [ 'googleapis',2860 ], [ 'v3',2816 ], [ 'tv',2762 ], [ 'icons',2748 ], [ 'core',2601 ], [ 'gstatic',2581 ], [ 'ac',2509 ], [ 'utag',2466 ], [ 'id',2459 ], [ 'ver',2448 ], [ 'rsrc',2387 ], [ 'files',2361 ], [ 'uk',2357 ], [ 'us',2271 ], [ 'pl',2262 ], [ 'common',2205 ], [ 'public',2076 ], [ '01',2016 ], [ 'na',1957 ], [ 'v2',1954 ], [ '12',1914 ], [ 'thumb',1895 ], [ 'web',1853 ], [ 'ui',1841 ], [ 'default',1825 ], [ 'main',1737 ], [ 'false',1715 ], [ '2018',1697 ], [ 'embed',1639 ], [ 'player',1634 ], [ 'dist',1599 ], [ 'woff',1593 ], [ 'global',1593 ], [ 'json',1572 ], [ '11',1566 ], [ '600',1559 ], [ 'app',1556 ], [ 'styles',1533 ], [ 'plugins',1526 ], [ '274',1512 ], [ 'random',1505 ], [ 'sites',1505 ], [ 'imasdk',1501 ], [ 'bridge3',1501 ], [ 'news',1496 ], [ 'width',1494 ], [ 'thumbs',1485 ], [ 'ttf',1470 ], [ 'ajax',1463 ], [ 'user',1454 ], [ 'scripts',1446 ], [ 'twitter',1440 ], [ 'crop',1431 ], [ 'new',1412], ]); this.reset(); } reset() { this.action = BLOCK_REALM; // anchor: bit vector // 0000 (0x0): no anchoring // 0001 (0x1): anchored to the end of the URL. // 0010 (0x2): anchored to the start of the URL. // 0011 (0x3): anchored to the start and end of the URL. // 0100 (0x4): anchored to the hostname of the URL. // 0101 (0x5): anchored to the hostname and end of the URL. this.anchor = 0; this.badFilter = false; this.error = undefined; this.modifyType = undefined; this.modifyValue = undefined; this.pattern = ''; this.party = ANYPARTY_REALM; this.optionUnitBits = 0; this.optionValues.clear(); this.isPureHostname = false; this.isGeneric = false; this.isRegex = false; this.strictParty = 0; this.token = '*'; this.tokenHash = NO_TOKEN_HASH; this.tokenBeg = 0; this.typeBits = 0; this.notTypeBits = 0; this.methodBits = 0; this.notMethodBits = 0; return this; } start(/* writer */) { } finish(/* writer */) { } clone() { return new FilterCompiler(this); } normalizeRegexSource(s) { try { const re = new RegExp(s); return re.source; } catch { } return ''; } processMethodOption(value) { for ( const method of value.split('|') ) { if ( method.charCodeAt(0) === 0x7E /* '~' */ ) { const bit = FilteringContext.getMethod(method.slice(1)) || 0; if ( bit === 0 ) { continue; } this.notMethodBits |= bit; } else { const bit = FilteringContext.getMethod(method) || 0; if ( bit === 0 ) { continue; } this.methodBits |= bit; } } this.methodBits &= ~this.notMethodBits; } // https://github.com/chrisaljoudi/uBlock/issues/589 // Be ready to handle multiple negated types processTypeOption(id, not) { if ( id !== -1 ) { const typeBit = this.tokenIdToNormalizedType.get(id); if ( not ) { this.notTypeBits |= typeBit; } else { this.typeBits |= typeBit; } return; } // `all` option if ( not ) { this.notTypeBits |= allTypesBits; } else { this.typeBits |= allTypesBits; } } processPartyOption(firstParty, not) { if ( not ) { firstParty = !firstParty; } this.party |= firstParty ? FIRSTPARTY_REALM : THIRDPARTY_REALM; } processHostnameList(iter, out = []) { let i = 0; for ( const { hn, not, bad } of iter ) { if ( bad ) { return ''; } out[i] = not ? `~${hn}` : hn; i += 1; } out.length = i; return i === 1 ? out[0] : out.join('|'); } processModifierOption(modifier, value) { if ( this.modifyType !== undefined ) { return false; } const normalized = this.modifierIdToNormalizedId.get(modifier); if ( normalized === undefined ) { return false; } this.modifyType = normalized; this.modifyValue = value || ''; return true; } processCspOption(value) { this.modifyType = MODIFIER_TYPE_CSP; this.modifyValue = value || ''; this.optionUnitBits |= MODIFY_BIT; return true; } processOptionWithValue(parser, id) { switch ( id ) { case sfp.NODE_TYPE_NET_OPTION_NAME_CSP: if ( this.processCspOption(parser.getNetOptionValue(id)) === false ) { return false; } break; case sfp.NODE_TYPE_NET_OPTION_NAME_DENYALLOW: { const value = this.processHostnameList( parser.getNetFilterDenyallowOptionIterator() ); if ( value === '' ) { return false; } this.optionValues.set('denyallow', value); this.optionUnitBits |= DENYALLOW_BIT; break; } case sfp.NODE_TYPE_NET_OPTION_NAME_FROM: { const iter = parser.getNetFilterFromOptionIterator(); const list = []; const value = this.processHostnameList(iter, list); if ( value === '' ) { return false; } this.optionValues.set('from', value); this.optionValues.set('fromList', list); this.optionUnitBits |= FROM_BIT; break; } case sfp.NODE_TYPE_NET_OPTION_NAME_HEADER: { this.optionValues.set('header', parser.getNetOptionValue(id) || ''); this.optionUnitBits |= HEADER_BIT; break; } case sfp.NODE_TYPE_NET_OPTION_NAME_IPADDRESS: this.optionValues.set('ipaddress', parser.getNetOptionValue(id) || ''); this.optionUnitBits |= IPADDRESS_BIT; break; case sfp.NODE_TYPE_NET_OPTION_NAME_METHOD: this.processMethodOption(parser.getNetOptionValue(id)); this.optionUnitBits |= METHOD_BIT; break; case sfp.NODE_TYPE_NET_OPTION_NAME_PERMISSIONS: case sfp.NODE_TYPE_NET_OPTION_NAME_REDIRECTRULE: case sfp.NODE_TYPE_NET_OPTION_NAME_REMOVEPARAM: case sfp.NODE_TYPE_NET_OPTION_NAME_REPLACE: case sfp.NODE_TYPE_NET_OPTION_NAME_URLSKIP: case sfp.NODE_TYPE_NET_OPTION_NAME_URLTRANSFORM: if ( this.processModifierOption(id, parser.getNetOptionValue(id)) === false ) { return false; } this.optionUnitBits |= MODIFY_BIT; break; case sfp.NODE_TYPE_NET_OPTION_NAME_REASON: this.optionValues.set('reason', parser.getNetOptionValue(id)); this.optionUnitBits |= MESSAGE_BIT; break; case sfp.NODE_TYPE_NET_OPTION_NAME_REDIRECT: { const actualId = this.action === ALLOW_REALM ? sfp.NODE_TYPE_NET_OPTION_NAME_REDIRECTRULE : id; if ( this.processModifierOption(actualId, parser.getNetOptionValue(id)) === false ) { return false; } this.optionUnitBits |= MODIFY_BIT; break; } case sfp.NODE_TYPE_NET_OPTION_NAME_TO: { const iter = parser.getNetFilterToOptionIterator(); const list = []; const value = this.processHostnameList(iter, list); if ( value === '' ) { return false; } this.optionValues.set('to', value); this.optionValues.set('toList', list); this.optionUnitBits |= TO_BIT; break; } default: break; } return true; } process(parser) { // important! this.reset(); if ( parser.hasError() ) { return this.FILTER_INVALID; } if ( parser.isException() ) { this.action = ALLOW_REALM; } if ( parser.isLeftHnAnchored() ) { this.anchor |= 0b100; } else if ( parser.isLeftAnchored() ) { this.anchor |= 0b010; } if ( parser.isRightAnchored() ) { this.anchor |= 0b001; } this.pattern = parser.getNetPattern(); if ( parser.isHostnamePattern() ) { this.isPureHostname = true; } else if ( parser.isGenericPattern() ) { this.isGeneric = true; } else if ( parser.isRegexPattern() ) { this.isRegex = true; } for ( const type of parser.getNodeTypes() ) { switch ( type ) { case sfp.NODE_TYPE_NET_OPTION_NAME_1P: this.processPartyOption(true, parser.isNegatedOption(type)); break; case sfp.NODE_TYPE_NET_OPTION_NAME_STRICT1P: this.strictParty = this.strictParty === -1 ? 0 : 1; this.optionUnitBits |= STRICT_PARTY_BIT; break; case sfp.NODE_TYPE_NET_OPTION_NAME_3P: this.processPartyOption(false, parser.isNegatedOption(type)); break; case sfp.NODE_TYPE_NET_OPTION_NAME_STRICT3P: this.strictParty = this.strictParty === 1 ? 0 : -1; this.optionUnitBits |= STRICT_PARTY_BIT; break; case sfp.NODE_TYPE_NET_OPTION_NAME_ALL: this.processTypeOption(-1); break; case sfp.NODE_TYPE_NET_OPTION_NAME_BADFILTER: this.badFilter = true; break; case sfp.NODE_TYPE_NET_OPTION_NAME_CNAME: case sfp.NODE_TYPE_NET_OPTION_NAME_CSS: case sfp.NODE_TYPE_NET_OPTION_NAME_DOC: case sfp.NODE_TYPE_NET_OPTION_NAME_FONT: case sfp.NODE_TYPE_NET_OPTION_NAME_FRAME: case sfp.NODE_TYPE_NET_OPTION_NAME_GENERICBLOCK: case sfp.NODE_TYPE_NET_OPTION_NAME_GHIDE: case sfp.NODE_TYPE_NET_OPTION_NAME_IMAGE: case sfp.NODE_TYPE_NET_OPTION_NAME_INLINEFONT: case sfp.NODE_TYPE_NET_OPTION_NAME_INLINESCRIPT: case sfp.NODE_TYPE_NET_OPTION_NAME_MEDIA: case sfp.NODE_TYPE_NET_OPTION_NAME_OBJECT: case sfp.NODE_TYPE_NET_OPTION_NAME_OTHER: case sfp.NODE_TYPE_NET_OPTION_NAME_PING: case sfp.NODE_TYPE_NET_OPTION_NAME_POPUNDER: case sfp.NODE_TYPE_NET_OPTION_NAME_POPUP: case sfp.NODE_TYPE_NET_OPTION_NAME_SCRIPT: case sfp.NODE_TYPE_NET_OPTION_NAME_SHIDE: case sfp.NODE_TYPE_NET_OPTION_NAME_XHR: case sfp.NODE_TYPE_NET_OPTION_NAME_WEBRTC: case sfp.NODE_TYPE_NET_OPTION_NAME_WEBSOCKET: this.processTypeOption(type, parser.isNegatedOption(type)); break; case sfp.NODE_TYPE_NET_OPTION_NAME_CSP: case sfp.NODE_TYPE_NET_OPTION_NAME_DENYALLOW: case sfp.NODE_TYPE_NET_OPTION_NAME_FROM: case sfp.NODE_TYPE_NET_OPTION_NAME_HEADER: case sfp.NODE_TYPE_NET_OPTION_NAME_IPADDRESS: case sfp.NODE_TYPE_NET_OPTION_NAME_METHOD: case sfp.NODE_TYPE_NET_OPTION_NAME_PERMISSIONS: case sfp.NODE_TYPE_NET_OPTION_NAME_REASON: case sfp.NODE_TYPE_NET_OPTION_NAME_REDIRECT: case sfp.NODE_TYPE_NET_OPTION_NAME_REDIRECTRULE: case sfp.NODE_TYPE_NET_OPTION_NAME_REMOVEPARAM: case sfp.NODE_TYPE_NET_OPTION_NAME_REPLACE: case sfp.NODE_TYPE_NET_OPTION_NAME_TO: case sfp.NODE_TYPE_NET_OPTION_NAME_URLSKIP: case sfp.NODE_TYPE_NET_OPTION_NAME_URLTRANSFORM: if ( this.processOptionWithValue(parser, type) === false ) { return this.FILTER_INVALID; } break; case sfp.NODE_TYPE_NET_OPTION_NAME_EHIDE: { const not = parser.isNegatedOption(type); this.processTypeOption(sfp.NODE_TYPE_NET_OPTION_NAME_SHIDE, not); this.processTypeOption(sfp.NODE_TYPE_NET_OPTION_NAME_GHIDE, not); break; } case sfp.NODE_TYPE_NET_OPTION_NAME_EMPTY: { const id = this.action === ALLOW_REALM ? sfp.NODE_TYPE_NET_OPTION_NAME_REDIRECTRULE : sfp.NODE_TYPE_NET_OPTION_NAME_REDIRECT; if ( this.processModifierOption(id, 'empty') === false ) { return this.FILTER_INVALID; } this.optionUnitBits |= MODIFY_BIT; break; } case sfp.NODE_TYPE_NET_OPTION_NAME_IMPORTANT: this.optionUnitBits |= IMPORTANT_BIT; this.action = BLOCKIMPORTANT_REALM; break; case sfp.NODE_TYPE_NET_OPTION_NAME_MATCHCASE: this.optionValues.set('match-case', true); break; case sfp.NODE_TYPE_NET_OPTION_NAME_MP4: { const id = this.action === ALLOW_REALM ? sfp.NODE_TYPE_NET_OPTION_NAME_REDIRECTRULE : sfp.NODE_TYPE_NET_OPTION_NAME_REDIRECT; if ( this.processModifierOption(id, 'noopmp4-1s') === false ) { return this.FILTER_INVALID; } this.processTypeOption(sfp.NODE_TYPE_NET_OPTION_NAME_MEDIA, false); this.optionUnitBits |= MODIFY_BIT; break; } default: break; } } if ( this.party === ALLPARTIES_REALM ) { this.party = ANYPARTY_REALM; } // Negated network types? Toggle on all network type bits. // Negated non-network types can only toggle themselves. // // https://github.com/gorhill/uBlock/issues/2385 // Toggle on all network types if: // - at least one network type is negated; or // - no network type is present -- i.e. all network types are // implicitly toggled on if ( this.notTypeBits !== 0 ) { if ( (this.typeBits && allNetworkTypesBits) === allNetworkTypesBits ) { this.typeBits &= ~this.notTypeBits | allNetworkTypesBits; } else { this.typeBits &= ~this.notTypeBits; } this.optionUnitBits |= NOT_TYPE_BIT; } // CSP/permissions options implicitly apply only to // document/subdocument. if ( this.modifyType === MODIFIER_TYPE_CSP || this.modifyType === MODIFIER_TYPE_PERMISSIONS ) { if ( this.typeBits === 0 ) { this.processTypeOption(sfp.NODE_TYPE_NET_OPTION_NAME_DOC, false); this.processTypeOption(sfp.NODE_TYPE_NET_OPTION_NAME_FRAME, false); } } // https://github.com/gorhill/uBlock/issues/2283 // Abort if type is only for unsupported types, otherwise // toggle off `unsupported` bit. if ( this.typeBits & unsupportedTypeBit ) { this.typeBits &= ~unsupportedTypeBit; if ( this.typeBits === 0 ) { return this.FILTER_UNSUPPORTED; } } // Plain hostname? (from HOSTS file) if ( this.isPureHostname && parser.hasOptions() === false ) { this.anchor |= 0b100; return this.FILTER_OK; } // regex? if ( this.isRegex ) { return this.FILTER_OK; } if ( this.pattern.length > 1024 ) { return this.FILTER_UNSUPPORTED; } return this.FILTER_OK; } // Given a string, find a good token. Tokens which are too generic, // i.e. very common with a high probability of ending up as a miss, // are not good. Avoid if possible. This has a significant positive // impact on performance. // // For pattern-less removeparam filters, try to derive a pattern from // the removeparam value. makeToken() { if ( this.pattern === '*' ) { if ( this.modifyType !== MODIFIER_TYPE_REMOVEPARAM ) { return; } return this.extractTokenFromQuerypruneValue(); } if ( this.isRegex ) { return this.extractTokenFromRegex(this.pattern); } this.extractTokenFromPattern(this.pattern); } // Note: a one-char token is better than a documented bad token. extractTokenFromPattern(pattern) { this.reTokens.lastIndex = 0; let bestMatch = null; let bestBadness = 0x7FFFFFFF; for (;;) { const match = this.reTokens.exec(pattern); if ( match === null ) { break; } const token = match[0]; const badness = token.length > 1 ? this.badTokens.get(token) || 0 : 1; if ( badness >= bestBadness ) { continue; } if ( match.index > 0 ) { const c = pattern.charCodeAt(match.index - 1); if ( c === 0x2A /* '*' */ ) { continue; } } if ( token.length < MAX_TOKEN_LENGTH ) { const lastIndex = this.reTokens.lastIndex; if ( lastIndex < pattern.length ) { const c = pattern.charCodeAt(lastIndex); if ( c === 0x2A /* '*' */ ) { continue; } } } bestMatch = match; if ( badness === 0 ) { break; } bestBadness = badness; } if ( bestMatch !== null ) { this.token = bestMatch[0]; this.tokenHash = urlTokenizer.tokenHashFromString(this.token); this.tokenBeg = bestMatch.index; } } // https://github.com/gorhill/uBlock/issues/2781 // For efficiency purpose, try to extract a token from a regex-based // filter. // https://github.com/uBlockOrigin/uBlock-issues/issues/1145#issuecomment-657036902 // Mind `\b` directives: `/\bads\b/` should result in token being `ads`, // not `bads`. extractTokenFromRegex(pattern) { pattern = tokenizableStrFromRegex(pattern); this.reTokens.lastIndex = 0; let bestToken; let bestBadness = 0x7FFFFFFF; for (;;) { const matches = this.reTokens.exec(pattern); if ( matches === null ) { break; } const { 0: token, index } = matches; if ( index === 0 || pattern.charAt(index - 1) === '\x01' ) { continue; } const { lastIndex } = this.reTokens; if ( token.length < MAX_TOKEN_LENGTH && ( lastIndex === pattern.length || pattern.charAt(lastIndex) === '\x01' ) ) { continue; } const badness = token.length > 1 ? this.badTokens.get(token) || 0 : 1; if ( badness < bestBadness ) { bestToken = token; if ( badness === 0 ) { break; } bestBadness = badness; } } if ( bestToken !== undefined ) { this.token = bestToken.toLowerCase(); this.tokenHash = urlTokenizer.tokenHashFromString(this.token); } } // https://github.com/uBlockOrigin/uAssets/discussions/14683#discussioncomment-3559284 // If the removeparam value is a regex, unescape escaped commas extractTokenFromQuerypruneValue() { const pattern = this.modifyValue; if ( pattern === '*' || pattern.charCodeAt(0) === 0x7E /* '~' */ ) { return; } const match = /^\/(.+)\/i?$/.exec(pattern); if ( match !== null ) { return this.extractTokenFromRegex( match[1].replace(/(\{\d*)\\,/, '$1,') ); } if ( pattern.startsWith('|') ) { return this.extractTokenFromRegex('\\b' + pattern.slice(1)); } this.extractTokenFromPattern(pattern.toLowerCase()); } hasNoOptionUnits() { return this.optionUnitBits === 0; } isJustOrigin() { if ( this.optionUnitBits !== FROM_BIT ) { return false; } if ( this.isRegex ) { return false; } if ( /[/~]/.test(this.optionValues.get('from')) ) { return false; } if ( this.pattern === '*' ) { return true; } if ( this.anchor !== 0b010 ) { return false; } if ( /^(?:http[s*]?:(?:\/\/)?)$/.test(this.pattern) ) { return true; } return false; } domainIsEntity(s) { const l = s.length; return l > 2 && s.charCodeAt(l-1) === 0x2A /* '*' */ && s.charCodeAt(l-2) === 0x2E /* '.' */; } compile(parser, writer) { const r = this.process(parser); // Ignore non-static network filters if ( r === this.FILTER_INVALID ) { return false; } // Ignore filters with unsupported options if ( r === this.FILTER_UNSUPPORTED ) { const who = writer.properties.get('name') || '?'; this.error = `Invalid network filter in ${who}: ${parser.raw}`; return false; } writer.select( this.badFilter ? 'NETWORK_FILTERS:BAD' : 'NETWORK_FILTERS:GOOD' ); // Reminder: // `redirect=` is a combination of a `redirect-rule` filter and a // block filter. if ( this.modifyType === MODIFIER_TYPE_REDIRECT ) { this.modifyType = MODIFIER_TYPE_REDIRECTRULE; // Do not generate block rule when compiling to DNR ruleset if ( parser.options.toDNR !== true ) { const parsedBlock = this.clone(); parsedBlock.modifyType = undefined; parsedBlock.optionUnitBits &= ~MODIFY_BIT; parsedBlock.compileToFilter(writer); } } this.compileToFilter(writer); return true; } compileToFilter(writer) { // Pure hostnames, use more efficient dictionary lookup if ( this.isPureHostname && this.hasNoOptionUnits() ) { this.tokenHash = DOT_TOKEN_HASH; this.compileToAtomicFilter(this.pattern, writer); return; } this.makeToken(); // Special pattern/option cases: // - `*$domain=...` // - `|http://$domain=...` // - `|https://$domain=...` // The semantic of "just-origin" filters is that contrary to normal // filters, the original filter is split into as many filters as there // are entries in the `domain=` option. if ( this.isJustOrigin() ) { if ( this.pattern === '*' || this.pattern.startsWith('http*') ) { this.tokenHash = ANY_TOKEN_HASH; } else if /* 'https:' */ ( this.pattern.startsWith('https') ) { this.tokenHash = ANY_HTTPS_TOKEN_HASH; } else /* 'http:' */ { this.tokenHash = ANY_HTTP_TOKEN_HASH; } for ( const hn of this.optionValues.get('fromList') ) { this.compileToAtomicFilter(hn, writer); } return; } const units = []; // Pattern const patternClass = this.compilePattern(units); // Anchor: must never appear before pattern unit if ( (this.anchor & 0b100) !== 0 ) { if ( this.isPureHostname ) { units.push(FilterAnchorHn.compile()); } else { units.push(FilterAnchorHnLeft.compile()); } } else if ( (this.anchor & 0b010) !== 0 ) { units.push(FilterAnchorLeft.compile()); } if ( (this.anchor & 0b001) !== 0 ) { units.push(FilterAnchorRight.compile()); } // Method(s) if ( this.methodBits !== 0 || this.notMethodBits !== 0 ) { units.push(FilterMethod.compile(this)); } // Not types if ( this.notTypeBits !== 0 ) { units.push(FilterNotType.compile(this)); } // Strict partiness if ( this.strictParty !== 0 ) { units.push(FilterStrictParty.compile(this)); } // Origin if ( (this.optionUnitBits & FROM_BIT) !== 0 ) { compileFromDomainOpt( this.optionValues.get('fromList'), units.length !== 0 && patternClass.isSlow === true, units ); } // Destination if ( (this.optionUnitBits & TO_BIT) !== 0 ) { compileToDomainOpt( this.optionValues.get('toList'), units.length !== 0 && patternClass.isSlow === true, units ); } // Deny-allow if ( (this.optionUnitBits & DENYALLOW_BIT) !== 0 ) { units.push(FilterDenyAllow.compile(this)); } // IP address if ( (this.optionUnitBits & IPADDRESS_BIT) !== 0 ) { units.push(FilterIPAddress.compile(this)); } // Header if ( (this.optionUnitBits & HEADER_BIT) !== 0 ) { units.push(FilterOnHeaders.compile(this)); this.action |= HEADERS_REALM; } // Message if ( (this.optionUnitBits & MESSAGE_BIT) !== 0 ) { units.push(FilterMessage.compile(this)); } // Important // // IMPORTANT: must always appear at the end of the sequence, so as to // ensure $isBlockImportant is set only for matching filters. if ( (this.optionUnitBits & IMPORTANT_BIT) !== 0 ) { units.push(FilterImportant.compile()); } // Modifier // // IMPORTANT: the modifier unit MUST always appear first in a sequence if ( this.modifyType !== undefined ) { units.unshift(FilterModifier.compile(this)); this.action = (this.action & ~BLOCKALLOW_REALM) | modifierBitsFromType.get(this.modifyType); } const fdata = units.length === 1 ? units[0] : FilterCompositeAll.compile(units); this.compileToAtomicFilter(fdata, writer); if ( (this.optionUnitBits & IPADDRESS_BIT) !== 0 ) { if ( (this.action & HEADERS_REALM) !== 0 ) { return; } this.action |= HEADERS_REALM; this.compileToAtomicFilter(fdata, writer); } } compilePattern(units) { if ( this.isRegex ) { units.push(FilterRegex.compile(this)); return FilterRegex; } if ( this.pattern === '*' ) { units.push(FilterPatternAny.compile()); return FilterPatternAny; } if ( this.tokenHash === NO_TOKEN_HASH ) { units.push(FilterPatternGeneric.compile(this)); return FilterPatternGeneric; } if ( this.pattern.includes('*') === false ) { const caretPos = this.pattern.indexOf('^'); if ( caretPos === -1 ) { units.push(FilterPatternPlain.compile(this)); return FilterPatternPlain; } if ( caretPos === (this.pattern.length - 1) ) { this.pattern = this.pattern.slice(0, -1); units.push(FilterPatternPlain.compile(this)); units.push(FilterTrailingSeparator.compile()); return FilterPatternPlain; } } units.push(FilterPatternGeneric.compile(this)); return FilterPatternGeneric; } compileToAtomicFilter(fdata, writer) { const catBits = this.action | this.party; let { typeBits } = this; // Typeless if ( typeBits === 0 ) { writer.push([ catBits, this.tokenHash, fdata ]); return; } // If all network types are set, create a typeless filter. Excluded // network types are tested at match time, se we act as if they are // set. if ( (typeBits & allNetworkTypesBits) === allNetworkTypesBits ) { writer.push([ catBits, this.tokenHash, fdata ]); typeBits &= ~allNetworkTypesBits; if ( typeBits === 0 ) { return; } } // One filter per specific types let bitOffset = 1; do { if ( typeBits & 1 ) { writer.push([ catBits | (bitOffset << TYPE_REALM_OFFSET), this.tokenHash, fdata ]); } bitOffset += 1; typeBits >>>= 1; } while ( typeBits !== 0 ); } } // These are to quickly test whether a filter is composite const FROM_BIT = 0b00000000001; const TO_BIT = 0b00000000010; const DENYALLOW_BIT = 0b00000000100; const HEADER_BIT = 0b00000001000; const STRICT_PARTY_BIT = 0b00000010000; const MODIFY_BIT = 0b00000100000; const NOT_TYPE_BIT = 0b00001000000; const IMPORTANT_BIT = 0b00010000000; const METHOD_BIT = 0b00100000000; const IPADDRESS_BIT = 0b01000000000; const MESSAGE_BIT = 0b10000000000 FilterCompiler.prototype.FILTER_OK = 0; FilterCompiler.prototype.FILTER_INVALID = 1; FilterCompiler.prototype.FILTER_UNSUPPORTED = 2; /******************************************************************************/ /******************************************************************************/ const StaticNetFilteringEngine = function() { this.compilerVersion = '10'; this.selfieVersion = '10'; this.MAX_TOKEN_LENGTH = MAX_TOKEN_LENGTH; this.optimizeTaskId = undefined; // As long as CategoryCount is reasonably low, we will use an array to // store buckets using category bits as index. If ever CategoryCount // becomes too large, we can just go back to using a Map. this.bitsToBucket = new Map(); this.goodFilters = new Set(); this.badFilters = new Set(); this.unitsToOptimize = []; this.reset(); }; /******************************************************************************/ StaticNetFilteringEngine.prototype.prime = function() { origHNTrieContainer.reset( keyvalStore.getItem('SNFE.origHNTrieContainer.trieDetails') ); destHNTrieContainer.reset( keyvalStore.getItem('SNFE.destHNTrieContainer.trieDetails') ); }; /******************************************************************************/ StaticNetFilteringEngine.prototype.reset = function() { this.processedFilterCount = 0; this.acceptedCount = 0; this.discardedCount = 0; this.goodFilters.clear(); this.badFilters.clear(); this.unitsToOptimize.length = 0; this.bitsToBucket.clear(); urlTokenizer.resetKnownTokens(); filterDataReset(); filterRefsReset(); origHNTrieContainer.reset(); destHNTrieContainer.reset(); bidiTrie.reset(); filterArgsToUnit.clear(); // Cancel potentially pending optimization run. if ( this.optimizeTaskId !== undefined ) { dropTask(this.optimizeTaskId); this.optimizeTaskId = undefined; } this.notReady = false; // Runtime registers this.$catBits = 0; this.$tokenHash = 0; this.$filterUnit = 0; }; /******************************************************************************/ StaticNetFilteringEngine.prototype.freeze = function() { const unserialize = CompiledListReader.unserialize; for ( const line of this.goodFilters ) { if ( this.badFilters.has(line) ) { this.discardedCount += 1; continue; } const args = unserialize(line); const bits = args[0]; const bucket = this.bitsToBucket.get(bits) || (new Map()); if ( bucket.size === 0 ) { this.bitsToBucket.set(bits, bucket); } const tokenHash = args[1]; const fdata = args[2]; let iunit = bucket.get(tokenHash) || 0; if ( tokenHash === DOT_TOKEN_HASH ) { if ( iunit === 0 ) { iunit = FilterHostnameDict.create(); bucket.set(DOT_TOKEN_HASH, iunit); this.unitsToOptimize.push({ bits, tokenHash }); } FilterHostnameDict.add(iunit, fdata); continue; } if ( tokenHash === ANY_TOKEN_HASH ) { if ( iunit === 0 ) { iunit = FilterJustOrigin.create(); bucket.set(ANY_TOKEN_HASH, iunit); } FilterJustOrigin.add(iunit, fdata); continue; } if ( tokenHash === ANY_HTTPS_TOKEN_HASH ) { if ( iunit === 0 ) { iunit = FilterHTTPSJustOrigin.create(); bucket.set(ANY_HTTPS_TOKEN_HASH, iunit); } FilterHTTPSJustOrigin.add(iunit, fdata); continue; } if ( tokenHash === ANY_HTTP_TOKEN_HASH ) { if ( iunit === 0 ) { iunit = FilterHTTPJustOrigin.create(); bucket.set(ANY_HTTP_TOKEN_HASH, iunit); } FilterHTTPJustOrigin.add(iunit, fdata); continue; } urlTokenizer.addKnownToken(tokenHash); this.addFilterUnit(bits, tokenHash, filterFromCompiled(fdata)); // Add block-important filters to the block realm, so as to avoid // to unconditionally match against the block-important realm for // every network request. Block-important filters are quite rare so // the block-important realm should be checked when and only when // there is a matched exception filter, which important filters are // meant to override. if ( (bits & BLOCKALLOW_REALM) === BLOCKIMPORTANT_REALM ) { this.addFilterUnit( bits & ~IMPORTANT_REALM, tokenHash, filterFromCompiled(fdata) ); } } this.badFilters.clear(); this.goodFilters.clear(); filterArgsToUnit.clear(); this.notReady = false; // Optimizing is not critical for the static network filtering engine to // work properly, so defer this until later to allow for reduced delay to // readiness when no valid selfie is available. if ( this.optimizeTaskId !== undefined ) { return; } this.optimizeTaskId = queueTask(( ) => { this.optimizeTaskId = undefined; this.optimize(30); }, 2000); }; /******************************************************************************/ StaticNetFilteringEngine.prototype.dnrFromCompiled = function(op, context, ...args) { if ( op === 'begin' ) { Object.assign(context, { good: new Set(), bad: new Set(context.bad), invalid: new Set(), filterCount: 0, acceptedFilterCount: 0, rejectedFilterCount: 0, }); return; } if ( op === 'add' ) { const reader = args[0]; reader.select('NETWORK_FILTERS:GOOD'); while ( reader.next() ) { context.filterCount += 1; if ( context.good.has(reader.line) === false ) { context.good.add(reader.line); } } reader.select('NETWORK_FILTERS:BAD'); while ( reader.next() ) { context.bad.add(reader.line); } return; } if ( op !== 'end' ) { return; } const { good, bad } = context; const unserialize = CompiledListReader.unserialize; const buckets = new Map(); for ( const line of good ) { if ( bad.has(line) ) { context.rejectedFilterCount += 1; continue; } context.acceptedFilterCount += 1; const args = unserialize(line); const bits = args[0]; const tokenHash = args[1]; const fdata = args[2]; if ( buckets.has(bits) === false ) { buckets.set(bits, new Map()); } const bucket = buckets.get(bits); switch ( tokenHash ) { case DOT_TOKEN_HASH: { if ( bucket.has(DOT_TOKEN_HASH) === false ) { bucket.set(DOT_TOKEN_HASH, [{ condition: { requestDomains: [] } }]); } const rule = bucket.get(DOT_TOKEN_HASH)[0]; rule.condition.requestDomains.push(fdata); break; } case ANY_TOKEN_HASH: { if ( bucket.has(ANY_TOKEN_HASH) === false ) { bucket.set(ANY_TOKEN_HASH, [{ condition: { initiatorDomains: [] } }]); } const rule = bucket.get(ANY_TOKEN_HASH)[0]; rule.condition.initiatorDomains.push(fdata); break; } case ANY_HTTPS_TOKEN_HASH: { if ( bucket.has(ANY_HTTPS_TOKEN_HASH) === false ) { bucket.set(ANY_HTTPS_TOKEN_HASH, [{ condition: { urlFilter: '|https://', initiatorDomains: [] } }]); } const rule = bucket.get(ANY_HTTPS_TOKEN_HASH)[0]; rule.condition.initiatorDomains.push(fdata); break; } case ANY_HTTP_TOKEN_HASH: { if ( bucket.has(ANY_HTTP_TOKEN_HASH) === false ) { bucket.set(ANY_HTTP_TOKEN_HASH, [{ condition: { urlFilter: '|http://', initiatorDomains: [] } }]); } const rule = bucket.get(ANY_HTTP_TOKEN_HASH)[0]; rule.condition.initiatorDomains.push(fdata); break; } default: { if ( bucket.has(EMPTY_TOKEN_HASH) === false ) { bucket.set(EMPTY_TOKEN_HASH, []); } const rule = {}; dnrRuleFromCompiled(fdata, rule); bucket.get(EMPTY_TOKEN_HASH).push(rule); break; } } } // Priority: // Removeparam: 1-4 // Block: 10 (default priority) // Redirect: 11-19 // Excepted redirect: 21-29 // Allow: 30 // Block important: 40 // Redirect important: 41-49 const realms = new Map([ [ BLOCK_REALM, { type: 'block', priority: 10 } ], [ ALLOW_REALM, { type: 'allow', priority: 30 } ], [ BLOCK_REALM | IMPORTANT_REALM, { type: 'block', priority: 10 } ], [ REDIRECT_REALM, { type: 'redirect', priority: 11 } ], [ REMOVEPARAM_REALM, { type: 'removeparam', priority: 0 } ], [ CSP_REALM, { type: 'csp', priority: 0 } ], [ PERMISSIONS_REALM, { type: 'permissions', priority: 0 } ], [ URLTRANSFORM_REALM, { type: 'uritransform', priority: 0 } ], [ HEADERS_REALM, { type: 'block', priority: 10 } ], [ HEADERS_REALM | ALLOW_REALM, { type: 'allow', priority: 30 } ], [ HEADERS_REALM | IMPORTANT_REALM, { type: 'allow', priority: 10 } ], [ URLSKIP_REALM, { type: 'urlskip', priority: 0 } ], ]); const partyness = new Map([ [ ANYPARTY_REALM, '' ], [ FIRSTPARTY_REALM, 'firstParty' ], [ THIRDPARTY_REALM, 'thirdParty' ], ]); const types = new Set([ 'no_type', 'stylesheet', 'image', 'object', 'script', 'xmlhttprequest', 'sub_frame', 'main_frame', 'font', 'media', 'websocket', 'ping', 'other', ]); const ruleset = []; const seen = new Set(); for ( const [ realmBits, realmDetails ] of realms ) { for ( const [ partyBits, partyName ] of partyness ) { for ( const typeName in typeNameToTypeValue ) { if ( types.has(typeName) === false ) { continue; } const typeBits = typeNameToTypeValue[typeName]; const bits = realmBits | partyBits | typeBits; const bucket = buckets.get(bits); if ( bucket === undefined ) { continue; } for ( const rules of bucket.values() ) { for ( const rule of rules ) { rule.action = rule.action || {}; rule.action.type = realmDetails.type; if ( realmDetails.priority !== 0 ) { rule.priority = (rule.priority || 0) + realmDetails.priority; } if ( partyName !== '' ) { rule.condition = rule.condition || {}; rule.condition.domainType = partyName; } if ( typeName !== 'no_type' ) { rule.condition = rule.condition || {}; rule.condition.resourceTypes = [ typeName ]; } const hash = JSON.stringify(rule); if ( seen.has(hash) ) { continue; } seen.add(hash); ruleset.push(rule); } } } } } seen.clear(); // Adjust `important` priority for ( const rule of ruleset ) { if ( rule.__important !== true ) { continue; } if ( rule.priority === undefined ) { continue; } rule.priority += 30; } // Collect generichide filters const generichideExclusions = []; const generichideInclusions = []; { const bucket = buckets.get(ALLOW_REALM | typeNameToTypeValue['generichide']); if ( bucket ) { for ( const rules of bucket.values() ) { for ( const rule of rules ) { if ( rule.condition === undefined ) { continue; } if ( rule.condition.initiatorDomains ) { generichideExclusions.push(...rule.condition.initiatorDomains); } else if ( rule.condition.requestDomains ) { generichideExclusions.push(...rule.condition.requestDomains); } if ( rule.condition.excludedInitiatorDomains ) { generichideInclusions.push(...rule.condition.excludedInitiatorDomains); } else if ( rule.condition.excludedRequestDomains ) { generichideInclusions.push(...rule.condition.excludedRequestDomains); } } } } } { const bucket = buckets.get(BLOCKIMPORTANT_REALM | typeNameToTypeValue['generichide']); if ( bucket ) { for ( const rules of bucket.values() ) { for ( const rule of rules ) { if ( rule.condition === undefined ) { continue; } if ( rule.condition.initiatorDomains ) { generichideInclusions.push(...rule.condition.initiatorDomains); } else if ( rule.condition.requestDomains ) { generichideInclusions.push(...rule.condition.requestDomains); } } } } } // Detect and attempt salvage of rules with entity-based hostnames and/or // regex-based domains. const isUnsupportedDomain = hn => hn.endsWith('.*') || hn.startsWith('/'); for ( const rule of ruleset ) { if ( rule.condition === undefined ) { continue; } for ( const prop of [ 'Initiator', 'Request' ] ) { const hitProp = `${prop.toLowerCase()}Domains`; if ( Array.isArray(rule.condition[hitProp]) ) { if ( rule.condition[hitProp].some(hn => isUnsupportedDomain(hn)) ) { const domains = rule.condition[hitProp].filter( hn => isUnsupportedDomain(hn) === false ); if ( domains.length === 0 ) { dnrAddRuleError(rule, `Can't salvage rule with unsupported domain= option: ${rule.condition[hitProp].join('|')}`); } else { dnrAddRuleWarning(rule, `Salvaged rule by ignoring ${rule.condition[hitProp].length - domains.length} unsupported domain= option: ${rule.condition[hitProp].join('|')}`); rule.condition[hitProp] = domains; } } } const missProp = `excluded${prop}Domains`; if ( Array.isArray(rule.condition[missProp]) ) { if ( rule.condition[missProp].some(hn => isUnsupportedDomain(hn)) ) { const domains = rule.condition[missProp].filter( hn => isUnsupportedDomain(hn) === false ); rule.condition[missProp] = domains.length !== 0 ? domains : undefined; } } } } // Patch modifier filters for ( const rule of ruleset ) { if ( rule.__modifierType === undefined ) { continue; } let patchDomainOption = false; switch ( rule.__modifierType ) { case 'csp': rule.action.type = 'modifyHeaders'; rule.action.responseHeaders = [{ header: 'content-security-policy', operation: 'append', value: rule.__modifierValue, }]; if ( rule.__modifierAction === ALLOW_REALM ) { dnrAddRuleError(rule, `Unsupported csp exception: ${rule.__modifierValue}`); } break; case 'permissions': rule.action.type = 'modifyHeaders'; rule.action.responseHeaders = [{ header: 'permissions-policy', operation: 'append', value: rule.__modifierValue.split('|').join(', '), }]; if ( rule.__modifierAction === ALLOW_REALM ) { dnrAddRuleError(rule, `Unsupported permissions exception: ${rule.__modifierValue}`); } patchDomainOption = true; break; case 'redirect-rule': { let token = rule.__modifierValue; if ( token !== '' ) { const match = /:(\d+)$/.exec(token); if ( match !== null ) { rule.priority += Math.min(parseInt(match[1], 10), 8); token = token.slice(0, match.index); } } const resource = context.extensionPaths.get(token); if ( rule.__modifierValue !== '' && resource === undefined ) { dnrAddRuleError(rule, `Unpatchable redirect filter: ${rule.__modifierValue}`); } if ( rule.__modifierAction !== ALLOW_REALM ) { const extensionPath = resource || token; rule.action.type = 'redirect'; rule.action.redirect = { extensionPath }; } else { rule.action.type = 'block'; rule.priority += 10; } break; } case 'removeparam': { rule.action.type = 'redirect'; if ( rule.__modifierValue === '|' ) { rule.__modifierValue = ''; } if ( rule.__modifierValue !== '' ) { rule.action.redirect = { transform: { queryTransform: { removeParams: [ rule.__modifierValue ] } } }; if ( /^~?\/.+\/$/.test(rule.__modifierValue) ) { dnrAddRuleError(rule, `Unsupported regex-based removeParam: ${rule.__modifierValue}`); } } else { rule.action.redirect = { transform: { query: '' } }; } if ( rule.condition === undefined ) { rule.condition = { }; } if ( rule.condition.resourceTypes === undefined ) { if ( rule.condition.excludedResourceTypes === undefined ) { rule.condition.resourceTypes = [ 'main_frame', 'sub_frame', 'xmlhttprequest', ]; } } // https://github.com/uBlockOrigin/uBOL-home/issues/140 // Mitigate until DNR API flaw is addressed by browser vendors let priority = rule.priority || 1; if ( rule.condition.urlFilter !== undefined ) { priority += 1; } if ( rule.condition.regexFilter !== undefined ) { priority += 1; } if ( rule.condition.initiatorDomains !== undefined ) { priority += 1; } if ( rule.condition.requestDomains !== undefined ) { priority += 1; } if ( priority !== 1 ) { rule.priority = priority; } if ( rule.__modifierAction === ALLOW_REALM ) { dnrAddRuleError(rule, `Unsupported removeparam exception: ${rule.__modifierValue}`); } break; } case 'uritransform': { dnrAddRuleError(rule, `Incompatible with DNR: uritransform=${rule.__modifierValue}`); break; } case 'urlskip': { let urlFilter = rule.condition?.urlFilter; if ( urlFilter === undefined ) { break; } let anchor = 0b000; if ( urlFilter.startsWith('||') ) { anchor |= 0b100; urlFilter = urlFilter.slice(2); } else if ( urlFilter.startsWith('|') ) { anchor |= 0b10; urlFilter = urlFilter.slice(1); } if ( urlFilter.endsWith('|') ) { anchor |= 0b001; urlFilter = urlFilter.slice(0, -1); } rule.condition.urlFilter = undefined; rule.condition.regexFilter = restrFromGenericPattern(urlFilter, anchor); break; } default: dnrAddRuleError(rule, `Unsupported modifier ${rule.__modifierType}`); break; } // Some modifiers only work on document resources // Related issue: https://github.com/uBlockOrigin/uBOL-home/issues/156 if ( patchDomainOption ) { const domains = rule.condition.initiatorDomains; if ( Array.isArray(domains) && domains.length !== 0 ) { rule.condition.requestDomains ||= []; rule.condition.requestDomains.push(...domains); } const notDomains = rule.condition.excludedInitiatorDomains; if ( Array.isArray(notDomains) && notDomains.length !== 0 ) { rule.condition.excludedRequestDomains ||= []; rule.condition.excludedRequestDomains.push(...notDomains); } } } return { ruleset, filterCount: context.filterCount, acceptedFilterCount: context.acceptedFilterCount, rejectedFilterCount: context.rejectedFilterCount, generichideExclusions: Array.from(new Set(generichideExclusions)), generichideInclusions: Array.from(new Set(generichideInclusions)), }; }; /******************************************************************************/ StaticNetFilteringEngine.prototype.addFilterUnit = function( bits, tokenHash, inewunit ) { const bucket = this.bitsToBucket.get(bits) || (new Map()); if ( bucket.size === 0 ) { this.bitsToBucket.set(bits, bucket); } const istoredunit = bucket.get(tokenHash) || 0; if ( istoredunit === 0 ) { bucket.set(tokenHash, inewunit); return; } if ( filterData[istoredunit+0] === FilterBucket.fid ) { FilterBucket.unshift(istoredunit, inewunit); return; } const ibucketunit = FilterBucket.create(); FilterBucket.unshift(ibucketunit, istoredunit); FilterBucket.unshift(ibucketunit, inewunit); bucket.set(tokenHash, ibucketunit); this.unitsToOptimize.push({ bits, tokenHash }); }; /******************************************************************************/ StaticNetFilteringEngine.prototype.optimize = function(throttle = 0) { if ( this.optimizeTaskId !== undefined ) { dropTask(this.optimizeTaskId); this.optimizeTaskId = undefined; } const later = throttle => { this.optimizeTaskId = queueTask(( ) => { this.optimizeTaskId = undefined; this.optimize(throttle); }, 1000); }; const t0 = Date.now(); while ( this.unitsToOptimize.length !== 0 ) { const { bits, tokenHash } = this.unitsToOptimize.pop(); const bucket = this.bitsToBucket.get(bits); const iunit = bucket.get(tokenHash); const fc = filterGetClass(iunit); switch ( fc ) { case FilterHostnameDict: FilterHostnameDict.optimize(iunit); break; case FilterBucket: { const optimizeBits = (tokenHash === NO_TOKEN_HASH) || (bits & MODIFY_REALMS) !== 0 ? 0b10 : 0b01; const inewunit = FilterBucket.optimize(iunit, optimizeBits); if ( inewunit !== 0 ) { bucket.set(tokenHash, inewunit); } break; } default: break; } if ( throttle > 0 && (Date.now() - t0) > 40 ) { return later(throttle - 1); } } filterArgsToUnit.clear(); // Here we do not optimize origHNTrieContainer because many origin-related // tries are instantiated on demand. keyvalStore.setItem( 'SNFE.destHNTrieContainer.trieDetails', destHNTrieContainer.optimize() ); filterDataShrink(); }; /******************************************************************************/ StaticNetFilteringEngine.prototype.toSelfie = function() { this.optimize(0); bidiTrie.optimize(); keyvalStore.setItem('SNFE.origHNTrieContainer.trieDetails', origHNTrieContainer.optimize() ); return { version: this.selfieVersion, processedFilterCount: this.processedFilterCount, acceptedCount: this.acceptedCount, discardedCount: this.discardedCount, bitsToBucket: this.bitsToBucket, urlTokenizer: urlTokenizer.toSelfie(), destHNTrieContainer: destHNTrieContainer.toSelfie(), origHNTrieContainer: origHNTrieContainer.toSelfie(), bidiTrie: bidiTrie.toSelfie(), filterData: filterDataToSelfie(), filterRefs: filterRefsToSelfie(), }; }; StaticNetFilteringEngine.prototype.serialize = function() { return this.toSelfie(); }; /******************************************************************************/ StaticNetFilteringEngine.prototype.fromSelfie = function(selfie) { if ( typeof selfie !== 'object' || selfie === null ) { return; } this.reset(); this.notReady = true; const results = [ destHNTrieContainer.fromSelfie(selfie.destHNTrieContainer), origHNTrieContainer.fromSelfie(selfie.origHNTrieContainer), bidiTrie.fromSelfie(selfie.bidiTrie), filterDataFromSelfie(selfie.filterData), filterRefsFromSelfie(selfie.filterRefs), ]; if ( results.slice(1).every(v => v === true) === false ) { return false; } if ( selfie.version !== this.selfieVersion ) { return false; } this.processedFilterCount = selfie.processedFilterCount; this.acceptedCount = selfie.acceptedCount; this.discardedCount = selfie.discardedCount; this.bitsToBucket = selfie.bitsToBucket; urlTokenizer.fromSelfie(selfie.urlTokenizer); // If this point is never reached, it means the internal state is // unreliable, and the caller is then responsible for resetting the // engine and populate properly, in which case the `notReady` barrier // will be properly reset. this.notReady = false; return true; }; StaticNetFilteringEngine.prototype.unserialize = function(selfie) { return this.fromSelfie(selfie); }; /******************************************************************************/ StaticNetFilteringEngine.prototype.createCompiler = function() { return new FilterCompiler(); }; /******************************************************************************/ StaticNetFilteringEngine.prototype.fromCompiled = function(reader) { reader.select('NETWORK_FILTERS:GOOD'); while ( reader.next() ) { this.acceptedCount += 1; if ( this.goodFilters.has(reader.line) ) { this.discardedCount += 1; } else { this.goodFilters.add(reader.line); } } reader.select('NETWORK_FILTERS:BAD'); while ( reader.next() ) { this.badFilters.add(reader.line); } }; /******************************************************************************/ StaticNetFilteringEngine.prototype.matchAndFetchModifiers = function( fctxt, modifierName ) { if ( this.notReady ) { return; } const typeBits = typeNameToTypeValue[fctxt.type] || otherTypeBitValue; $requestURL = urlTokenizer.setURL(fctxt.url); $requestURLRaw = fctxt.url; $docHostname = fctxt.getDocHostname(); $docDomain = fctxt.getDocDomain(); $requestHostname = fctxt.getHostname(); $requestMethodBit = fctxt.method || 0; $requestTypeValue = (typeBits & TYPE_REALM) >>> TYPE_REALM_OFFSET; $requestAddress = fctxt.getIPAddress(); const modifierType = modifierTypeFromName.get(modifierName); const modifierBits = modifierBitsFromType.get(modifierType); const partyBits = fctxt.is3rdPartyToDoc() ? THIRDPARTY_REALM : FIRSTPARTY_REALM; const catBits00 = modifierBits; const catBits01 = modifierBits | typeBits; const catBits10 = modifierBits | partyBits; const catBits11 = modifierBits | typeBits | partyBits; const bucket00 = this.bitsToBucket.get(catBits00); const bucket01 = typeBits !== 0 ? this.bitsToBucket.get(catBits01) : undefined; const bucket10 = partyBits !== 0 ? this.bitsToBucket.get(catBits10) : undefined; const bucket11 = typeBits !== 0 && partyBits !== 0 ? this.bitsToBucket.get(catBits11) : undefined; if ( bucket00 === undefined && bucket01 === undefined && bucket10 === undefined && bucket11 === undefined ) { return; } const results = []; const env = { type: modifierType || 0, bits: 0, th: 0, iunit: 0, results, }; const tokenHashes = urlTokenizer.getTokens(); let i = 0; let th = 0, iunit = 0; for (;;) { th = tokenHashes[i]; if ( th === INVALID_TOKEN_HASH ) { break; } env.th = th; $tokenBeg = tokenHashes[i+1]; if ( (bucket00 !== undefined) && (iunit = bucket00.get(th) || 0) !== 0 ) { env.bits = catBits00; env.iunit = iunit; filterMatchAndFetchModifiers(iunit, env); } if ( (bucket01 !== undefined) && (iunit = bucket01.get(th) || 0) !== 0 ) { env.bits = catBits01; env.iunit = iunit; filterMatchAndFetchModifiers(iunit, env); } if ( (bucket10 !== undefined) && (iunit = bucket10.get(th) || 0) !== 0 ) { env.bits = catBits10; env.iunit = iunit; filterMatchAndFetchModifiers(iunit, env); } if ( (bucket11 !== undefined) && (iunit = bucket11.get(th) || 0) !== 0 ) { env.bits = catBits11; env.iunit = iunit; filterMatchAndFetchModifiers(iunit, env); } i += 2; } if ( results.length === 0 ) { return; } // One single result is expected to be a common occurrence, and in such // case there is no need to process exception vs. block, block important // occurrences. if ( results.length === 1 ) { const result = results[0]; if ( (result.bits & ALLOW_REALM) !== 0 ) { return; } return [ result ]; } const toAddImportant = new Map(); const toAdd = new Map(); const toRemove = new Map(); for ( const result of results ) { const actionBits = result.bits & BLOCKALLOW_REALM; const modifyValue = result.value; if ( actionBits === BLOCKIMPORTANT_REALM ) { toAddImportant.set(modifyValue, result); } else if ( actionBits === BLOCK_REALM ) { toAdd.set(modifyValue, result); } else { toRemove.set(modifyValue, result); } } if ( toAddImportant.size === 0 && toAdd.size === 0 ) { return; } // Remove entries overridden by important block filters. if ( toAddImportant.size !== 0 ) { for ( const key of toAddImportant.keys() ) { toAdd.delete(key); toRemove.delete(key); } } // Exception filters // // Remove excepted block filters and unused exception filters. // // Special case, except-all: // - Except-all applies only if there is at least one normal block filters. // - Except-all does not apply to important block filters. if ( toRemove.size !== 0 ) { if ( toRemove.has('') === false ) { for ( const key of toRemove.keys() ) { if ( toAdd.has(key) ) { toAdd.delete(key); } else { toRemove.delete(key); } } } else if ( toAdd.size !== 0 ) { toAdd.clear(); if ( toRemove.size !== 1 ) { const entry = toRemove.get(''); toRemove.clear(); toRemove.set('', entry); } } else { toRemove.clear(); } } if ( toAdd.size === 0 && toAddImportant.size === 0 && toRemove.size === 0 ) { return; } const out = Array.from(toAdd.values()); if ( toAddImportant.size !== 0 ) { out.push(...toAddImportant.values()); } if ( toRemove.size !== 0 ) { out.push(...toRemove.values()); } return out; }; /******************************************************************************/ StaticNetFilteringEngine.prototype.realmMatchString = function( realmBits, typeBits, partyBits ) { if ( this.notReady ) { return false; } const exactType = typeBits & 0x80000000; typeBits &= 0x7FFFFFFF; const catBits00 = realmBits; const catBits01 = realmBits | typeBits; const catBits10 = realmBits | partyBits; const catBits11 = realmBits | typeBits | partyBits; const bucket00 = exactType === 0 ? this.bitsToBucket.get(catBits00) : undefined; const bucket01 = exactType !== 0 || typeBits !== 0 ? this.bitsToBucket.get(catBits01) : undefined; const bucket10 = exactType === 0 && partyBits !== 0 ? this.bitsToBucket.get(catBits10) : undefined; const bucket11 = (exactType !== 0 || typeBits !== 0) && partyBits !== 0 ? this.bitsToBucket.get(catBits11) : undefined; if ( bucket00 === undefined && bucket01 === undefined && bucket10 === undefined && bucket11 === undefined ) { return false; } let catBits = 0, iunit = 0; // Pure hostname-based filters let tokenHash = DOT_TOKEN_HASH; if ( (bucket00 !== undefined) && (iunit = bucket00.get(DOT_TOKEN_HASH) || 0) !== 0 && (filterMatch(iunit) === true) ) { catBits = catBits00; } else if ( (bucket01 !== undefined) && (iunit = bucket01.get(DOT_TOKEN_HASH) || 0) !== 0 && (filterMatch(iunit) === true) ) { catBits = catBits01; } else if ( (bucket10 !== undefined) && (iunit = bucket10.get(DOT_TOKEN_HASH) || 0) !== 0 && (filterMatch(iunit) === true) ) { catBits = catBits10; } else if ( (bucket11 !== undefined) && (iunit = bucket11.get(DOT_TOKEN_HASH) || 0) !== 0 && (filterMatch(iunit) === true) ) { catBits = catBits11; } // Pattern-based filters else { const tokenHashes = urlTokenizer.getTokens(); let i = 0; for (;;) { tokenHash = tokenHashes[i]; if ( tokenHash === INVALID_TOKEN_HASH ) { return false; } $tokenBeg = tokenHashes[i+1]; if ( (bucket00 !== undefined) && (iunit = bucket00.get(tokenHash) || 0) !== 0 && (filterMatch(iunit) === true) ) { catBits = catBits00; break; } if ( (bucket01 !== undefined) && (iunit = bucket01.get(tokenHash) || 0) !== 0 && (filterMatch(iunit) === true) ) { catBits = catBits01; break; } if ( (bucket10 !== undefined) && (iunit = bucket10.get(tokenHash) || 0) !== 0 && (filterMatch(iunit) === true) ) { catBits = catBits10; break; } if ( (bucket11 !== undefined) && (iunit = bucket11.get(tokenHash) || 0) !== 0 && (filterMatch(iunit) === true) ) { catBits = catBits11; break; } i += 2; } } this.$catBits = catBits; this.$tokenHash = tokenHash; this.$filterUnit = iunit; return true; }; /******************************************************************************/ // Specialized handler // https://github.com/gorhill/uBlock/issues/1477 // Special case: blocking-generichide filter ALWAYS exists, it is implicit -- // thus we always first check for exception filters, then for important block // filter if and only if there was a hit on an exception filter. // https://github.com/gorhill/uBlock/issues/2103 // User may want to override `generichide` exception filters. // https://www.reddit.com/r/uBlockOrigin/comments/d6vxzj/ // Add support for `specifichide`. StaticNetFilteringEngine.prototype.matchRequestReverse = function(type, url) { const typeBits = typeNameToTypeValue[type] | 0x80000000; // Prime tokenizer: we get a normalized URL in return. $requestURL = urlTokenizer.setURL(url); $requestURLRaw = url; $requestMethodBit = 0; $requestTypeValue = (typeBits & TYPE_REALM) >>> TYPE_REALM_OFFSET; $requestAddress = ''; $isBlockImportant = false; this.$filterUnit = 0; // These registers will be used by various filters $docHostname = $requestHostname = hostnameFromNetworkURL(url); $docDomain = domainFromHostname($docHostname); // Exception filters if ( this.realmMatchString(ALLOW_REALM, typeBits, FIRSTPARTY_REALM) ) { // Important block filters. if ( this.realmMatchString(BLOCKIMPORTANT_REALM, typeBits, FIRSTPARTY_REALM) ) { return 1; } return 2; } return 0; }; /******************************************************************************/ // https://github.com/chrisaljoudi/uBlock/issues/116 // Some type of requests are exceptional, they need custom handling, // not the generic handling. // https://github.com/chrisaljoudi/uBlock/issues/519 // Use exact type match for anything beyond `other`. Also, be prepared to // support unknown types. // https://github.com/uBlockOrigin/uBlock-issues/issues/1501 // Add support to evaluate allow realm before block realm. /** * Matches a URL string using filtering context. * @param {FilteringContext} fctxt - The filtering context * @param {integer} [modifier=0] - A bit vector modifying the behavior of the * matching algorithm: * Bit 0: match exact type. * Bit 1: lookup allow realm regardless of whether there was a match in * block realm. * * @returns {integer} 0=no match, 1=block, 2=allow (exception) */ StaticNetFilteringEngine.prototype.matchRequest = function(fctxt, modifiers = 0) { let typeBits = typeNameToTypeValue[fctxt.type]; if ( modifiers === 0 ) { if ( typeBits === undefined ) { typeBits = otherTypeBitValue; } else if ( typeBits === 0 || typeBits > otherTypeBitValue ) { modifiers |= 0b0001; } } if ( (modifiers & 0b0001) !== 0 ) { if ( typeBits === undefined ) { return 0; } typeBits |= 0x80000000; } const partyBits = fctxt.is3rdPartyToDoc() ? THIRDPARTY_REALM : FIRSTPARTY_REALM; // Prime tokenizer: we get a normalized URL in return. $requestURL = urlTokenizer.setURL(fctxt.url); $requestURLRaw = fctxt.url; this.$filterUnit = 0; // These registers will be used by various filters $docHostname = fctxt.getDocHostname(); $docDomain = fctxt.getDocDomain(); $requestHostname = fctxt.getHostname(); $requestMethodBit = fctxt.method || 0; $requestTypeValue = (typeBits & TYPE_REALM) >>> TYPE_REALM_OFFSET; $requestAddress = fctxt.getIPAddress(); $isBlockImportant = false; // Evaluate block realm before allow realm, and allow realm before // block-important realm, i.e. by order of likelihood of a match. const r = this.realmMatchString(BLOCK_REALM, typeBits, partyBits); if ( r || (modifiers & 0b0010) !== 0 ) { if ( $isBlockImportant ) { return 1; } if ( this.realmMatchString(ALLOW_REALM, typeBits, partyBits) ) { if ( this.realmMatchString(BLOCKIMPORTANT_REALM, typeBits, partyBits) ) { return 1; } return 2; } if ( r ) { return 1; } } return 0; }; /******************************************************************************/ StaticNetFilteringEngine.prototype.matchHeaders = function(fctxt, headers) { const typeBits = typeNameToTypeValue[fctxt.type] || otherTypeBitValue; const partyBits = fctxt.is3rdPartyToDoc() ? THIRDPARTY_REALM : FIRSTPARTY_REALM; // Prime tokenizer: we get a normalized URL in return. $requestURL = urlTokenizer.setURL(fctxt.url); $requestURLRaw = fctxt.url; this.$filterUnit = 0; // These registers will be used by various filters $docHostname = fctxt.getDocHostname(); $docDomain = fctxt.getDocDomain(); $requestHostname = fctxt.getHostname(); $requestMethodBit = fctxt.method || 0; $requestTypeValue = (typeBits & TYPE_REALM) >>> TYPE_REALM_OFFSET; $requestAddress = fctxt.getIPAddress(); $isBlockImportant = false; $httpHeaders.init(headers); let r = 0; if ( this.realmMatchString(HEADERS_REALM | BLOCK_REALM, typeBits, partyBits) ) { r = 1; } if ( r !== 0 && $isBlockImportant !== true ) { if ( this.realmMatchString(HEADERS_REALM | ALLOW_REALM, typeBits, partyBits) ) { r = 2; } else if ( this.realmMatchString(ALLOW_REALM, typeBits, partyBits) ) { r = 2; } if ( r === 2 ) { if ( this.realmMatchString(HEADERS_REALM | BLOCKIMPORTANT_REALM, typeBits, partyBits) ) { r = 1; } } } $httpHeaders.reset(); return r; }; /******************************************************************************/ StaticNetFilteringEngine.prototype.redirectRequest = function(redirectEngine, fctxt) { const directives = this.matchAndFetchModifiers(fctxt, 'redirect-rule'); // No directive is the most common occurrence. if ( directives === undefined ) { return; } const highest = directives.length - 1; // More than a single directive means more work. if ( highest !== 0 ) { directives.sort((a, b) => compareRedirectRequests(redirectEngine, a, b)); } // Redirect to highest-ranked directive const directive = directives[highest]; if ( (directive.bits & ALLOW_REALM) !== 0 ) { return directives; } const { token } = parseRedirectRequestValue(directive); fctxt.redirectURL = redirectEngine.tokenToURL(fctxt, token); if ( fctxt.redirectURL === undefined ) { return; } return directives; }; function parseRedirectRequestValue(directive) { if ( directive.cache === null ) { directive.cache = sfp.parseRedirectValue(directive.value); } return directive.cache; } function compareRedirectRequests(redirectEngine, a, b) { const { token: atok, priority: aint, bits: abits } = parseRedirectRequestValue(a); if ( redirectEngine.hasToken(atok) === false ) { return -1; } const { token: btok, priority: bint, bits: bbits } = parseRedirectRequestValue(b); if ( redirectEngine.hasToken(btok) === false ) { return 1; } if ( abits !== bbits ) { if ( (abits & IMPORTANT_REALM) !== 0 ) { return 1; } if ( (bbits & IMPORTANT_REALM) !== 0 ) { return -1; } if ( (abits & ALLOW_REALM) !== 0 ) { return -1; } if ( (bbits & ALLOW_REALM) !== 0 ) { return 1; } } return aint - bint; } /******************************************************************************/ StaticNetFilteringEngine.prototype.transformRequest = function(fctxt, out = []) { const directives = this.matchAndFetchModifiers(fctxt, 'uritransform'); if ( directives === undefined ) { return; } const redirectURL = new URL(fctxt.url); for ( const directive of directives ) { if ( (directive.bits & ALLOW_REALM) !== 0 ) { out.push(directive); continue; } if ( directive.cache === null ) { directive.cache = sfp.parseReplaceByRegexValue(directive.value); } const cache = directive.cache; if ( cache === undefined ) { continue; } const before = `${redirectURL.pathname}${redirectURL.search}${redirectURL.hash}`; if ( cache.re.test(before) !== true ) { continue; } const after = before.replace(cache.re, cache.replacement); if ( after === before ) { continue; } const hashPos = after.indexOf('#'); redirectURL.hash = hashPos !== -1 ? after.slice(hashPos) : ''; const afterMinusHash = hashPos !== -1 ? after.slice(0, hashPos) : after; const searchPos = afterMinusHash.indexOf('?'); redirectURL.search = searchPos !== -1 ? afterMinusHash.slice(searchPos) : ''; redirectURL.pathname = searchPos !== -1 ? after.slice(0, searchPos) : after; out.push(directive); } if ( out.length === 0 ) { return; } if ( redirectURL.href !== fctxt.url ) { fctxt.redirectURL = redirectURL.href; } return out; }; StaticNetFilteringEngine.prototype.urlSkip = function( fctxt, blocked, out = [] ) { if ( fctxt.redirectURL !== undefined ) { return; } const directives = this.matchAndFetchModifiers(fctxt, 'urlskip'); if ( directives === undefined ) { return; } for ( const directive of directives ) { if ( (directive.bits & ALLOW_REALM) !== 0 ) { out.push(directive); continue; } const urlin = fctxt.url; const value = directive.value; const steps = value.includes(' ') && value.split(/ +/) || [ value ]; const urlout = urlSkip(urlin, blocked, steps, directive); if ( urlout === undefined ) { continue; } if ( urlout === urlin ) { continue; } fctxt.redirectURL = urlout; out.push(directive); break; } if ( out.length === 0 ) { return; } return out; }; /******************************************************************************/ // https://github.com/uBlockOrigin/uBlock-issues/issues/1626 // Do not redirect when the number of query parameters does not change. StaticNetFilteringEngine.prototype.filterQuery = function(fctxt, out = []) { if ( fctxt.redirectURL !== undefined ) { return; } const directives = this.matchAndFetchModifiers(fctxt, 'removeparam'); if ( directives === undefined ) { return; } const url = fctxt.url; const qpos = url.indexOf('?'); if ( qpos === -1 ) { return; } let hpos = url.indexOf('#', qpos + 1); if ( hpos === -1 ) { hpos = url.length; } const params = new Map(); const query = url.slice(qpos + 1, hpos); for ( let i = 0; i < query.length; ) { let pos = query.indexOf('&', i); if ( pos === -1 ) { pos = query.length; } const kv = query.slice(i, pos); i = pos + 1; pos = kv.indexOf('='); if ( pos !== -1 ) { params.set(kv.slice(0, pos), kv.slice(pos + 1)); } else { params.set(kv, ''); } } const inParamCount = params.size; for ( const directive of directives ) { if ( params.size === 0 ) { break; } const isException = (directive.bits & ALLOW_REALM) !== 0; if ( isException && directive.value === '' ) { out.push(directive); break; } const { all, bad, name, not, re } = parseQueryPruneValue(directive); if ( bad ) { continue; } if ( all ) { if ( isException === false ) { params.clear(); } out.push(directive); break; } if ( name !== undefined ) { const value = params.get(name); if ( not === false ) { if ( value !== undefined ) { if ( isException === false ) { params.delete(name); } out.push(directive); } continue; } if ( value !== undefined ) { params.delete(name); } if ( params.size !== 0 ) { if ( isException === false ) { params.clear(); } out.push(directive); } if ( value !== undefined ) { params.set(name, value); } continue; } if ( re === undefined ) { continue; } let filtered = false; for ( const [ key, raw ] of params ) { let value = raw; try { value = decodeURIComponent(value); } catch { } if ( re.test(`${key}=${value}`) === not ) { continue; } if ( isException === false ) { params.delete(key); } filtered = true; } if ( filtered ) { out.push(directive); } } if ( out.length === 0 ) { return; } if ( params.size !== inParamCount ) { fctxt.redirectURL = url.slice(0, qpos); if ( params.size !== 0 ) { fctxt.redirectURL += '?' + Array.from(params).map(a => a[1] === '' ? `${a[0]}=` : `${a[0]}=${a[1]}` ).join('&'); } if ( hpos !== url.length ) { fctxt.redirectURL += url.slice(hpos); } } return out; }; function parseQueryPruneValue(directive) { if ( directive.cache === null ) { directive.cache = sfp.parseQueryPruneValue(directive.value); } return directive.cache; } /******************************************************************************/ StaticNetFilteringEngine.prototype.hasQuery = function(fctxt) { urlTokenizer.setURL(fctxt.url); return urlTokenizer.hasQuery(); }; /******************************************************************************/ StaticNetFilteringEngine.prototype.toLogData = function() { if ( this.$filterUnit !== 0 ) { return new LogData(this.$catBits, this.$tokenHash, this.$filterUnit); } }; /******************************************************************************/ StaticNetFilteringEngine.prototype.isBlockImportant = function() { return this.$filterUnit !== 0 && $isBlockImportant; }; /******************************************************************************/ StaticNetFilteringEngine.prototype.getFilterCount = function() { return this.acceptedCount - this.discardedCount; }; /******************************************************************************/ StaticNetFilteringEngine.prototype.enableWASM = function(wasmModuleFetcher, path) { return Promise.all([ bidiTrie.enableWASM(wasmModuleFetcher, path), origHNTrieContainer.enableWASM(wasmModuleFetcher, path), destHNTrieContainer.enableWASM(wasmModuleFetcher, path), ]).then(results => { return results.every(a => a === true); }); }; /******************************************************************************/ StaticNetFilteringEngine.prototype.test = function(details) { const { url, type, from, redirectEngine } = details; if ( url === undefined ) { return; } const fctxt = new FilteringContext(); fctxt.setURL(url); fctxt.setType(type || ''); fctxt.setDocOriginFromURL(from || ''); const r = this.matchRequest(fctxt); const out = [ `url: ${url}` ]; if ( type ) { out.push(`type: ${type}`); } if ( from ) { out.push(`context: ${from}`); } if ( r !== 0 ) { const logdata = this.toLogData(); if ( r === 1 ) { out.push(`blocked: ${logdata.raw}`); } else if ( r === 2 ) { out.push(`unblocked: ${logdata.raw}`); } } else { out.push('not blocked'); } if ( r !== 1 ) { const entries = this.transformRequest(fctxt); if ( entries ) { for ( const entry of entries ) { out.push(`modified: ${entry.logData().raw}`); } } if ( fctxt.redirectURL !== undefined && this.hasQuery(fctxt) ) { const entries = this.filterQuery(fctxt, 'removeparam'); if ( entries ) { for ( const entry of entries ) { out.push(`modified: ${entry.logData().raw}`); } } } if ( fctxt.type === 'main_frame' || fctxt.type === 'sub_frame' ) { const csps = this.matchAndFetchModifiers(fctxt, 'csp'); if ( csps ) { for ( const csp of csps ) { out.push(`modified: ${csp.logData().raw}`); } } const pps = this.matchAndFetchModifiers(fctxt, 'permissions'); if ( pps ) { for ( const pp of pps ) { out.push(`modified: ${pp.logData().raw}`); } } } } else if ( redirectEngine ) { const redirects = this.redirectRequest(redirectEngine, fctxt); if ( redirects ) { for ( const redirect of redirects ) { out.push(`modified: ${redirect.logData().raw}`); } } } const urlskips = this.matchAndFetchModifiers(fctxt, 'urlskip'); if ( urlskips ) { for ( const urlskip of urlskips ) { out.push(`modified: ${urlskip.logData().raw}`); } } return out.join('\n'); } /******************************************************************************/ StaticNetFilteringEngine.prototype.bucketHistogram = function() { const results = []; for ( const [ bits, bucket ] of this.bitsToBucket ) { for ( const [ th, iunit ] of bucket ) { const token = urlTokenizer.stringFromTokenHash(th); const fc = filterGetClass(iunit); const count = fc.getCount !== undefined ? fc.getCount(iunit) : 1; results.push({ bits: bits.toString(16), token, count, f: fc.name }); } } results.sort((a, b) => { return b.count - a.count; }); console.info(results); }; /******************************************************************************/ // Dump the internal state of the filtering engine to the console. // Useful to make development decisions and investigate issues. StaticNetFilteringEngine.prototype.dump = function() { const thConstants = new Map([ [ NO_TOKEN_HASH, 'NO_TOKEN_HASH' ], [ DOT_TOKEN_HASH, 'DOT_TOKEN_HASH' ], [ ANY_TOKEN_HASH, 'ANY_TOKEN_HASH' ], [ ANY_HTTPS_TOKEN_HASH, 'ANY_HTTPS_TOKEN_HASH' ], [ ANY_HTTP_TOKEN_HASH, 'ANY_HTTP_TOKEN_HASH' ], [ EMPTY_TOKEN_HASH, 'EMPTY_TOKEN_HASH' ], ]); const out = []; const toOutput = (depth, line) => { out.push(`${' '.repeat(depth*2)}${line}`); }; const dumpUnit = (idata, depth = 0) => { const fc = filterGetClass(idata); fcCounts.set(fc.name, (fcCounts.get(fc.name) || 0) + 1); const info = filterDumpInfo(idata) || ''; toOutput(depth, info !== '' ? `${fc.name}: ${info}` : fc.name); switch ( fc ) { case FilterBucket: case FilterCompositeAll: case FilterDomainHitAny: { fc.forEach(idata, i => { dumpUnit(i, depth+1); }); break; } case FilterBucketIfOriginHits: { dumpUnit(filterData[idata+2], depth+1); dumpUnit(filterData[idata+1], depth+1); break; } case FilterBucketIfRegexHits: { dumpUnit(filterData[idata+1], depth+1); break; } case FilterPlainTrie: { for ( const details of bidiTrie.trieIterator(filterData[idata+1]) ) { toOutput(depth+1, details.pattern); let ix = details.iextra; if ( ix === 1 ) { continue; } for (;;) { if ( ix === 0 ) { break; } dumpUnit(filterData[ix+0], depth+2); ix = filterData[ix+1]; } } break; } default: break; } }; const fcCounts = new Map(); const thCounts = new Set(); const realms = new Map([ [ BLOCK_REALM, 'block' ], [ BLOCKIMPORTANT_REALM, 'block-important' ], [ ALLOW_REALM, 'unblock' ], [ REDIRECT_REALM, 'redirect' ], [ REMOVEPARAM_REALM, 'removeparam' ], [ CSP_REALM, 'csp' ], [ PERMISSIONS_REALM, 'permissions' ], [ URLTRANSFORM_REALM, 'uritransform' ], [ REPLACE_REALM, 'replace' ], [ URLSKIP_REALM, 'urlskip' ], ]); const partyness = new Map([ [ ANYPARTY_REALM, 'any-party' ], [ FIRSTPARTY_REALM, '1st-party' ], [ THIRDPARTY_REALM, '3rd-party' ], ]); for ( const [ realmBits, realmName ] of realms ) { toOutput(1, `+ realm: ${realmName}`); for ( const [ partyBits, partyName ] of partyness ) { toOutput(2, `+ party: ${partyName}`); const processedTypeBits = new Set(); for ( const typeName in typeNameToTypeValue ) { const typeBits = typeNameToTypeValue[typeName]; if ( processedTypeBits.has(typeBits) ) { continue; } processedTypeBits.add(typeBits); const bits = realmBits | partyBits | typeBits; const bucket = this.bitsToBucket.get(bits); if ( bucket === undefined ) { continue; } const thCount = bucket.size; toOutput(3, `+ type: ${typeName} (${thCount})`); for ( const [ th, iunit ] of bucket) { thCounts.add(th); const ths = thConstants.has(th) ? thConstants.get(th) : `0x${th.toString(16)}`; toOutput(4, `+ th: ${ths}`); dumpUnit(iunit, 5); } } } } const knownTokens = urlTokenizer.knownTokens .reduce((a, b) => b !== 0 ? a+1 : a, 0); out.unshift([ 'Static Network Filtering Engine internals:', ` Distinct token hashes: ${thCounts.size.toLocaleString('en')}`, ` Known-token sieve (Uint8Array): ${knownTokens.toLocaleString('en')} out of 65,536`, ` Filter data (Int32Array): ${filterDataWritePtr.toLocaleString('en')}`, ` Filter refs (JS array): ${filterRefsWritePtr.toLocaleString('en')}`, ' Origin trie container:', origHNTrieContainer.dumpInfo().split('\n').map(a => ` ${a}`).join('\n'), ' Request trie container:', destHNTrieContainer.dumpInfo().split('\n').map(a => ` ${a}`).join('\n'), ' Pattern trie container:', bidiTrie.dumpInfo().split('\n').map(a => ` ${a}`).join('\n'), '+ Filter class stats:', Array.from(fcCounts) .sort((a, b) => b[1] - a[1]) .map(a => ` ${a[0]}: ${a[1].toLocaleString('en')}`) .join('\n'), '+ Filter tree:', ].join('\n')); return out.join('\n'); }; /******************************************************************************/ const staticNetFilteringEngine = new StaticNetFilteringEngine(); export default staticNetFilteringEngine;