/*
 * This file is part of Adblock Plus <https://adblockplus.org/>,
 * Copyright (C) 2006-2017 eyeo GmbH
 *
 * Adblock Plus is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 3 as
 * published by the Free Software Foundation.
 *
 * Adblock Plus is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Adblock Plus.  If not, see <http://www.gnu.org/licenses/>.
 */

"use strict";

/**
 * @fileOverview Matcher class implementing matching addresses against
 * a list of filters.
 */

const {Filter, WhitelistFilter} = require("filterClasses");

/**
 * Extracts the keyword candidates of a URL: every run of three or more
 * characters out of [a-z0-9%] in the lower-cased URL, followed by the empty
 * keyword so that filters stored without a keyword are always considered.
 * @param {string} location URL to be split into keyword candidates
 * @return {string[]} candidates in order of appearance, "" always last
 */
function extractCandidates(location)
{
  const candidates = location.toLowerCase().match(/[a-z0-9%]{3,}/g) || [];
  candidates.push("");
  return candidates;
}

/**
 * Blacklist/whitelist filter matching
 * @constructor
 */
function Matcher()
{
  this.clear();
}
exports.Matcher = Matcher;

Matcher.prototype = {
  /**
   * Lookup table for filters by their associated keyword. A keyword maps
   * either to a single filter or to an array of two or more filters.
   * @type {Object}
   */
  filterByKeyword: null,

  /**
   * Lookup table for keywords by the filter text
   * @type {Object}
   */
  keywordByFilter: null,

  /**
   * Removes all known filters
   */
  clear()
  {
    // Prototype-less objects, so arbitrary keywords / filter texts cannot
    // collide with inherited Object.prototype properties.
    this.filterByKeyword = Object.create(null);
    this.keywordByFilter = Object.create(null);
  },

  /**
   * Adds a filter to the matcher. Does nothing if a filter with the same
   * text has already been added.
   * @param {RegExpFilter} filter
   */
  add(filter)
  {
    if (filter.text in this.keywordByFilter)
      return;

    // Look for a suitable keyword
    const keyword = this.findKeyword(filter);
    const oldEntry = this.filterByKeyword[keyword];
    if (typeof oldEntry === "undefined")
    {
      // First filter for this keyword is stored directly, without an array.
      this.filterByKeyword[keyword] = filter;
    }
    else if (oldEntry.length === 1)
    {
      // NOTE(review): this relies on a single filter object reporting
      // `length === 1` (arrays stored here always hold at least two
      // filters) - confirm against filterClasses.
      this.filterByKeyword[keyword] = [oldEntry, filter];
    }
    else
      oldEntry.push(filter);
    this.keywordByFilter[filter.text] = keyword;
  },

  /**
   * Removes a filter from the matcher. Does nothing for unknown filters.
   * @param {RegExpFilter} filter
   */
  remove(filter)
  {
    if (!(filter.text in this.keywordByFilter))
      return;

    const keyword = this.keywordByFilter[filter.text];
    const list = this.filterByKeyword[keyword];
    if (list.length <= 1)
      delete this.filterByKeyword[keyword];
    else
    {
      const index = list.indexOf(filter);
      if (index >= 0)
      {
        list.splice(index, 1);
        // Collapse a one-element array back to the bare filter so that
        // arrays in filterByKeyword always hold two or more filters.
        if (list.length === 1)
          this.filterByKeyword[keyword] = list[0];
      }
    }

    delete this.keywordByFilter[filter.text];
  },

  /**
   * Chooses a keyword to be associated with the filter. Among all candidate
   * keywords found in the filter text, the one with the fewest filters
   * already registered wins; ties are broken in favour of the longer keyword.
   * @param {Filter} filter
   * @return {string} keyword or an empty string if no keyword could be found
   */
  findKeyword(filter)
  {
    let result = "";
    let {text} = filter;
    if (Filter.regexpRegExp.test(text))
      return result;

    // Remove options
    const match = Filter.optionsRegExp.exec(text);
    if (match)
      text = match.input.slice(0, match.index);

    // Remove whitelist marker
    if (text.startsWith("@@"))
      text = text.slice(2);

    const candidates = text.toLowerCase().match(
      /[^a-z0-9%*][a-z0-9%]{3,}(?=[^a-z0-9%*])/g
    );
    if (!candidates)
      return result;

    const hash = this.filterByKeyword;
    let resultCount = 0xFFFFFF;
    let resultLength = 0;
    for (const rawCandidate of candidates)
    {
      // Drop the leading separator character captured by the regexp.
      const candidate = rawCandidate.slice(1);
      const count = (candidate in hash ? hash[candidate].length : 0);
      if (count < resultCount ||
          (count === resultCount && candidate.length > resultLength))
      {
        result = candidate;
        resultCount = count;
        resultLength = candidate.length;
      }
    }
    return result;
  },

  /**
   * Checks whether a particular filter is being matched against.
   * @param {RegExpFilter} filter
   * @return {boolean}
   */
  hasFilter(filter)
  {
    return (filter.text in this.keywordByFilter);
  },

  /**
   * Returns the keyword used for a filter, null for unknown filters.
   * @param {RegExpFilter} filter
   * @return {?string}
   */
  getKeywordForFilter(filter)
  {
    if (filter.text in this.keywordByFilter)
      return this.keywordByFilter[filter.text];
    return null;
  },

  /**
   * Checks whether the entries for a particular keyword match a URL.
   * The keyword must be present in filterByKeyword.
   * @param {string} keyword
   * @param {string} location
   * @param {number} typeMask
   * @param {string} docDomain
   * @param {boolean} thirdParty
   * @param {string} sitekey
   * @param {boolean} specificOnly
   *   if true, generic filters that are not whitelist filters are skipped
   * @param {*} [privatenode]
   *   passed through unchanged as the last argument of filter.matches()
   * @return {?Filter} first matching filter or null
   */
  _checkEntryMatch(keyword, location, typeMask, docDomain, thirdParty, sitekey,
                   specificOnly, privatenode)
  {
    // NOTE(review): the entry may be a bare filter rather than an array; the
    // indexed loop then relies on the filter exposing `length` and `[0]` -
    // confirm against filterClasses.
    const list = this.filterByKeyword[keyword];
    for (let i = 0; i < list.length; i++)
    {
      const filter = list[i];

      if (specificOnly && filter.isGeneric() &&
          !(filter instanceof WhitelistFilter))
        continue;

      if (filter.matches(location, typeMask, docDomain, thirdParty, sitekey,
                         privatenode))
        return filter;
    }
    return null;
  },

  /**
   * Tests whether the URL matches any of the known filters
   * @param {string} location
   *   URL to be tested
   * @param {number} typeMask
   *   bitmask of content / request types to match
   * @param {string} docDomain
   *   domain name of the document that loads the URL
   * @param {boolean} thirdParty
   *   should be true if the URL is a third-party request
   * @param {string} sitekey
   *   public key provided by the document
   * @param {boolean} specificOnly
   *   should be true if generic matches should be ignored
   * @param {*} [privatenode]
   *   optional, forwarded unchanged to filter.matches()
   * @return {?RegExpFilter}
   *   matching filter or null
   */
  matchesAny(location, typeMask, docDomain, thirdParty, sitekey, specificOnly,
             privatenode)
  {
    for (const substr of extractCandidates(location))
    {
      if (substr in this.filterByKeyword)
      {
        const result = this._checkEntryMatch(
          substr, location, typeMask, docDomain, thirdParty, sitekey,
          specificOnly, privatenode
        );
        if (result)
          return result;
      }
    }

    return null;
  }
};

/**
 * Combines a matcher for blocking and exception rules, automatically sorts
 * rules into two Matcher instances.
 * @constructor
 * @augments Matcher
 */
function CombinedMatcher()
{
  this.blacklist = new Matcher();
  this.whitelist = new Matcher();
  this.resultCache = Object.create(null);
}
exports.CombinedMatcher = CombinedMatcher;

/**
 * Maximal number of matching cache entries to be kept
 * @type {number}
 */
CombinedMatcher.maxCacheEntries = 1000;

CombinedMatcher.prototype = {
  /**
   * Matcher for blocking rules.
   * @type {Matcher}
   */
  blacklist: null,

  /**
   * Matcher for exception rules.
   * @type {Matcher}
   */
  whitelist: null,

  /**
   * Lookup table of previous matchesAny results
   * @type {Object}
   */
  resultCache: null,

  /**
   * Number of entries in resultCache
   * @type {number}
   */
  cacheEntries: 0,

  /**
   * @see Matcher#clear
   */
  clear()
  {
    this.blacklist.clear();
    this.whitelist.clear();
    this.resultCache = Object.create(null);
    this.cacheEntries = 0;
  },

  /**
   * Adds a filter to the whitelist or blacklist matcher, depending on its
   * type, and invalidates the result cache.
   * @see Matcher#add
   * @param {Filter} filter
   */
  add(filter)
  {
    if (filter instanceof WhitelistFilter)
      this.whitelist.add(filter);
    else
      this.blacklist.add(filter);

    if (this.cacheEntries > 0)
    {
      this.resultCache = Object.create(null);
      this.cacheEntries = 0;
    }
  },

  /**
   * Removes a filter from the whitelist or blacklist matcher, depending on
   * its type, and invalidates the result cache.
   * @see Matcher#remove
   * @param {Filter} filter
   */
  remove(filter)
  {
    if (filter instanceof WhitelistFilter)
      this.whitelist.remove(filter);
    else
      this.blacklist.remove(filter);

    if (this.cacheEntries > 0)
    {
      this.resultCache = Object.create(null);
      this.cacheEntries = 0;
    }
  },

  /**
   * @see Matcher#findKeyword
   * @param {Filter} filter
   * @return {string} keyword
   */
  findKeyword(filter)
  {
    if (filter instanceof WhitelistFilter)
      return this.whitelist.findKeyword(filter);
    return this.blacklist.findKeyword(filter);
  },

  /**
   * @see Matcher#hasFilter
   * @param {Filter} filter
   * @return {boolean}
   */
  hasFilter(filter)
  {
    if (filter instanceof WhitelistFilter)
      return this.whitelist.hasFilter(filter);
    return this.blacklist.hasFilter(filter);
  },

  /**
   * @see Matcher#getKeywordForFilter
   * @param {Filter} filter
   * @return {?string} keyword, null for unknown filters
   */
  getKeywordForFilter(filter)
  {
    if (filter instanceof WhitelistFilter)
      return this.whitelist.getKeywordForFilter(filter);
    return this.blacklist.getKeywordForFilter(filter);
  },

  /**
   * Checks whether a particular filter is slow, i.e. has no (non-empty)
   * keyword associated with it.
   * @param {RegExpFilter} filter
   * @return {boolean}
   */
  isSlowFilter(filter)
  {
    const matcher = (
      filter instanceof WhitelistFilter ? this.whitelist : this.blacklist
    );
    if (matcher.hasFilter(filter))
      return !matcher.getKeywordForFilter(filter);
    return !matcher.findKeyword(filter);
  },

  /**
   * Optimized filter matching testing both whitelist and blacklist matchers
   * simultaneously. A whitelist match is returned as soon as it is found;
   * the first blacklist match is only returned if no candidate keyword
   * produced a whitelist match. For parameters see Matcher.matchesAny().
   * @see Matcher#matchesAny
   * @inheritdoc
   */
  matchesAnyInternal(location, typeMask, docDomain, thirdParty, sitekey,
                     specificOnly, privatenode)
  {
    let blacklistHit = null;
    for (const substr of extractCandidates(location))
    {
      if (substr in this.whitelist.filterByKeyword)
      {
        // specificOnly is irrelevant for exception rules (whitelist filters
        // are never skipped by _checkEntryMatch), so pass false explicitly;
        // this keeps privatenode in its own argument slot instead of being
        // mistaken for specificOnly.
        const result = this.whitelist._checkEntryMatch(
          substr, location, typeMask, docDomain, thirdParty, sitekey,
          false, privatenode
        );
        if (result)
          return result;
      }
      if (substr in this.blacklist.filterByKeyword && blacklistHit === null)
      {
        blacklistHit = this.blacklist._checkEntryMatch(
          substr, location, typeMask, docDomain, thirdParty, sitekey,
          specificOnly, privatenode
        );
      }
    }
    return blacklistHit;
  },

  /**
   * Cached variant of matchesAnyInternal(). When privatenode is truthy the
   * result cache is neither consulted nor modified.
   * @see Matcher#matchesAny
   * @inheritdoc
   */
  matchesAny(location, typeMask, docDomain, thirdParty, sitekey, specificOnly,
             privatenode)
  {
    // Uncached lookups must not read, fill or evict the shared cache.
    if (privatenode)
    {
      return this.matchesAnyInternal(location, typeMask, docDomain,
                                     thirdParty, sitekey, specificOnly,
                                     privatenode);
    }

    const key = location + " " + typeMask + " " + docDomain +
      " " + thirdParty + " " + sitekey + " " + specificOnly;
    if (key in this.resultCache)
      return this.resultCache[key];

    const result = this.matchesAnyInternal(location, typeMask, docDomain,
                                           thirdParty, sitekey, specificOnly,
                                           privatenode);

    // Simple eviction strategy: drop everything once the cache is full.
    if (this.cacheEntries >= CombinedMatcher.maxCacheEntries)
    {
      this.resultCache = Object.create(null);
      this.cacheEntries = 0;
    }

    this.resultCache[key] = result;
    this.cacheEntries++;

    return result;
  }
};

/**
 * Shared CombinedMatcher instance that should usually be used.
 * @type {CombinedMatcher}
 */
exports.defaultMatcher = new CombinedMatcher();