diff options
author | Ruben Rodriguez <ruben@gnu.org> | 2014-10-20 02:24:51 +0200 |
---|---|---|
committer | Ruben Rodriguez <ruben@gnu.org> | 2014-10-20 02:24:51 +0200 |
commit | 6e7918b6ccb69876d339a320091fdee811445395 (patch) | |
tree | 31cb88ee438d652fddefca1193f70289a8b3dcc8 /data/extensions/spyblock@gnu.org/lib/matcher.js | |
parent | 60e5b13c35d4d3ba21bb03b026750a0a414f6c77 (diff) |
Generalize data directory
Diffstat (limited to 'data/extensions/spyblock@gnu.org/lib/matcher.js')
-rw-r--r-- | data/extensions/spyblock@gnu.org/lib/matcher.js | 446 |
1 files changed, 446 insertions, 0 deletions
diff --git a/data/extensions/spyblock@gnu.org/lib/matcher.js b/data/extensions/spyblock@gnu.org/lib/matcher.js new file mode 100644 index 0000000..908b0b8 --- /dev/null +++ b/data/extensions/spyblock@gnu.org/lib/matcher.js @@ -0,0 +1,446 @@ +/* + * This file is part of Adblock Plus <http://adblockplus.org/>, + * Copyright (C) 2006-2014 Eyeo GmbH + * + * Adblock Plus is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 3 as + * published by the Free Software Foundation. + * + * Adblock Plus is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. + */ + +/** + * @fileOverview Matcher class implementing matching addresses against a list of filters. + */ + +let {Filter, RegExpFilter, WhitelistFilter} = require("filterClasses"); + +/** + * Blacklist/whitelist filter matching + * @constructor + */ +function Matcher() +{ + this.clear(); +} +exports.Matcher = Matcher; + +Matcher.prototype = { + /** + * Lookup table for filters by their associated keyword + * @type Object + */ + filterByKeyword: null, + + /** + * Lookup table for keywords by the filter text + * @type Object + */ + keywordByFilter: null, + + /** + * Removes all known filters + */ + clear: function() + { + this.filterByKeyword = {__proto__: null}; + this.keywordByFilter = {__proto__: null}; + }, + + /** + * Adds a filter to the matcher + * @param {RegExpFilter} filter + */ + add: function(filter) + { + if (filter.text in this.keywordByFilter) + return; + + // Look for a suitable keyword + let keyword = this.findKeyword(filter); + let oldEntry = this.filterByKeyword[keyword]; + if (typeof oldEntry == "undefined") + this.filterByKeyword[keyword] = filter; + else if (oldEntry.length == 1) + this.filterByKeyword[keyword] = [oldEntry, filter]; + else + oldEntry.push(filter); + this.keywordByFilter[filter.text] = keyword; + }, + + /** + * Removes a filter from the matcher + * @param {RegExpFilter} filter + */ + remove: function(filter) + { + if (!(filter.text in this.keywordByFilter)) + return; + + let keyword = this.keywordByFilter[filter.text]; + let list = this.filterByKeyword[keyword]; + if (list.length <= 1) + delete this.filterByKeyword[keyword]; + else + { + let index = list.indexOf(filter); + if (index >= 0) + { + list.splice(index, 1); + if (list.length == 1) + this.filterByKeyword[keyword] = list[0]; + } + } + + delete this.keywordByFilter[filter.text]; + }, + + /** + * Chooses a keyword to be associated with the filter + * @param {String} text text representation of the filter + * @return {String} keyword (might be empty string) + */ + findKeyword: function(filter) + { + let result = ""; + let text = filter.text; + if (Filter.regexpRegExp.test(text)) + return result; + + // Remove options + let match = Filter.optionsRegExp.exec(text); + if (match) + text = match.input.substr(0, match.index); + + // Remove whitelist marker + if (text.substr(0, 2) == "@@") + text = text.substr(2); + + let candidates = text.toLowerCase().match(/[^a-z0-9%*][a-z0-9%]{3,}(?=[^a-z0-9%*])/g); + if (!candidates) + return result; + + let hash = this.filterByKeyword; + let resultCount = 0xFFFFFF; + let resultLength = 0; + for (let i = 0, l = candidates.length; i < l; i++) + { + let candidate = candidates[i].substr(1); + let count = (candidate in hash ? hash[candidate].length : 0); + if (count < resultCount || (count == resultCount && candidate.length > resultLength)) + { + result = candidate; + resultCount = count; + resultLength = candidate.length; + } + } + return result; + }, + + /** + * Checks whether a particular filter is being matched against. + */ + hasFilter: function(/**RegExpFilter*/ filter) /**Boolean*/ + { + return (filter.text in this.keywordByFilter); + }, + + /** + * Returns the keyword used for a filter, null for unknown filters. + */ + getKeywordForFilter: function(/**RegExpFilter*/ filter) /**String*/ + { + if (filter.text in this.keywordByFilter) + return this.keywordByFilter[filter.text]; + else + return null; + }, + + /** + * Checks whether the entries for a particular keyword match a URL + */ + _checkEntryMatch: function(keyword, location, contentType, docDomain, thirdParty, privatenode) + { + let list = this.filterByKeyword[keyword]; + for (let i = 0; i < list.length; i++) + { + let filter = list[i]; + if (filter.matches(location, contentType, docDomain, thirdParty,privatenode)) + return filter; + } + return null; + }, + + /** + * Tests whether the URL matches any of the known filters + * @param {String} location URL to be tested + * @param {String} contentType content type identifier of the URL + * @param {String} docDomain domain name of the document that loads the URL + * @param {Boolean} thirdParty should be true if the URL is a third-party request + * @return {RegExpFilter} matching filter or null + */ + matchesAny: function(location, contentType, docDomain, thirdParty) + { + let candidates = location.toLowerCase().match(/[a-z0-9%]{3,}/g); + if (candidates === null) + candidates = []; + candidates.push(""); + for (let i = 0, l = candidates.length; i < l; i++) + { + let substr = candidates[i]; + if (substr in this.filterByKeyword) + { + let result = this._checkEntryMatch(substr, location, contentType, docDomain, thirdParty); + if (result) + return result; + } + } + + return null; + } +}; + +/** + * Combines a matcher for blocking and exception rules, automatically sorts + * rules into two Matcher instances. + * @constructor + */ +function CombinedMatcher() +{ + this.blacklist = new Matcher(); + this.whitelist = new Matcher(); + this.keys = {__proto__: null}; + this.resultCache = {__proto__: null}; +} +exports.CombinedMatcher = CombinedMatcher; + +/** + * Maximal number of matching cache entries to be kept + * @type Number + */ +CombinedMatcher.maxCacheEntries = 1000; + +CombinedMatcher.prototype = +{ + /** + * Matcher for blocking rules. + * @type Matcher + */ + blacklist: null, + + /** + * Matcher for exception rules. + * @type Matcher + */ + whitelist: null, + + /** + * Exception rules that are limited by public keys, mapped by the corresponding keys. + * @type Object + */ + keys: null, + + /** + * Lookup table of previous matchesAny results + * @type Object + */ + resultCache: null, + + /** + * Number of entries in resultCache + * @type Number + */ + cacheEntries: 0, + + /** + * @see Matcher#clear + */ + clear: function() + { + this.blacklist.clear(); + this.whitelist.clear(); + this.keys = {__proto__: null}; + this.resultCache = {__proto__: null}; + this.cacheEntries = 0; + }, + + /** + * @see Matcher#add + */ + add: function(filter) + { + if (filter instanceof WhitelistFilter) + { + if (filter.siteKeys) + { + for (let i = 0; i < filter.siteKeys.length; i++) + this.keys[filter.siteKeys[i]] = filter.text; + } + else + this.whitelist.add(filter); + } + else + this.blacklist.add(filter); + + if (this.cacheEntries > 0) + { + this.resultCache = {__proto__: null}; + this.cacheEntries = 0; + } + }, + + /** + * @see Matcher#remove + */ + remove: function(filter) + { + if (filter instanceof WhitelistFilter) + { + if (filter.siteKeys) + { + for (let i = 0; i < filter.siteKeys.length; i++) + delete this.keys[filter.siteKeys[i]]; + } + else + this.whitelist.remove(filter); + } + else + this.blacklist.remove(filter); + + if (this.cacheEntries > 0) + { + this.resultCache = {__proto__: null}; + this.cacheEntries = 0; + } + }, + + /** + * @see Matcher#findKeyword + */ + findKeyword: function(filter) + { + if (filter instanceof WhitelistFilter) + return this.whitelist.findKeyword(filter); + else + return this.blacklist.findKeyword(filter); + }, + + /** + * @see Matcher#hasFilter + */ + hasFilter: function(filter) + { + if (filter instanceof WhitelistFilter) + return this.whitelist.hasFilter(filter); + else + return this.blacklist.hasFilter(filter); + }, + + /** + * @see Matcher#getKeywordForFilter + */ + getKeywordForFilter: function(filter) + { + if (filter instanceof WhitelistFilter) + return this.whitelist.getKeywordForFilter(filter); + else + return this.blacklist.getKeywordForFilter(filter); + }, + + /** + * Checks whether a particular filter is slow + */ + isSlowFilter: function(/**RegExpFilter*/ filter) /**Boolean*/ + { + let matcher = (filter instanceof WhitelistFilter ? this.whitelist : this.blacklist); + if (matcher.hasFilter(filter)) + return !matcher.getKeywordForFilter(filter); + else + return !matcher.findKeyword(filter); + }, + + /** + * Optimized filter matching testing both whitelist and blacklist matchers + * simultaneously. For parameters see Matcher.matchesAny(). + * @see Matcher#matchesAny + */ + matchesAnyInternal: function(location, contentType, docDomain, thirdParty, privatenode) + { + let candidates = location.toLowerCase().match(/[a-z0-9%]{3,}/g); + if (candidates === null) + candidates = []; + candidates.push(""); + + let blacklistHit = null; + for (let i = 0, l = candidates.length; i < l; i++) + { + let substr = candidates[i]; + if (substr in this.whitelist.filterByKeyword) + { + let result = this.whitelist._checkEntryMatch(substr, location, contentType, docDomain, thirdParty, privatenode); + if (result) + return result; + } + if (substr in this.blacklist.filterByKeyword && blacklistHit === null) + blacklistHit = this.blacklist._checkEntryMatch(substr, location, contentType, docDomain, thirdParty, privatenode); + } + return blacklistHit; + }, + + /** + * @see Matcher#matchesAny + */ + matchesAny: function(location, contentType, docDomain, thirdParty, privatenode) + { + let key = location + " " + contentType + " " + docDomain + " " + thirdParty; + if (!privatenode){ + if (key in this.resultCache) + return this.resultCache[key]; + } + + let result = this.matchesAnyInternal(location, contentType, docDomain, thirdParty, privatenode); + + if (this.cacheEntries >= CombinedMatcher.maxCacheEntries) + { + this.resultCache = {__proto__: null}; + this.cacheEntries = 0; + } + + if (!privatenode){ + this.resultCache[key] = result; + this.cacheEntries++; + } + + return result; + }, + + /** + * Looks up whether any filters match the given website key. + */ + matchesByKey: function(/**String*/ location, /**String*/ key, /**String*/ docDomain) + { + key = key.toUpperCase(); + if (key in this.keys) + { + let filter = Filter.knownFilters[this.keys[key]]; + if (filter && filter.matches(location, "DOCUMENT", docDomain, false)) + return filter; + else + return null; + } + else + return null; + } +} + +/** + * Shared CombinedMatcher instance that should usually be used. + * @type CombinedMatcher + */ +let defaultMatcher = exports.defaultMatcher = new CombinedMatcher(); |