diff options
Diffstat (limited to 'data/extensions/https-everywhere-eff@eff.org/chrome/content/code/HTTPSRules.js')
-rw-r--r-- | data/extensions/https-everywhere-eff@eff.org/chrome/content/code/HTTPSRules.js | 349 |
1 files changed, 233 insertions, 116 deletions
diff --git a/data/extensions/https-everywhere-eff@eff.org/chrome/content/code/HTTPSRules.js b/data/extensions/https-everywhere-eff@eff.org/chrome/content/code/HTTPSRules.js index f100a79..0dd2736 100644 --- a/data/extensions/https-everywhere-eff@eff.org/chrome/content/code/HTTPSRules.js +++ b/data/extensions/https-everywhere-eff@eff.org/chrome/content/code/HTTPSRules.js @@ -68,6 +68,10 @@ RuleSet.prototype = { ensureCompiled: function() { // Postpone compilation of exclusions, rules and cookies until now, to accelerate // browser load time. + // NOTE: Since rulesets are now lazy-loaded in FF, this will be called immediately + // after the ruleset is loaded, and doesn't give much startup benefit. We + // may want to switch these back so patterns are compiled immediately on + // ruleset load, for simplicity. if (this.compiled) return; var i; @@ -387,43 +391,86 @@ const HTTPSRules = { // applicable ruleset ids this.rulesetsByID = {}; this.rulesetsByName = {}; - var t1 = new Date().getTime(); + this.targetsLoaded = false; + this.targetsLoadingCallbacks = []; this.checkMixedContentHandling(); var rulefiles = RuleWriter.enumerate(RuleWriter.getCustomRuleDir()); this.scanRulefiles(rulefiles); // Initialize database connection. var dbFile = new FileUtils.File(RuleWriter.chromeToPath("chrome://https-everywhere/content/rulesets.sqlite")); - var rulesetDBConn = Services.storage.openDatabase(dbFile); - this.queryForRuleset = rulesetDBConn.createStatement( - "select contents from rulesets where id = :id"); - - // Preload the mapping of hostname target -> ruleset ID from DB. - // This is a little slow (287 ms on a Core2 Duo @ 2.2GHz with SSD), - // but is faster than loading all of the rulesets. If this becomes a - // bottleneck, change it to load in a background webworker, or load - // a smaller bloom filter instead. - var targetsQuery = rulesetDBConn.createStatement("select host, ruleset_id from targets"); - this.log(DBUG, "Loading targets..."); - while (targetsQuery.executeStep()) { - var host = targetsQuery.row.host; - var id = targetsQuery.row.ruleset_id; - if (!this.targets[host]) { - this.targets[host] = [id]; - } else { - this.targets[host].push(id); - } - } - this.log(DBUG, "Loading adding targets."); + this.rulesetDBConn = Services.storage.openDatabase(dbFile); } catch(e) { this.log(DBUG,"Rules Failed: "+e); } - var t2 = new Date().getTime(); - this.log(NOTE,"Loading targets took " + (t2 - t1) / 1000.0 + " seconds"); return; }, + loadTargets: function(callback) { + if (this.targetsLoaded) { + callback(); + return; + } + // loadTargets can be called multiple times before it resolves. We store a + // list of callbacks to call when done, and make sure we only actually do + // the query once. + this.targetsLoadingCallbacks.push(callback); + if (this.targetsLoadingCallbacks.length > 1) { + this.log(DBUG, "Skipping loadTargets, a query is already in progress."); + } + // Load the mapping of hostname target -> ruleset ID from DB. + // This is a little slow (287 ms on a Core2 Duo @ 2.2GHz with SSD), + // but is faster than loading all of the rulesets. If this becomes a + // bottleneck, change it to load in a background webworker, or load + // a smaller bloom filter instead. + var t1 = new Date().getTime(); + var query = this.rulesetDBConn.createStatement("select host, ruleset_id from targets"); + var that = this; + var count = 0; + this.log(INFO, "Querying targets"); + // TODO: Store "this is pending" and resolve all pending once the whole + // thing is loaded. + query.executeAsync({ + handleResult: function(aResultSet) { + try { + for (let row = aResultSet.getNextRow(); + row; + row = aResultSet.getNextRow()) { + var host = row.getResultByName("host"); + var id = row.getResultByName("ruleset_id"); + count ++; + if (!that.targets[host]) { + that.targets[host] = [id]; + } else { + that.targets[host].push(id); + } + } + } catch (e) { + that.log(WARN, "ERROR " + e); + } + }, + handleError: function(aError) { + that.log(WARN, "SQLite error loading targets: " + aError.message); + callback(); + }, + + handleCompletion: function(aReason) { + if (aReason != Components.interfaces.mozIStorageStatementCallback.REASON_FINISHED) { + that.log(WARN, "SQLite query canceled or aborted!"); + } else { + var t2 = new Date().getTime(); + that.log(NOTE, "Loading " + count + " targets took " + (t2 - t1) / 1000.0 + " seconds"); + that.targetsLoadingCallbacks.forEach(function(callback) { + callback(); + }); + that.targetsLoadingCallbacks = []; + that.targetsLoaded = true; + } + } + }); + }, + checkMixedContentHandling: function() { // Firefox 23+ blocks mixed content by default, so rulesets that create // mixed content situations should be disabled there @@ -472,8 +519,9 @@ const HTTPSRules = { } }, - - rewrittenURI: function(alist, input_uri) { + // return true iff callback has been called already, false if callback will be + // called asynchronously + rewrittenURI: function(alist, input_uri, callback) { // This function oversees the task of working out if a uri should be // rewritten, what it should be rewritten to, and recordkeeping of which // applicable rulesets are and aren't active. Previously this returned @@ -487,44 +535,55 @@ const HTTPSRules = { if (!alist) this.log(DBUG, "No applicable list rewriting " + input_uri.spec); this.log(DBUG, "Processing " + input_uri.spec); - var uri = this.sanitiseURI(input_uri); - // Get the list of rulesets that target this host try { - var rs = this.potentiallyApplicableRulesets(uri.host); + var host = input_uri.host } catch(e) { - this.log(NOTE, 'Could not check applicable rules for '+uri.spec + '\n'+e); - return null; - } - - // ponder each potentially applicable ruleset, working out if it applies - // and recording it as active/inactive/moot/breaking in the applicable list - for (i = 0; i < rs.length; ++i) { - if (!rs[i].active) { - if (alist && rs[i].wouldMatch(uri, alist)) - alist.inactive_rule(rs[i]); - continue; - } - blob.newuri = rs[i].transformURI(uri); - if (blob.newuri) { - if (alist) { - if (uri.spec in https_everywhere_blacklist) - alist.breaking_rule(rs[i]); - else - alist.active_rule(rs[i]); - } - if (userpass_present) blob.newuri.userPass = input_uri.userPass; - blob.applied_ruleset = rs[i]; - return blob; + // NS_ERROR_FAILURE is normal for accessing uri.host. It just means that + // host is not applicable for the URI scheme, e.g. about: URIs. + // If that happens we quietly return null. If another exception happens + // we noisily return null. + if (e.name != "NS_ERROR_FAILURE") { + this.log(WARN, 'Could not get host from ' + input_uri.spec + ': ' + e); } - if (uri.scheme == "https" && alist) { - // we didn't rewrite but the rule applies to this domain and the - // requests are going over https - if (rs[i].wouldMatch(uri, alist)) alist.moot_rule(rs[i]); - continue; - } - } - return null; + callback(null); + return true; + } + var that = this; + return this.potentiallyApplicableRulesets(host, function(rs) { + var uri = that.sanitiseURI(input_uri); + // ponder each potentially applicable ruleset, working out if it applies + // and recording it as active/inactive/moot/breaking in the applicable list + for (i = 0; i < rs.length; ++i) { + if (!rs[i].active) { + if (alist && rs[i].wouldMatch(uri, alist)) + alist.inactive_rule(rs[i]); + continue; + } + blob.newuri = rs[i].transformURI(uri); + if (blob.newuri) { + if (alist) { + if (uri.spec in https_everywhere_blacklist) { + alist.breaking_rule(rs[i]); + } else { + alist.active_rule(rs[i]); + } + } + if (userpass_present) blob.newuri.userPass = input_uri.userPass; + blob.applied_ruleset = rs[i]; + callback(blob); + return + } + if (uri.scheme == "https" && alist) { + // we didn't rewrite but the rule applies to this domain and the + // requests are going over https + if (rs[i].wouldMatch(uri, alist)) alist.moot_rule(rs[i]); + continue; + } + } + callback(null); + return; + }); }, sanitiseURI: function(input_uri) { @@ -560,7 +619,7 @@ const HTTPSRules = { } } catch(e3) { this.log(INFO, "uri.host is explosive!"); - try { this.log(INFO, "(" + uri.spec + ")"); } // happens for about: uris and soforth + try { this.log(INFO, "(" + uri.spec + ")"); } // happens for about: uris and so forth catch(e4) { this.log(WARN, "(and unprintable!!!!!!)"); } } return uri; @@ -576,44 +635,82 @@ const HTTPSRules = { }, // Load a ruleset by numeric id, e.g. 234 - // NOTE: This call runs synchronously, which can lock up the browser UI. Is - // there any way to fix that, given that we need to run blocking in the request - // flow? Perhaps we can preload all targets from the DB into memory at startup - // so we only hit the DB when we know there is something to be had. - loadRulesetById: function(ruleset_id) { - this.queryForRuleset.params.id = ruleset_id; - - try { - if (this.queryForRuleset.executeStep()) { - RuleWriter.readFromString(this.queryForRuleset.row.contents, this, ruleset_id); - } else { - this.log(WARN,"Couldn't find ruleset for id " + ruleset_id); + loadRulesetById: function(ruleset_id, callback) { + var query = this.rulesetDBConn.createStatement( + "select contents from rulesets where id = :id"); + query.params.id = ruleset_id; + var that = this; + query.executeAsync({ + handleResult: function(aResultSet) { + for (let row = aResultSet.getNextRow(); + row; + row = aResultSet.getNextRow()) { + + let value = row.getResultByName("contents"); + RuleWriter.readFromString(value, that, ruleset_id); + } + }, + handleError: function(aError) { + that.log(WARN, "SQLite error: " + aError.message); + callback(); + }, + + handleCompletion: function(aReason) { + if (aReason != Components.interfaces.mozIStorageStatementCallback.REASON_FINISHED) { + that.log(WARN, "SQLite query canceled or aborted!"); + } + callback(); } - } finally { - this.queryForRuleset.reset(); - } + }); }, // Get all rulesets matching a given target, lazy-loading from DB as necessary. - rulesetsByTarget: function(target) { - var rulesetIds = this.targets[target]; - - var output = []; - if (rulesetIds) { - this.log(INFO, "For target " + target + ", found ids " + rulesetIds.toString()); - for (var i = 0; i < rulesetIds.length; i++) { - var id = rulesetIds[i]; - if (!this.rulesetsByID[id]) { - this.loadRulesetById(id); - } - if (this.rulesetsByID[id]) { - output.push(this.rulesetsByID[id]); + // Returns true if callback was called immediately: i.e., didn't have to go async. + rulesetsByTargets: function(targets, callback) { + // If the array of target hosts is not already loaded, load it + // (asynchronously). This should only happen once. + if (!this.targetsLoaded) { + this.log(INFO, "Loading targets"); + this.loadTargets(this.rulesetsByTargets.bind(this, targets, callback)); + return false; + } else { + this.log(INFO, "Targets are loaded " + this.targets["www.eff.org"]); + } + var foundIds = []; + var neededIds = []; + var that = this; + targets.forEach(function(target) { + var rulesetIds = that.targets[target] || []; + rulesetIds.forEach(function(id) { + foundIds.push(id); + if (!that.rulesetsByID[id]) { + neededIds.push(id); } + }); + }); + + this.log(DBUG, "For targets " + targets.join(' ') + + ", found ids " + foundIds + ", need to load: " + neededIds); + + var callbackImmediate = true; + function loadOne(done) { + if (neededIds.length !== 0) { + callbackImmediate = false; + that.loadRulesetById(neededIds.pop(), loadOne.bind(null, done)); + } else { + done(); } - } else { - this.log(DBUG, "For target " + target + ", found no ids in DB"); } - return output; + + loadOne(function() { + output = foundIds.map(function(id) { + return that.rulesetsByID[id]; + }) + that.log(DBUG, "Callback from rulesetsByTargets output = " + output); + callback(output); + }); + that.log(DBUG, "Returning from rulesetsByTargets callbackImmediate = " + callbackImmediate); + return callbackImmediate; }, /** @@ -622,17 +719,15 @@ const HTTPSRules = { * This function is only defined for fully-qualified hostnames. Wildcards and * cookie-style domain attributes with a leading dot are not permitted. * @param host {string} - * @return {Array.<RuleSet>} + * @return true iff we didn't have to go async to load rules */ - potentiallyApplicableRulesets: function(host) { + potentiallyApplicableRulesets: function(host, callback) { + if (!callback) { + this.log(WARN, 'Bad problem: potentiallyApplicableRulesets called without callback.'); + return false; + } var i, tmp, t; - var results = []; - - var attempt = function(target) { - this.setInsert(results, this.rulesetsByTarget(target)); - }.bind(this); - - attempt(host); + var targetsToTry = [host]; // replace each portion of the domain with a * in turn var segmented = host.split("."); @@ -640,23 +735,26 @@ const HTTPSRules = { tmp = segmented[i]; if (tmp.length === 0) { this.log(WARN,"Malformed host passed to potentiallyApplicableRulesets: " + host); - return null; + return false; } segmented[i] = "*"; t = segmented.join("."); segmented[i] = tmp; - attempt(t); + targetsToTry.push(t); } // now eat away from the left, with *, so that for x.y.z.google.com we // check *.z.google.com and *.google.com (we did *.y.z.google.com above) for (i = 2; i <= segmented.length - 2; ++i) { t = "*." + segmented.slice(i,segmented.length).join("."); - attempt(t); - } - this.log(DBUG,"Potentially applicable rules for " + host + ":"); - for (i = 0; i < results.length; ++i) - this.log(DBUG, " " + results[i].name); - return results; + targetsToTry.push(t) + } + var that = this; + return this.rulesetsByTargets(targetsToTry, function(rulesets) { + that.log(DBUG,"Potentially applicable rules for " + host + ":"); + for (i = 0; i < rulesets.length; ++i) + that.log(DBUG, " " + rulesets[i].name); + callback(rulesets); + }); }, /** @@ -676,7 +774,7 @@ const HTTPSRules = { }, /** - * Check to see if the Cookie object c meets any of our cookierule citeria + * Check to see if the Cookie object c meets any of our cookierule criteria * for being marked as secure. * * @param applicable_list {ApplicableList} an ApplicableList for record keeping @@ -692,13 +790,33 @@ const HTTPSRules = { var i,j; // potentiallyApplicableRulesets is defined on hostnames not cookie-style // "domain" attributes, so we strip a leading dot before calling. - var rs = this.potentiallyApplicableRulesets(this.hostFromCookieDomain(c.host)); + var host = this.hostFromCookieDomain(c.host); + + // When checking for potentially applicable rulesets, we have to wait for a + // callback, because we may need to load the rulesets from disk. However, in + // practice this callback will always be run immediately, because the + // ruleset for the necessary host will have been loaded already for the HTTP + // request. + var result; + var callbackedImmediate = this.potentiallyApplicableRulesets(host, function(rs) { + result = this.shouldSecureCookieWithRulesets(applicable_list, c, known_https, rs); + }.bind(this)); + if (callbackedImmediate) { + return result; + } else { + this.log(WARN, "Shouldn't happen: rulesets were not already loaded for host " + host) + // Default to securing cookies if we aren't sure. + return true + } + }, + + shouldSecureCookieWithRulesets: function(applicable_list, c, known_https, rs) { for (i = 0; i < rs.length; ++i) { var ruleset = rs[i]; if (ruleset.active) { ruleset.ensureCompiled(); // Never secure a cookie if this page might be HTTP - if (!(known_https || this.safeToSecureCookie(c.rawHost))) { + if (!(known_https || this.safeToSecureCookie(c.rawHost, rs))) { continue; } for (j = 0; j < ruleset.cookierules.length; j++) { @@ -738,9 +856,10 @@ const HTTPSRules = { * flagged as secure. * * @param domain {string} The cookie's 'domain' attribute. + * @param rs {Array.<Ruleset>} A list of potentially applicable rulesets. * @return {boolean} True if it's safe to secure a cookie on that domain. */ - safeToSecureCookie: function(domain) { + safeToSecureCookie: function(domain, rs) { if (domain in https_blacklist_domains) { this.log(INFO, "cookies for " + domain + "blacklisted"); return false; @@ -759,9 +878,7 @@ const HTTPSRules = { } this.log(DBUG, "Testing securecookie applicability with " + test_uri); - // potentiallyApplicableRulesets is defined on hostnames not cookie-style - // "domain" attributes, so we strip a leading dot before calling. - var rs = this.potentiallyApplicableRulesets(this.hostFromCookieDomain(domain)); + for (var i = 0; i < rs.length; ++i) { if (!rs[i].active) continue; var rewrite = rs[i].apply(test_uri); |