diff options
Diffstat (limited to 'tools/process-json-files.py')
-rw-r--r-- | tools/process-json-files.py | 70 |
1 files changed, 68 insertions, 2 deletions
diff --git a/tools/process-json-files.py b/tools/process-json-files.py
index 3a3bde1..d449057 100644
--- a/tools/process-json-files.py
+++ b/tools/process-json-files.py
@@ -3,6 +3,7 @@
 # Copyright (C) 2020, 2021 grizzlyuser <grizzlyuser@protonmail.com>
 # Copyright (C) 2020, 2021 Ruben Rodriguez <ruben@gnu.org>
 # Copyright (C) 2021 Amin Bandali <bandali@gnu.org>
+# Copyright (C) 2025 Mark H Weaver <mhw@netris.org>
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -24,6 +25,7 @@ import time
 import copy
 import argparse
 import pathlib
+import re
 from collections import namedtuple
 from jsonschema import validate
 
@@ -152,7 +154,7 @@ class SearchConfig(RemoteSettings):
         'main/search-config.json',
     )
     SCHEMA_PATH = arguments.MAIN_PATH / \
-        'toolkit/components/search/schema/search-engine-config-schema.json'
+        'toolkit/components/search/schema/search-config-schema.json'
     OUTPUT_PATH = JSON_PATHS[0]
 
     _DUCKDUCKGO_SEARCH_ENGINE_ID = 'ddg@search.mozilla.org'
@@ -188,6 +190,68 @@ class SearchConfig(RemoteSettings):
         return search_engine
 
 
+class SearchConfigV2(RemoteSettings):
+    JSON_PATHS = (
+        RemoteSettings.DUMPS_PATH_ABSOLUTE /
+        'main/search-config-v2.json',
+    )
+    SCHEMA_PATH = arguments.MAIN_PATH / \
+        'toolkit/components/search/schema/search-config-v2-schema.json'
+    OUTPUT_PATH = JSON_PATHS[0]
+
+    _DUCKDUCKGO_SEARCH_ENGINE_ID = 'ddg'
+    _REDDIT_SEARCH_ENGINE_ID = 'reddit'
+
+    @classmethod
+    def should_drop_record(cls, record):
+        return (record['recordType'] == 'engine' and
+                record['identifier'] not in (
+                    cls._DUCKDUCKGO_SEARCH_ENGINE_ID,
+                    cls._REDDIT_SEARCH_ENGINE_ID ) and
+                not re.match('wikipedia', record['identifier']))
+
+    @classmethod
+    def process_record(cls, record):
+        if record['recordType'] == 'engine':
+            if record['identifier'] == cls._DUCKDUCKGO_SEARCH_ENGINE_ID:
+                del record['variants'][1:]
+                del record['variants'][0]['subVariants']
+                del record['base']['urls']['search']['params']
+                record['base']['urls']['search']['base'] = "https://html.duckduckgo.com/html"
+        if record['recordType'] == 'defaultEngines':
+            record['globalDefault'] = cls._DUCKDUCKGO_SEARCH_ENGINE_ID
+            record['specificDefaults'] = []
+        return record
+
+
+class SearchConfigOverrides(RemoteSettings):
+    JSON_PATHS = (
+        RemoteSettings.DUMPS_PATH_ABSOLUTE /
+        'main/search-config-overrides.json',
+    )
+    SCHEMA_PATH = arguments.MAIN_PATH / \
+        'toolkit/components/search/schema/search-config-overrides-schema.json'
+    OUTPUT_PATH = JSON_PATHS[0]
+
+    @classmethod
+    def should_drop_record(cls, record):
+        return True
+
+
+class SearchConfigOverridesV2(RemoteSettings):
+    JSON_PATHS = (
+        RemoteSettings.DUMPS_PATH_ABSOLUTE /
+        'main/search-config-overrides-v2.json',
+    )
+    SCHEMA_PATH = arguments.MAIN_PATH / \
+        'toolkit/components/search/schema/search-config-overrides-v2-schema.json'
+    OUTPUT_PATH = JSON_PATHS[0]
+
+    @classmethod
+    def should_drop_record(cls, record):
+        return True
+
+
 class TippyTopSites:
     JSON_PATHS = (
         arguments.MAIN_PATH /
@@ -231,7 +295,9 @@ class TopSites(RemoteSettings):
 
 # To reflect the latest timestamps, Changes class should always come after
 # all other RemoteSettings subclasses
-processors = (SearchConfig, Changes)
+processors = (SearchConfig, SearchConfigOverrides,
+              SearchConfigV2, SearchConfigOverridesV2,
+              Changes)
 
 for processor in processors:
     parsed_jsons = []