@@ -0,0 +1,13 @@
+import json
+
+from mwmbl.indexer.paths import TOP_DOMAINS_JSON_PATH
+from mwmbl.tinysearchengine.hn_top_domains_filtered import DOMAINS
+def export_top_domains_to_json():
+ with open(TOP_DOMAINS_JSON_PATH, 'w') as output_file:
+ json.dump(DOMAINS, output_file, indent=2)
+if __name__ == '__main__':  # run the export only when executed as a script, not on import
+ export_top_domains_to_json()
@@ -20,3 +20,5 @@ DOMAINS_TITLES_QUEUE_NAME = 'domains-title-queue-fs'
DOMAINS_PATH = os.path.join(DATA_DIR, 'top10milliondomains.csv.gz')  # file inside DATA_DIR, built with os.path.join
INDEX_PATH = Path(__file__).parent / 'data' / 'index.tinysearch'  # resolved relative to this module's directory, not DATA_DIR
+TOP_DOMAINS_JSON_PATH = DATA_DIR / 'hn-top-domains.json'  # NOTE(review): `/` requires DATA_DIR to be a Path-like object (a plain str raises TypeError) — confirm against its definition