Jelajahi Sumber

Add script to export top domains

Daoud Clarke 3 tahun lalu
induk
komit
171fa645d2
2 file diubah dengan 15 penambahan dan 0 penghapusan
  1. 13 0
      analyse/export_top_domains.py
  2. 2 0
      mwmbl/indexer/paths.py

+ 13 - 0
analyse/export_top_domains.py

@@ -0,0 +1,13 @@
+import json
+
+from mwmbl.indexer.paths import TOP_DOMAINS_JSON_PATH
+from mwmbl.tinysearchengine.hn_top_domains_filtered import DOMAINS
+
+
+def export_top_domains_to_json():
+    with open(TOP_DOMAINS_JSON_PATH, 'w') as output_file:
+        json.dump(DOMAINS, output_file, indent=2)
+
+
+if __name__ == '__main__':
+    export_top_domains_to_json()

+ 2 - 0
mwmbl/indexer/paths.py

@@ -20,3 +20,5 @@ DOMAINS_TITLES_QUEUE_NAME = 'domains-title-queue-fs'
 DOMAINS_PATH = os.path.join(DATA_DIR, 'top10milliondomains.csv.gz')
 
 INDEX_PATH = Path(__file__).parent / 'data' / 'index.tinysearch'
+
+TOP_DOMAINS_JSON_PATH = DATA_DIR / 'hn-top-domains.json'