Add script to export top domains

This commit is contained in:
Daoud Clarke 2022-01-23 22:04:30 +00:00
parent 908a9cf0b6
commit 171fa645d2
2 changed files with 15 additions and 0 deletions

View file

@ -0,0 +1,13 @@
import json
from mwmbl.indexer.paths import TOP_DOMAINS_JSON_PATH
from mwmbl.tinysearchengine.hn_top_domains_filtered import DOMAINS
def export_top_domains_to_json():
    """Serialise ``DOMAINS`` to the file at ``TOP_DOMAINS_JSON_PATH`` as JSON.

    The output is pretty-printed with a two-space indent. Any existing file
    at that path is overwritten.

    Raises:
        OSError: if the output file cannot be opened for writing.
        TypeError: if ``DOMAINS`` contains values ``json`` cannot serialise.
    """
    # Pin the encoding so the written file does not depend on the locale's
    # default text encoding.
    with open(TOP_DOMAINS_JSON_PATH, 'w', encoding='utf-8') as output_file:
        json.dump(DOMAINS, output_file, indent=2)
# Run the export only when this file is executed as a script, not on import.
if __name__ == "__main__":
    export_top_domains_to_json()

View file

@ -20,3 +20,5 @@ DOMAINS_TITLES_QUEUE_NAME = 'domains-title-queue-fs'
# Gzip-compressed CSV (going by its file name) located under DATA_DIR.
DOMAINS_PATH = os.path.join(DATA_DIR, 'top10milliondomains.csv.gz')
# Index file kept in a 'data' directory next to this module.
INDEX_PATH = Path(__file__).parent / 'data' / 'index.tinysearch'
# Destination file for the top-domains JSON export.
# NOTE(review): the `/` operator here requires DATA_DIR to be a pathlib.Path
# (a plain str would raise TypeError) — confirm against its definition.
TOP_DOMAINS_JSON_PATH = DATA_DIR / 'hn-top-domains.json'