Add script to export top domains
This commit is contained in:
parent
908a9cf0b6
commit
171fa645d2
2 changed files with 15 additions and 0 deletions
13
analyse/export_top_domains.py
Normal file
13
analyse/export_top_domains.py
Normal file
|
@ -0,0 +1,13 @@
|
|||
import json
|
||||
|
||||
from mwmbl.indexer.paths import TOP_DOMAINS_JSON_PATH
|
||||
from mwmbl.tinysearchengine.hn_top_domains_filtered import DOMAINS
|
||||
|
||||
|
||||
def export_top_domains_to_json():
    """Serialise ``DOMAINS`` as JSON into the file at ``TOP_DOMAINS_JSON_PATH``.

    The target file is opened in text write mode, so any existing content
    is replaced, and the data is written with a two-space indent.
    """
    with open(TOP_DOMAINS_JSON_PATH, mode='w') as json_file:
        json.dump(DOMAINS, fp=json_file, indent=2)
|
||||
|
||||
|
||||
# Script entry point: perform the export only when this module is executed
# directly, not when it is imported.
if __name__ == "__main__":
    export_top_domains_to_json()
|
|
@ -20,3 +20,5 @@ DOMAINS_TITLES_QUEUE_NAME = 'domains-title-queue-fs'
|
|||
DOMAINS_PATH = os.path.join(DATA_DIR, 'top10milliondomains.csv.gz')
|
||||
|
||||
INDEX_PATH = Path(__file__).parent / 'data' / 'index.tinysearch'
|
||||
|
||||
# Path of the JSON file that analyse/export_top_domains.py writes DOMAINS to.
# NOTE(review): the `/` operator needs DATA_DIR to be path-like (e.g. a
# pathlib.Path), whereas DOMAINS_PATH is built with os.path.join — confirm
# DATA_DIR's type.
TOP_DOMAINS_JSON_PATH = DATA_DIR / 'hn-top-domains.json'
|
||||
|
|
Loading…
Add table
Reference in a new issue