mwmbl/analyse/search.py

33 lines
889 B
Python
Raw Normal View History

2022-08-08 21:52:37 +00:00
import logging
import sys
from itertools import islice
2022-08-08 21:52:37 +00:00
2023-10-10 12:51:06 +00:00
from mwmbl.indexer import INDEX_PATH
2022-08-08 21:52:37 +00:00
from mwmbl.tinysearchengine.completer import Completer
2023-10-10 12:51:06 +00:00
from mwmbl.tinysearchengine import TinyIndex, Document
2022-08-08 21:52:37 +00:00
from mwmbl.tinysearchengine.rank import HeuristicRanker
# Send all log records (DEBUG and above) to stdout so that they appear
# alongside the printed search results when this script is run.
logging.basicConfig(level=logging.DEBUG, stream=sys.stdout)
def clean(sequence) -> str:
    """Concatenate the ``'value'`` entries of *sequence* into one string.

    Args:
        sequence: An iterable of mappings, each with a ``'value'`` key
            holding a string fragment. ``None`` is treated as empty.

    Returns:
        The fragments joined in order with no separator; ``''`` when
        *sequence* is ``None`` or contains no items.
    """
    # A missing field should print as an empty line, not crash the script.
    if sequence is None:
        return ''
    return ''.join(part['value'] for part in sequence)
2022-08-08 21:52:37 +00:00
def run(query: str = 'jasper fforde', num_results: int = 10) -> None:
    """Search the index for *query* and print the top results to stdout.

    Opens the index at ``INDEX_PATH``, ranks matches with a
    ``HeuristicRanker`` and prints each result as a numbered URL followed
    by its title and extract.

    Args:
        query: The search terms passed to the ranker.
        num_results: Maximum number of results to print.
    """
    with TinyIndex(Document, INDEX_PATH) as tiny_index:
        completer = Completer()
        ranker = HeuristicRanker(tiny_index, completer)
        items = ranker.search(query)
        print()

        # Results are consumed while the index is still open, in case the
        # ranker reads from it lazily.
        if items:
            for position, item in enumerate(islice(items, num_results), start=1):
                print(f"{position}. {item['url']}")
                print(clean(item['title']))
                print(clean(item['extract']))
                print()
2022-08-08 21:52:37 +00:00
# Script entry point: run the demo search only when executed directly,
# not when this module is imported.
if __name__ == '__main__':
    run()