2022-08-08 21:52:37 +00:00
|
|
|
import logging
|
|
|
|
import sys
|
2022-08-10 20:43:13 +00:00
|
|
|
from itertools import islice
|
2022-08-08 21:52:37 +00:00
|
|
|
|
2023-10-10 12:51:06 +00:00
|
|
|
from mwmbl.indexer import INDEX_PATH
|
2022-08-08 21:52:37 +00:00
|
|
|
from mwmbl.tinysearchengine.completer import Completer
|
2023-10-10 12:51:06 +00:00
|
|
|
from mwmbl.tinysearchengine import TinyIndex, Document
|
2022-08-08 21:52:37 +00:00
|
|
|
from mwmbl.tinysearchengine.rank import HeuristicRanker
|
|
|
|
|
|
|
|
# Send log records of level DEBUG and above to stdout so they appear
# alongside the search results printed by this script.
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
|
|
|
|
|
|
|
|
|
2022-08-10 20:43:13 +00:00
|
|
|
def clean(sequence) -> str:
    """Concatenate the ``'value'`` entry of every element of *sequence*.

    Args:
        sequence: An iterable of mappings, each of which has a ``'value'``
            key holding a string. (Presumably these are the title/extract
            fragments returned by the ranker -- confirm against the
            ranker's output format.)

    Returns:
        All ``'value'`` strings joined in order with no separator; the
        empty string when *sequence* is empty.

    Raises:
        KeyError: If an element has no ``'value'`` key.
    """
    return ''.join(part['value'] for part in sequence)
|
|
|
|
|
|
|
|
|
2022-08-08 21:52:37 +00:00
|
|
|
def run(query='jasper fforde', limit=10):
    """Search the index for *query* and print the top results to stdout.

    Opens the ``TinyIndex`` at ``INDEX_PATH``, ranks matches with a
    ``HeuristicRanker`` and prints, for each of the first *limit* results,
    its 1-based position and URL followed by its title and extract (each
    flattened with ``clean``) and a blank line.

    Args:
        query: The search string passed to the ranker. Defaults to the
            query this script originally hard-coded.
        limit: Maximum number of results to print. Defaults to 10.
    """
    # Everything stays inside the ``with`` so the index remains open while
    # the results are consumed.
    with TinyIndex(Document, INDEX_PATH) as tiny_index:
        completer = Completer()
        ranker = HeuristicRanker(tiny_index, completer)
        items = ranker.search(query)

        # Blank line to separate the results from any preceding log output.
        print()
        if not items:
            return

        for position, item in enumerate(islice(items, limit), start=1):
            print(f"{position}. {item['url']}")
            print(clean(item['title']))
            print(clean(item['extract']))
            print()
|
2022-08-08 21:52:37 +00:00
|
|
|
|
|
|
|
|
|
|
|
# Run the demo search only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    run()
|