Fix relative path

This commit is contained in:
Daoud Clarke 2022-06-17 23:19:30 +01:00
parent 1c7420e5fb
commit 5ea9efcfa2
2 changed files with 9 additions and 18 deletions

View file

@@ -16,9 +16,6 @@ from mwmbl.tinysearchengine.rank import HeuristicRanker
logging.basicConfig()
TERMS_PATH = Path(__file__).parent.parent / 'resources' / 'mwmbl-crawl-terms.csv'
def setup_args():
parser = argparse.ArgumentParser(description="mwmbl-tinysearchengine")
parser.add_argument("--index", help="Path to the tinysearchengine index file", default="/data/index.tinysearch")
@@ -42,9 +39,7 @@ def run():
except FileExistsError:
print("Index already exists")
# Load term data
terms = pd.read_csv(TERMS_PATH)
completer = Completer(terms)
completer = Completer()
with TinyIndex(item_factory=Document, index_path=args.index) as tiny_index:
ranker = HeuristicRanker(tiny_index, completer)

View file

@@ -1,12 +1,17 @@
from bisect import bisect_left, bisect_right
from datetime import datetime
from pathlib import Path
import pandas as pd
from pandas import DataFrame
TERMS_PATH = Path(__file__).parent.parent.parent / 'resources' / 'mwmbl-crawl-terms.csv'
class Completer:
def __init__(self, terms: DataFrame, num_matches: int = 3):
def __init__(self, num_matches: int = 3):
# Load term data
terms = pd.read_csv(TERMS_PATH)
terms_dict = terms.sort_values('term').set_index('term')['count'].to_dict()
self.terms = list(terms_dict.keys())
self.counts = list(terms_dict.values())
@@ -26,12 +31,3 @@ class Completer:
counts, terms = zip(*top_terms)
return list(terms)
if __name__ == '__main__':
data = pd.read_csv('data/mwmbl-crawl-terms.csv')
completer = Completer(data)
start = datetime.now()
completer.complete('fa')
end = datetime.now()
print("Time", end - start)