Keep track of curated documents
This commit is contained in:
parent
f0592f99df
commit
8d64af4f1b
6 changed files with 684 additions and 571 deletions
Binary file not shown.
|
@@ -166,7 +166,7 @@ def create_router(index_path: str) -> APIRouter:
|
||||||
print("Key", term)
|
print("Key", term)
|
||||||
|
|
||||||
documents = [
|
documents = [
|
||||||
Document(result.title, result.url, result.extract, MAX_CURATED_SCORE - i, term, DocumentState.CURATED.value)
|
Document(result.title, result.url, result.extract, MAX_CURATED_SCORE - i, term, result.curated)
|
||||||
for i, result in enumerate(curation.results)
|
for i, result in enumerate(curation.results)
|
||||||
]
|
]
|
||||||
page_index = indexer.get_key_page_index(term)
|
page_index = indexer.get_key_page_index(term)
|
||||||
|
|
|
@@ -31,7 +31,8 @@ def astuple(dc):
|
||||||
|
|
||||||
|
|
||||||
class DocumentState(IntEnum):
|
class DocumentState(IntEnum):
|
||||||
CURATED = 1
|
CURATED = 0
|
||||||
|
VALIDATED = 1
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
|
|
|
@@ -162,7 +162,8 @@ class Ranker:
|
||||||
# Check for curation
|
# Check for curation
|
||||||
curation_term = " ".join(terms)
|
curation_term = " ".join(terms)
|
||||||
curation_items = self.tiny_index.retrieve(curation_term)
|
curation_items = self.tiny_index.retrieve(curation_term)
|
||||||
curated_items = [d for d in curation_items if d.state == DocumentState.CURATED and d.term == curation_term]
|
curated_items = [d for d in curation_items if d.state in {DocumentState.CURATED, DocumentState.VALIDATED}
|
||||||
|
and d.term == curation_term]
|
||||||
if len(curated_items) > 0:
|
if len(curated_items) > 0:
|
||||||
return curated_items, terms, completions
|
return curated_items, terms, completions
|
||||||
|
|
||||||
|
|
1245
poetry.lock
generated
1245
poetry.lock
generated
File diff suppressed because it is too large
Load diff
|
@@ -20,6 +20,7 @@ psycopg2-binary = "^2.9.3"
|
||||||
spacy = "==3.2.1"
|
spacy = "==3.2.1"
|
||||||
pytest = "^7.2.1"
|
pytest = "^7.2.1"
|
||||||
pytest-mock = "^3.10.0"
|
pytest-mock = "^3.10.0"
|
||||||
|
jusText = "==3.0.0"
|
||||||
|
|
||||||
# Optional dependencies do not get installed by default. Look under tool.poetry.extras section
|
# Optional dependencies do not get installed by default. Look under tool.poetry.extras section
|
||||||
# to see which extras to use.
|
# to see which extras to use.
|
||||||
|
@@ -28,7 +29,6 @@ warcio = {version= "==1.7.4", optional = true}
|
||||||
idna = {version= "==3.3", optional = true}
|
idna = {version= "==3.3", optional = true}
|
||||||
beautifulsoup4 = {version= "==4.10.0", optional = true}
|
beautifulsoup4 = {version= "==4.10.0", optional = true}
|
||||||
lxml = {version= "==4.6.4", optional = true}
|
lxml = {version= "==4.6.4", optional = true}
|
||||||
jusText = {version= "==3.0.0", optional = true}
|
|
||||||
langdetect = {version= "==1.0.9", optional = true}
|
langdetect = {version= "==1.0.9", optional = true}
|
||||||
pyarrow = {version= "==6.0.0", optional = true}
|
pyarrow = {version= "==6.0.0", optional = true}
|
||||||
pyspark = {version= "==3.2.0", optional = true}
|
pyspark = {version= "==3.2.0", optional = true}
|
||||||
|
|
Loading…
Reference in a new issue