Add source to results

This commit is contained in:
Daoud Clarke 2023-11-21 10:01:27 +00:00
parent 06c1958503
commit c4328474f7
7 changed files with 27 additions and 13 deletions

View file

@ -18,7 +18,7 @@ def run():
with TinyIndex(Document, INDEX_PATH) as tiny_index:
completer = Completer()
ranker = HeuristicRanker(tiny_index, completer)
items = ranker.search('jasper fforde')
items = ranker.search('jasper fforde', [])
print()
if items:
for i, item in enumerate(islice(items, 10)):

Binary file not shown.

View file

@ -148,6 +148,13 @@ body {
font-size: .9rem;
}
.via {
font-size: .9rem;
font-weight: var(--default-font-weight);
/* italics */
font-style: italic;
}
.result .title, .result .title>* {
color: var(--primary-color);
font-size: 1.1rem;

View file

@ -3,7 +3,7 @@ import re
from mwmbl.tokenizer import tokenize, clean_unicode
def format_result_with_pattern(pattern, result):
def format_result_with_pattern(pattern, result, source):
formatted_result = {}
for content_type, content_raw in [('title', result.title), ('extract', result.extract)]:
content = clean_unicode(content_raw)
@ -17,6 +17,7 @@ def format_result_with_pattern(pattern, result):
content_result.append({'value': content[start:end], 'is_bold': is_bold})
formatted_result[content_type] = content_result
formatted_result['url'] = result.url
formatted_result['source'] = source
return formatted_result
@ -34,8 +35,8 @@ def get_query_regex(terms, is_complete, is_url):
return pattern
def format_result(result, query):
def format_result(result, query, source):
tokens = tokenize(query)
pattern = get_query_regex(tokens, True, False)
return format_result_with_pattern(pattern, result)
return format_result_with_pattern(pattern, result, source)

View file

@ -3,7 +3,7 @@
<div class="result-container">
<div class="result-link">
<a href="{{result.url}}">
<p class='link'>{{result.url}}</p>
<p><span class='link'>{{result.url}}</span> <span class="via">— found via {{result.source|title}}</span></p>
<p class='title'>{{result.title|strengthen}}</p>
</a>
<p class='extract'>{{result.extract|strengthen}}</p>

View file

@ -3,6 +3,7 @@ import re
from abc import abstractmethod
from logging import getLogger
from operator import itemgetter
from typing import Optional
from urllib.parse import urlparse
from mwmbl.format import format_result_with_pattern, get_query_regex
@ -118,15 +119,20 @@ class Ranker:
def order_results(self, terms, pages, is_complete):
pass
def search(self, s: str, additional_results: list[Document]):
results, terms, _ = self.get_results(s)
def search(self, s: str, additional_results: list[Document], source: Optional[str] = None):
mwmbl_results, terms, _ = self.get_results(s)
is_complete = s.endswith(' ')
pattern = get_query_regex(terms, is_complete, False)
formatted_results = []
for result in additional_results + results:
formatted_result = format_result_with_pattern(pattern, result)
formatted_results.append(formatted_result)
seen_urls = set()
for results, source in [(additional_results, source), (mwmbl_results, 'mwmbl')]:
for result in results:
if result.url in seen_urls:
continue
formatted_result = format_result_with_pattern(pattern, result, source)
formatted_results.append(formatted_result)
seen_urls.add(result.url)
logger.info("Return results: %d", len(formatted_results))
return formatted_results

View file

@ -90,7 +90,7 @@ def _get_results_and_activity(request):
if query:
# There may be extra results in the request that we need to add in
# format is ?enhanced=google&title=title1&url=url1&extract=extract1&title=title2&url=url2&extract=extract2
# enhanced = request.GET.get("enhanced")
source = request.GET.get("enhanced", "unknown")
titles = request.GET.getlist(f"title")
urls = request.GET.getlist(f"url")
extracts = request.GET.getlist(f"extract")
@ -100,7 +100,7 @@ def _get_results_and_activity(request):
for title, url, extract in zip(titles, urls, extracts)
]
results = ranker.search(query, additional_results=additional_results)
results = ranker.search(query, additional_results=additional_results, source=source)
activity = None
else:
results = None
@ -137,5 +137,5 @@ def fetch_url(request):
result = Document(title=title, url=url, extract=extract, score=0.0)
return render(request, "result.html", {
"result": format_result(result, query),
"result": format_result(result, query, "user"),
})