Compare commits

...

2 commits

Author SHA1 Message Date
Daoud Clarke
c4328474f7 Add source to results 2023-11-21 10:01:27 +00:00
Daoud Clarke
06c1958503 Allow additional results to be passed in 2023-11-20 14:54:11 +00:00
7 changed files with 39 additions and 13 deletions

View file

@@ -18,7 +18,7 @@ def run():
with TinyIndex(Document, INDEX_PATH) as tiny_index: with TinyIndex(Document, INDEX_PATH) as tiny_index:
completer = Completer() completer = Completer()
ranker = HeuristicRanker(tiny_index, completer) ranker = HeuristicRanker(tiny_index, completer)
items = ranker.search('jasper fforde') items = ranker.search('jasper fforde', [])
print() print()
if items: if items:
for i, item in enumerate(islice(items, 10)): for i, item in enumerate(islice(items, 10)):

Binary file not shown.

View file

@@ -148,6 +148,13 @@ body {
font-size: .9rem; font-size: .9rem;
} }
.via {
font-size: .9rem;
font-weight: var(--default-font-weight);
/* italics */
font-style: italic;
}
.result .title, .result .title>* { .result .title, .result .title>* {
color: var(--primary-color); color: var(--primary-color);
font-size: 1.1rem; font-size: 1.1rem;

View file

@@ -3,7 +3,7 @@ import re
from mwmbl.tokenizer import tokenize, clean_unicode from mwmbl.tokenizer import tokenize, clean_unicode
def format_result_with_pattern(pattern, result): def format_result_with_pattern(pattern, result, source):
formatted_result = {} formatted_result = {}
for content_type, content_raw in [('title', result.title), ('extract', result.extract)]: for content_type, content_raw in [('title', result.title), ('extract', result.extract)]:
content = clean_unicode(content_raw) content = clean_unicode(content_raw)
@@ -17,6 +17,7 @@ def format_result_with_pattern(pattern, result):
content_result.append({'value': content[start:end], 'is_bold': is_bold}) content_result.append({'value': content[start:end], 'is_bold': is_bold})
formatted_result[content_type] = content_result formatted_result[content_type] = content_result
formatted_result['url'] = result.url formatted_result['url'] = result.url
formatted_result['source'] = source
return formatted_result return formatted_result
@@ -34,8 +35,8 @@ def get_query_regex(terms, is_complete, is_url):
return pattern return pattern
def format_result(result, query): def format_result(result, query, source):
tokens = tokenize(query) tokens = tokenize(query)
pattern = get_query_regex(tokens, True, False) pattern = get_query_regex(tokens, True, False)
return format_result_with_pattern(pattern, result) return format_result_with_pattern(pattern, result, source)

View file

@@ -3,7 +3,7 @@
<div class="result-container"> <div class="result-container">
<div class="result-link"> <div class="result-link">
<a href="{{result.url}}"> <a href="{{result.url}}">
<p class='link'>{{result.url}}</p> <p><span class='link'>{{result.url}}</span> <span class="via">— found via {{result.source|title}}</span></p>
<p class='title'>{{result.title|strengthen}}</p> <p class='title'>{{result.title|strengthen}}</p>
</a> </a>
<p class='extract'>{{result.extract|strengthen}}</p> <p class='extract'>{{result.extract|strengthen}}</p>

View file

@@ -3,6 +3,7 @@ import re
from abc import abstractmethod from abc import abstractmethod
from logging import getLogger from logging import getLogger
from operator import itemgetter from operator import itemgetter
from typing import Optional
from urllib.parse import urlparse from urllib.parse import urlparse
from mwmbl.format import format_result_with_pattern, get_query_regex from mwmbl.format import format_result_with_pattern, get_query_regex
@@ -118,17 +119,22 @@ class Ranker:
def order_results(self, terms, pages, is_complete): def order_results(self, terms, pages, is_complete):
pass pass
def search(self, s: str): def search(self, s: str, additional_results: list[Document], source: Optional[str] = None):
results, terms, _ = self.get_results(s) mwmbl_results, terms, _ = self.get_results(s)
is_complete = s.endswith(' ') is_complete = s.endswith(' ')
pattern = get_query_regex(terms, is_complete, False) pattern = get_query_regex(terms, is_complete, False)
formatted_results = [] formatted_results = []
for result in results: seen_urls = set()
formatted_result = format_result_with_pattern(pattern, result) for results, source in [(additional_results, source), (mwmbl_results, 'mwmbl')]:
formatted_results.append(formatted_result) for result in results:
if result.url in seen_urls:
continue
formatted_result = format_result_with_pattern(pattern, result, source)
formatted_results.append(formatted_result)
seen_urls.add(result.url)
logger.info("Return results: %r", formatted_results) logger.info("Return results: %d", len(formatted_results))
return formatted_results return formatted_results
def complete(self, q: str): def complete(self, q: str):

View file

@@ -88,7 +88,19 @@ class Activity:
def _get_results_and_activity(request): def _get_results_and_activity(request):
query = request.GET.get("q") query = request.GET.get("q")
if query: if query:
results = ranker.search(query) # There may be extra results in the request that we need to add in
# format is ?enhanced=google&title=title1&url=url1&extract=extract1&title=title2&url=url2&extract=extract2
source = request.GET.get("enhanced", "unknown")
titles = request.GET.getlist(f"title")
urls = request.GET.getlist(f"url")
extracts = request.GET.getlist(f"extract")
additional_results = [
Document(title=title, url=url, extract=extract, score=0.0)
for title, url, extract in zip(titles, urls, extracts)
]
results = ranker.search(query, additional_results=additional_results, source=source)
activity = None activity = None
else: else:
results = None results = None
@@ -125,5 +137,5 @@ def fetch_url(request):
result = Document(title=title, url=url, extract=extract, score=0.0) result = Document(title=title, url=url, extract=extract, score=0.0)
return render(request, "result.html", { return render(request, "result.html", {
"result": format_result(result, query), "result": format_result(result, query, "user"),
}) })