Compare commits
2 commits
main...allow-extr
| Author | SHA1 | Date | |
|---|---|---|---|
| | c4328474f7 | | |
| | 06c1958503 | | |
7 changed files with 39 additions and 13 deletions
|
@ -18,7 +18,7 @@ def run():
|
||||||
with TinyIndex(Document, INDEX_PATH) as tiny_index:
|
with TinyIndex(Document, INDEX_PATH) as tiny_index:
|
||||||
completer = Completer()
|
completer = Completer()
|
||||||
ranker = HeuristicRanker(tiny_index, completer)
|
ranker = HeuristicRanker(tiny_index, completer)
|
||||||
items = ranker.search('jasper fforde')
|
items = ranker.search('jasper fforde', [])
|
||||||
print()
|
print()
|
||||||
if items:
|
if items:
|
||||||
for i, item in enumerate(islice(items, 10)):
|
for i, item in enumerate(islice(items, 10)):
|
||||||
|
|
Binary file not shown.
|
@ -148,6 +148,13 @@ body {
|
||||||
font-size: .9rem;
|
font-size: .9rem;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.via {
|
||||||
|
font-size: .9rem;
|
||||||
|
font-weight: var(--default-font-weight);
|
||||||
|
/* italics */
|
||||||
|
font-style: italic;
|
||||||
|
}
|
||||||
|
|
||||||
.result .title, .result .title>* {
|
.result .title, .result .title>* {
|
||||||
color: var(--primary-color);
|
color: var(--primary-color);
|
||||||
font-size: 1.1rem;
|
font-size: 1.1rem;
|
||||||
|
|
|
@ -3,7 +3,7 @@ import re
|
||||||
from mwmbl.tokenizer import tokenize, clean_unicode
|
from mwmbl.tokenizer import tokenize, clean_unicode
|
||||||
|
|
||||||
|
|
||||||
def format_result_with_pattern(pattern, result):
|
def format_result_with_pattern(pattern, result, source):
|
||||||
formatted_result = {}
|
formatted_result = {}
|
||||||
for content_type, content_raw in [('title', result.title), ('extract', result.extract)]:
|
for content_type, content_raw in [('title', result.title), ('extract', result.extract)]:
|
||||||
content = clean_unicode(content_raw)
|
content = clean_unicode(content_raw)
|
||||||
|
@ -17,6 +17,7 @@ def format_result_with_pattern(pattern, result):
|
||||||
content_result.append({'value': content[start:end], 'is_bold': is_bold})
|
content_result.append({'value': content[start:end], 'is_bold': is_bold})
|
||||||
formatted_result[content_type] = content_result
|
formatted_result[content_type] = content_result
|
||||||
formatted_result['url'] = result.url
|
formatted_result['url'] = result.url
|
||||||
|
formatted_result['source'] = source
|
||||||
return formatted_result
|
return formatted_result
|
||||||
|
|
||||||
|
|
||||||
|
@ -34,8 +35,8 @@ def get_query_regex(terms, is_complete, is_url):
|
||||||
return pattern
|
return pattern
|
||||||
|
|
||||||
|
|
||||||
def format_result(result, query):
|
def format_result(result, query, source):
|
||||||
tokens = tokenize(query)
|
tokens = tokenize(query)
|
||||||
pattern = get_query_regex(tokens, True, False)
|
pattern = get_query_regex(tokens, True, False)
|
||||||
return format_result_with_pattern(pattern, result)
|
return format_result_with_pattern(pattern, result, source)
|
||||||
|
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
<div class="result-container">
|
<div class="result-container">
|
||||||
<div class="result-link">
|
<div class="result-link">
|
||||||
<a href="{{result.url}}">
|
<a href="{{result.url}}">
|
||||||
<p class='link'>{{result.url}}</p>
|
<p><span class='link'>{{result.url}}</span> <span class="via">— found via {{result.source|title}}</span></p>
|
||||||
<p class='title'>{{result.title|strengthen}}</p>
|
<p class='title'>{{result.title|strengthen}}</p>
|
||||||
</a>
|
</a>
|
||||||
<p class='extract'>{{result.extract|strengthen}}</p>
|
<p class='extract'>{{result.extract|strengthen}}</p>
|
||||||
|
|
|
@ -3,6 +3,7 @@ import re
|
||||||
from abc import abstractmethod
|
from abc import abstractmethod
|
||||||
from logging import getLogger
|
from logging import getLogger
|
||||||
from operator import itemgetter
|
from operator import itemgetter
|
||||||
|
from typing import Optional
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
from mwmbl.format import format_result_with_pattern, get_query_regex
|
from mwmbl.format import format_result_with_pattern, get_query_regex
|
||||||
|
@ -118,17 +119,22 @@ class Ranker:
|
||||||
def order_results(self, terms, pages, is_complete):
|
def order_results(self, terms, pages, is_complete):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def search(self, s: str):
|
def search(self, s: str, additional_results: list[Document], source: Optional[str] = None):
|
||||||
results, terms, _ = self.get_results(s)
|
mwmbl_results, terms, _ = self.get_results(s)
|
||||||
|
|
||||||
is_complete = s.endswith(' ')
|
is_complete = s.endswith(' ')
|
||||||
pattern = get_query_regex(terms, is_complete, False)
|
pattern = get_query_regex(terms, is_complete, False)
|
||||||
formatted_results = []
|
formatted_results = []
|
||||||
for result in results:
|
seen_urls = set()
|
||||||
formatted_result = format_result_with_pattern(pattern, result)
|
for results, source in [(additional_results, source), (mwmbl_results, 'mwmbl')]:
|
||||||
formatted_results.append(formatted_result)
|
for result in results:
|
||||||
|
if result.url in seen_urls:
|
||||||
|
continue
|
||||||
|
formatted_result = format_result_with_pattern(pattern, result, source)
|
||||||
|
formatted_results.append(formatted_result)
|
||||||
|
seen_urls.add(result.url)
|
||||||
|
|
||||||
logger.info("Return results: %r", formatted_results)
|
logger.info("Return results: %d", len(formatted_results))
|
||||||
return formatted_results
|
return formatted_results
|
||||||
|
|
||||||
def complete(self, q: str):
|
def complete(self, q: str):
|
||||||
|
|
|
@ -88,7 +88,19 @@ class Activity:
|
||||||
def _get_results_and_activity(request):
|
def _get_results_and_activity(request):
|
||||||
query = request.GET.get("q")
|
query = request.GET.get("q")
|
||||||
if query:
|
if query:
|
||||||
results = ranker.search(query)
|
# There may be extra results in the request that we need to add in
|
||||||
|
# format is ?enhanced=google&title=title1&url=url1&extract=extract1&title=title2&url=url2&extract=extract2
|
||||||
|
source = request.GET.get("enhanced", "unknown")
|
||||||
|
titles = request.GET.getlist(f"title")
|
||||||
|
urls = request.GET.getlist(f"url")
|
||||||
|
extracts = request.GET.getlist(f"extract")
|
||||||
|
|
||||||
|
additional_results = [
|
||||||
|
Document(title=title, url=url, extract=extract, score=0.0)
|
||||||
|
for title, url, extract in zip(titles, urls, extracts)
|
||||||
|
]
|
||||||
|
|
||||||
|
results = ranker.search(query, additional_results=additional_results, source=source)
|
||||||
activity = None
|
activity = None
|
||||||
else:
|
else:
|
||||||
results = None
|
results = None
|
||||||
|
@ -125,5 +137,5 @@ def fetch_url(request):
|
||||||
|
|
||||||
result = Document(title=title, url=url, extract=extract, score=0.0)
|
result = Document(title=title, url=url, extract=extract, score=0.0)
|
||||||
return render(request, "result.html", {
|
return render(request, "result.html", {
|
||||||
"result": format_result(result, query),
|
"result": format_result(result, query, "user"),
|
||||||
})
|
})
|
||||||
|
|
Loading…
Reference in a new issue