diff --git a/analyse/search.py b/analyse/search.py index 4bc3b72..8f0f1de 100644 --- a/analyse/search.py +++ b/analyse/search.py @@ -18,7 +18,7 @@ def run(): with TinyIndex(Document, INDEX_PATH) as tiny_index: completer = Completer() ranker = HeuristicRanker(tiny_index, completer) - items = ranker.search('jasper fforde') + items = ranker.search('jasper fforde', []) print() if items: for i, item in enumerate(islice(items, 10)): diff --git a/devdata/index-v2.tinysearch b/devdata/index-v2.tinysearch index d7a3690..e4316e9 100644 Binary files a/devdata/index-v2.tinysearch and b/devdata/index-v2.tinysearch differ diff --git a/front-end/assets/css/global.css b/front-end/assets/css/global.css index 5e3a4c5..c85945a 100644 --- a/front-end/assets/css/global.css +++ b/front-end/assets/css/global.css @@ -148,6 +148,13 @@ body { font-size: .9rem; } +.via { + font-size: .9rem; + font-weight: var(--default-font-weight); + /* italics */ + font-style: italic; +} + .result .title, .result .title>* { color: var(--primary-color); font-size: 1.1rem; diff --git a/mwmbl/format.py b/mwmbl/format.py index ad8f2ef..56df6d3 100644 --- a/mwmbl/format.py +++ b/mwmbl/format.py @@ -3,7 +3,7 @@ import re from mwmbl.tokenizer import tokenize, clean_unicode -def format_result_with_pattern(pattern, result): +def format_result_with_pattern(pattern, result, source): formatted_result = {} for content_type, content_raw in [('title', result.title), ('extract', result.extract)]: content = clean_unicode(content_raw) @@ -17,6 +17,7 @@ def format_result_with_pattern(pattern, result): content_result.append({'value': content[start:end], 'is_bold': is_bold}) formatted_result[content_type] = content_result formatted_result['url'] = result.url + formatted_result['source'] = source return formatted_result @@ -34,8 +35,8 @@ def get_query_regex(terms, is_complete, is_url): return pattern -def format_result(result, query): +def format_result(result, query, source): tokens = tokenize(query) pattern = get_query_regex(tokens, True, False) - return format_result_with_pattern(pattern, result) + return format_result_with_pattern(pattern, result, source) diff --git a/mwmbl/templates/result.html b/mwmbl/templates/result.html index 6b5a634..339d293 100644 --- a/mwmbl/templates/result.html +++ b/mwmbl/templates/result.html @@ -3,7 +3,7 @@
{{result.url}}
+{{result.url}} — found via {{result.source|title}}
{{result.title|strengthen}}
{{result.extract|strengthen}}
diff --git a/mwmbl/tinysearchengine/rank.py b/mwmbl/tinysearchengine/rank.py index 15ea49b..2e72477 100644 --- a/mwmbl/tinysearchengine/rank.py +++ b/mwmbl/tinysearchengine/rank.py @@ -3,6 +3,7 @@ import re from abc import abstractmethod from logging import getLogger from operator import itemgetter +from typing import Optional from urllib.parse import urlparse from mwmbl.format import format_result_with_pattern, get_query_regex @@ -118,15 +119,20 @@ class Ranker: def order_results(self, terms, pages, is_complete): pass - def search(self, s: str, additional_results: list[Document]): - results, terms, _ = self.get_results(s) + def search(self, s: str, additional_results: list[Document], source: Optional[str] = None): + mwmbl_results, terms, _ = self.get_results(s) is_complete = s.endswith(' ') pattern = get_query_regex(terms, is_complete, False) formatted_results = [] - for result in additional_results + results: - formatted_result = format_result_with_pattern(pattern, result) - formatted_results.append(formatted_result) + seen_urls = set() + for results, source in [(additional_results, source), (mwmbl_results, 'mwmbl')]: + for result in results: + if result.url in seen_urls: + continue + formatted_result = format_result_with_pattern(pattern, result, source) + formatted_results.append(formatted_result) + seen_urls.add(result.url) logger.info("Return results: %d", len(formatted_results)) return formatted_results diff --git a/mwmbl/views.py b/mwmbl/views.py index 6878ba4..84b9c9f 100644 --- a/mwmbl/views.py +++ b/mwmbl/views.py @@ -90,7 +90,7 @@ def _get_results_and_activity(request): if query: # There may be extra results in the request that we need to add in # format is ?enhanced=google&title=title1&url=url1&extract=extract1&title=title2&url=url2&extract=extract2 - # enhanced = request.GET.get("enhanced") + source = request.GET.get("enhanced", "unknown") titles = request.GET.getlist(f"title") urls = request.GET.getlist(f"url") extracts = request.GET.getlist(f"extract") @@ -100,7 +100,7 @@ def _get_results_and_activity(request): for title, url, extract in zip(titles, urls, extracts) ] - results = ranker.search(query, additional_results=additional_results) + results = ranker.search(query, additional_results=additional_results, source=source) activity = None else: results = None @@ -137,5 +137,5 @@ def fetch_url(request): result = Document(title=title, url=url, extract=extract, score=0.0) return render(request, "result.html", { - "result": format_result(result, query), + "result": format_result(result, query, "user"), })