Merge pull request #123 from mwmbl/use-htmx-for-search-results

Use htmx for search results
This commit is contained in:
Daoud Clarke 2023-11-05 13:24:34 +00:00 committed by GitHub
commit 19a8c8ac79
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
15 changed files with 173 additions and 213 deletions

View file

@ -1,33 +0,0 @@
import define from '../utils/define.js';
import addResult from "./molecules/add-result.js";
import save from "./organisms/save.js";
const template = () => /*html*/`
<header class="search-menu">
<ul>
<li is="${save}"></li>
</ul>
<div><a href="/accounts/login/">Login</a> <a href="/accounts/signup/">Sign up</a> </div>
<div class="branding">
<img class="brand-icon" src="/static/images/logo.svg" width="40" height="40" alt="mwmbl logo">
<span class="brand-title">MWMBL</span>
</div>
<mwmbl-search-bar></mwmbl-search-bar>
</header>
<main>
<mwmbl-results></mwmbl-results>
</main>
<div is="${addResult}"></div>
<footer is="mwmbl-footer"></footer>
`;
export default define('app', class extends HTMLElement {
constructor() {
super();
this.__setup();
}
__setup() {
this.innerHTML = template();
}
});

View file

@ -3,7 +3,7 @@ import config from "../../../config.js";
import {globalBus} from "../../utils/events.js";
const FETCH_URL = `${config['publicApiURL']}crawler/fetch?`
const FETCH_URL = '/app/fetch?'
const template = () => /*html*/`
@ -56,7 +56,7 @@ export default define('add-result', class extends HTMLDivElement {
const url = `${FETCH_URL}url=${encodeURIComponent(value)}&query=${encodeURIComponent(query)}`;
const response = await fetch(url);
if (response.status === 200) {
const data = await response.json();
const data = await response.text();
console.log("Data", data);
const addResultEvent = new CustomEvent('curate-add-result', {detail: data});

View file

@ -1,26 +1,7 @@
import define from '../../utils/define.js';
import escapeString from '../../utils/escapeString.js';
import { globalBus } from '../../utils/events.js';
import deleteButton from "./delete-button.js";
import validateButton from "./validate-button.js";
import addButton from "./add-button.js";
const template = ({ data }) => /*html*/`
<div class="result-container">
<div class="curation-buttons">
<button class="curation-button curate-delete" is="${deleteButton}"></button>
<button class="curation-button curate-approve" is="${validateButton}"></button>
<button class="curation-button curate-add" is="${addButton}"></button>
</div>
<div class="result-link">
<a href='${data.url}'>
<p class='link'>${data.url}</p>
<p class='title'>${data.title}</p>
<p class='extract'>${data.extract}</p>
</a>
</div>
</div>
`;
export default define('result', class extends HTMLLIElement {
constructor() {
@ -30,11 +11,6 @@ export default define('result', class extends HTMLLIElement {
}
__setup() {
this.innerHTML = template({ data: {
url: this.dataset.url,
title: this.__handleBold(JSON.parse(this.dataset.title)),
extract: this.__handleBold(JSON.parse(this.dataset.extract))
}});
this.__events();
}

View file

@ -1,21 +1,13 @@
import define from '../../utils/define.js';
import {globalBus} from '../../utils/events.js';
// Components
import result from '../molecules/result.js';
import emptyResult from '../molecules/empty-result.js';
import home from './home.js';
import escapeString from '../../utils/escapeString.js';
const template = () => /*html*/`
<ul class='results'>
<li is='${home}'></li>
</ul>
`;
document.body.addEventListener('htmx:load', function(evt) {
export default define('results', class extends HTMLElement {
});
class ResultsHandler {
constructor() {
super();
this.results = null;
this.oldIndex = null;
this.curating = false;
@ -23,50 +15,12 @@ export default define('results', class extends HTMLElement {
}
__setup() {
this.innerHTML = template();
this.results = this.querySelector('.results');
this.__events();
}
__events() {
globalBus.on('search', (e) => {
this.results.innerHTML = '';
let resultsHTML = '';
if (!e.detail.error) {
// If there is no details the input is empty
if (!e.detail.results) {
resultsHTML = /*html*/`
<li is='${home}'></li>
`;
}
// If the details array has results display them
else if (e.detail.results.length > 0) {
for(const resultData of e.detail.results) {
resultsHTML += /*html*/`
<li
is='${result}'
data-url='${escapeString(resultData.url)}'
data-title='${escapeString(JSON.stringify(resultData.title))}'
data-extract='${escapeString(JSON.stringify(resultData.extract))}'
></li>
`;
}
}
// If the details array is empty there is no result
else {
resultsHTML = /*html*/`
<li is='${emptyResult}'></li>
`;
}
}
else {
// If there is an error display an empty result
resultsHTML = /*html*/`
<li is='${emptyResult}'></li>
`;
}
// Bind HTML to the DOM
this.results.innerHTML = resultsHTML;
document.body.addEventListener('htmx:load', e => {
this.results = document.querySelector('.results');
// Allow the user to re-order search results
$(".results").sortable({
@ -142,15 +96,7 @@ export default define('results', class extends HTMLElement {
console.log("Add result", e);
this.__beginCurating();
const resultData = e.detail;
const resultHTML = /*html*/`
<li
is='${result}'
data-url='${escapeString(resultData.url)}'
data-title='${escapeString(JSON.stringify(resultData.title))}'
data-extract='${escapeString(JSON.stringify(resultData.extract))}'
></li>
`;
this.results.insertAdjacentHTML('afterbegin', resultHTML);
this.results.insertAdjacentHTML('afterbegin', resultData);
const newResults = this.__getResults();
@ -236,4 +182,6 @@ export default define('results', class extends HTMLElement {
});
globalBus.dispatch(curationMoveEvent);
}
});
}
const resultsHandler = new ResultsHandler();

View file

@ -48,6 +48,8 @@
<!-- <mwmbl-register></mwmbl-register>-->
<mwmbl-app></mwmbl-app>
<noscript>
<!-- https://stackoverflow.com/a/431554 -->
<style> .jsonly { display: none } </style>
<main class="noscript">
<img class="brand-icon" src="/static/images/logo.svg" width="40" height="40" alt="mwmbl logo">
<h1>
@ -63,8 +65,46 @@
</p>
</main>
</noscript>
<!-- Javasript entrypoint -->
<script src="https://unpkg.com/htmx.org@1.9.6"></script>
<script src="./index.js" type="module"></script>
<main class="jsonly">
<header class="search-menu">
<ul>
<li is="${save}"></li>
</ul>
<div><a href="/accounts/login/">Login</a> <a href="/accounts/signup/">Sign up</a> </div>
<div class="branding">
<img class="brand-icon" src="/static/images/logo.svg" width="40" height="40" alt="mwmbl logo">
<span class="brand-title">MWMBL</span>
</div>
<form class="search-bar">
<i class="ph-magnifying-glass-bold"></i>
<input
type='search'
name='query'
class='search-bar-input'
placeholder='Search on mwmbl...'
title='Use "CTRL+K" or "/" to focus.'
autocomplete='off'
hx-get="/app/search/"
hx-trigger="keyup changed delay:100ms"
hx-target=".results"
>
</form>
</header>
<main>
<mwmbl-results>
<ul class='results'>
<li is='${home}'></li>
</ul>
</mwmbl-results>
</main>
<div is="mwmbl-add-result"></div>
<footer is="mwmbl-footer"></footer>
</main>
</body>
</html>

View file

@ -14,7 +14,6 @@
if (!redirected) {
// Load components only after redirects are checked.
import('./components/app.js');
import('./components/login.js');
import('./components/register.js');
import("./components/organisms/search-bar.js");

View file

@ -1,28 +1,10 @@
from multiprocessing import Queue
from pathlib import Path
from django.conf import settings
from ninja import NinjaAPI
from ninja.security import django_auth
import mwmbl.crawler.app as crawler
from mwmbl.indexer.batch_cache import BatchCache
from mwmbl.indexer.paths import INDEX_NAME, BATCH_DIR_NAME
from mwmbl.platform import curate
from mwmbl.search_setup import queued_batches, index_path, ranker, batch_cache
from mwmbl.tinysearchengine import search
from mwmbl.tinysearchengine.completer import Completer
from mwmbl.tinysearchengine.indexer import TinyIndex, Document
from mwmbl.tinysearchengine.rank import HeuristicRanker
queued_batches = Queue()
completer = Completer()
index_path = Path(settings.DATA_PATH) / INDEX_NAME
tiny_index = TinyIndex(item_factory=Document, index_path=index_path)
tiny_index.__enter__()
ranker = HeuristicRanker(tiny_index, completer)
batch_cache = BatchCache(Path(settings.DATA_PATH) / BATCH_DIR_NAME)
def create_api(version):

View file

@ -13,7 +13,7 @@ class MwmblConfig(AppConfig):
def ready(self):
# Imports here to avoid AppRegistryNotReady exception
from mwmbl.api import queued_batches
from mwmbl.search_setup import queued_batches
from mwmbl import background
from mwmbl.indexer.paths import INDEX_NAME
from mwmbl.indexer.update_urls import update_urls_continuously

View file

@ -8,12 +8,8 @@ from typing import Union
from uuid import uuid4
import boto3
import justext
import requests
from fastapi import HTTPException
from justext.core import html_to_dom, ParagraphMaker, classify_paragraphs, revise_paragraph_classification, \
LENGTH_LOW_DEFAULT, STOPWORDS_LOW_DEFAULT, MAX_LINK_DENSITY_DEFAULT, NO_HEADINGS_DEFAULT, LENGTH_HIGH_DEFAULT, \
STOPWORDS_HIGH_DEFAULT, MAX_HEADING_DISTANCE_DEFAULT, DEFAULT_ENCODING, DEFAULT_ENC_ERRORS, preprocessor
from ninja import Router
from redis import Redis
@ -21,7 +17,6 @@ from mwmbl.crawler.batch import Batch, NewBatchRequest, HashedBatch
from mwmbl.crawler.stats import MwmblStats, StatsManager
from mwmbl.crawler.urls import URLDatabase, FoundURL, URLStatus
from mwmbl.database import Database
from mwmbl.format import format_result
from mwmbl.indexer.batch_cache import BatchCache
from mwmbl.indexer.indexdb import IndexDatabase, BatchInfo, BatchStatus
from mwmbl.settings import (
@ -35,9 +30,7 @@ from mwmbl.settings import (
PUBLIC_URL_PREFIX,
PUBLIC_USER_ID_LENGTH,
FILE_NAME_SUFFIX,
DATE_REGEX, NUM_EXTRACT_CHARS)
from mwmbl.tinysearchengine.indexer import Document
DATE_REGEX)
stats_manager = StatsManager(Redis.from_url(os.environ.get("REDIS_URL")))
@ -57,32 +50,6 @@ def upload(data: bytes, name: str):
last_batch = None
def justext_with_dom(html_text, stoplist, length_low=LENGTH_LOW_DEFAULT,
length_high=LENGTH_HIGH_DEFAULT, stopwords_low=STOPWORDS_LOW_DEFAULT,
stopwords_high=STOPWORDS_HIGH_DEFAULT, max_link_density=MAX_LINK_DENSITY_DEFAULT,
max_heading_distance=MAX_HEADING_DISTANCE_DEFAULT, no_headings=NO_HEADINGS_DEFAULT,
encoding=None, default_encoding=DEFAULT_ENCODING,
enc_errors=DEFAULT_ENC_ERRORS):
"""
Converts an HTML page into a list of classified paragraphs. Each paragraph
is represented as instance of class ˙˙justext.paragraph.Paragraph˙˙.
"""
dom = html_to_dom(html_text, default_encoding, encoding, enc_errors)
titles = dom.xpath("//title")
title = titles[0].text if len(titles) > 0 else None
dom = preprocessor(dom)
paragraphs = ParagraphMaker.make_paragraphs(dom)
classify_paragraphs(paragraphs, stoplist, length_low, length_high,
stopwords_low, stopwords_high, max_link_density, no_headings)
revise_paragraph_classification(paragraphs, max_heading_distance)
return paragraphs, title
def create_router(batch_cache: BatchCache, queued_batches: Queue) -> Router:
router = Router(tags=["crawler"])
@ -90,19 +57,6 @@ def create_router(batch_cache: BatchCache, queued_batches: Queue) -> Router:
# #
# # url_db.create_tables()
@router.get('/fetch')
def fetch_url(request, url: str, query: str):
response = requests.get(url)
paragraphs, title = justext_with_dom(response.content, justext.get_stoplist("English"))
good_paragraphs = [p for p in paragraphs if p.class_type == 'good']
extract = ' '.join([p.text for p in good_paragraphs])
if len(extract) > NUM_EXTRACT_CHARS:
extract = extract[:NUM_EXTRACT_CHARS - 1] + ''
result = Document(title=title, url=url, extract=extract, score=0.0)
return format_result(result, query)
@router.post('/batches/')
def post_batch(request, batch: Batch):
if len(batch.items) > MAX_BATCH_SIZE:

19
mwmbl/search_setup.py Normal file
View file

@ -0,0 +1,19 @@
from multiprocessing import Queue
from pathlib import Path
from django.conf import settings
from mwmbl.indexer.batch_cache import BatchCache
from mwmbl.indexer.paths import INDEX_NAME, BATCH_DIR_NAME
from mwmbl.tinysearchengine.completer import Completer
from mwmbl.tinysearchengine.indexer import TinyIndex, Document
from mwmbl.tinysearchengine.rank import HeuristicRanker
queued_batches = Queue()
completer = Completer()
index_path = Path(settings.DATA_PATH) / INDEX_NAME
tiny_index = TinyIndex(item_factory=Document, index_path=index_path)
tiny_index.__enter__()
ranker = HeuristicRanker(tiny_index, completer)
batch_cache = BatchCache(Path(settings.DATA_PATH) / BATCH_DIR_NAME)

View file

@ -0,0 +1,19 @@
{% load result_filters %}
{% for result in results %}
<li class="result" is="mwmbl-result">
<div class="result-container">
<div class="curation-buttons">
<button class="curation-button curate-delete" is="mwmbl-delete-button"></button>
<button class="curation-button curate-approve" is="mwmbl-validate-button"></button>
<button class="curation-button curate-add" is="mwmbl-add-button"></button>
</div>
<div class="result-link">
<a href="{{result.url}}">
<p class='link'>{{result.url}}</p>
<p class='title'>{{result.title|strengthen}}</p>
<p class='extract'>{{result.extract|strengthen}}</p>
</a>
</div>
</div>
</li>
{% endfor %}

View file

View file

@ -0,0 +1,18 @@
from django.template import Library
from django.utils.html import conditional_escape
from django.utils.safestring import mark_safe
register = Library()
@register.filter(needs_autoescape=True)
def strengthen(spans, autoescape=True):
escape = conditional_escape if autoescape else lambda x: x
strengthened = []
for span in spans:
escaped_value = escape(span["value"])
if span["is_bold"]:
strengthened.append(f"<strong>{escaped_value}</strong>")
else:
strengthened.append(escaped_value)
return mark_safe("".join(strengthened))

View file

@ -15,12 +15,10 @@ Including another URLconf
2. Add a URL to urlpatterns: path('blog/', include('blog.urls'))
"""
from django.contrib import admin
from django.contrib.auth import login, logout
from django.template.defaulttags import url
from django.urls import path, include
from mwmbl.api import api_original as api, api_v1
from mwmbl.views import signup, profile
from mwmbl.views import profile, search_results, fetch_url
urlpatterns = [
path('admin/', admin.site.urls),
@ -28,9 +26,7 @@ urlpatterns = [
path('api/v1/', api_v1.urls),
path('accounts/', include('allauth.urls')),
# path("accounts/", include("django.contrib.auth.urls")),
# path('accounts/new/', signup, name='signup'),
path('accounts/profile/', profile, name='profile'),
# path('login/', login, {'template_name': 'login.html'}, name='login'),
# path('logout/', logout, {'next_page': 'login'}, name='logout'),
path('app/search/', search_results, name="search_results"),
path('app/fetch/', fetch_url, name="fetch_url")
]

View file

@ -1,24 +1,66 @@
from django.contrib.auth import authenticate, login
import justext
import requests
from django.contrib.auth.decorators import login_required
from django.contrib.auth.forms import UserCreationForm
from django.shortcuts import redirect, render
from django.shortcuts import render
from mwmbl.format import format_result
from mwmbl.search_setup import ranker
from justext.core import html_to_dom, ParagraphMaker, classify_paragraphs, revise_paragraph_classification, \
LENGTH_LOW_DEFAULT, STOPWORDS_LOW_DEFAULT, MAX_LINK_DENSITY_DEFAULT, NO_HEADINGS_DEFAULT, LENGTH_HIGH_DEFAULT, \
STOPWORDS_HIGH_DEFAULT, MAX_HEADING_DISTANCE_DEFAULT, DEFAULT_ENCODING, DEFAULT_ENC_ERRORS, preprocessor
from mwmbl.settings import NUM_EXTRACT_CHARS
from mwmbl.tinysearchengine.indexer import Document
def signup(request):
if request.method == 'POST':
form = UserCreationForm(request.POST)
if form.is_valid():
form.save()
username = form.cleaned_data.get('username')
raw_password = form.cleaned_data.get('password1')
user = authenticate(username=username, password=raw_password)
login(request, user)
return redirect('/')
else:
form = UserCreationForm()
return render(request, 'signup.html', {'form': form})
def justext_with_dom(html_text, stoplist, length_low=LENGTH_LOW_DEFAULT,
length_high=LENGTH_HIGH_DEFAULT, stopwords_low=STOPWORDS_LOW_DEFAULT,
stopwords_high=STOPWORDS_HIGH_DEFAULT, max_link_density=MAX_LINK_DENSITY_DEFAULT,
max_heading_distance=MAX_HEADING_DISTANCE_DEFAULT, no_headings=NO_HEADINGS_DEFAULT,
encoding=None, default_encoding=DEFAULT_ENCODING,
enc_errors=DEFAULT_ENC_ERRORS):
"""
Converts an HTML page into a list of classified paragraphs. Each paragraph
is represented as instance of class ˙˙justext.paragraph.Paragraph˙˙.
"""
dom = html_to_dom(html_text, default_encoding, encoding, enc_errors)
titles = dom.xpath("//title")
title = titles[0].text if len(titles) > 0 else None
dom = preprocessor(dom)
paragraphs = ParagraphMaker.make_paragraphs(dom)
classify_paragraphs(paragraphs, stoplist, length_low, length_high,
stopwords_low, stopwords_high, max_link_density, no_headings)
revise_paragraph_classification(paragraphs, max_heading_distance)
return paragraphs, title
@login_required
def profile(request):
return render(request, 'profile.html')
def search_results(request):
query = request.GET["query"]
results = ranker.search(query)
return render(request, "results.html", {"results": results})
def fetch_url(request):
url = request.GET["url"]
query = request.GET["query"]
response = requests.get(url)
paragraphs, title = justext_with_dom(response.content, justext.get_stoplist("English"))
good_paragraphs = [p for p in paragraphs if p.class_type == 'good']
extract = ' '.join([p.text for p in good_paragraphs])
if len(extract) > NUM_EXTRACT_CHARS:
extract = extract[:NUM_EXTRACT_CHARS - 1] + ''
result = Document(title=title, url=url, extract=extract, score=0.0)
return render(request, "results.html", {"results": [format_result(result, query)]})