Merge pull request #123 from mwmbl/use-htmx-for-search-results
Use htmx for search results
This commit is contained in:
commit
19a8c8ac79
15 changed files with 173 additions and 213 deletions
|
@ -1,33 +0,0 @@
|
||||||
import define from '../utils/define.js';
|
|
||||||
import addResult from "./molecules/add-result.js";
|
|
||||||
import save from "./organisms/save.js";
|
|
||||||
|
|
||||||
const template = () => /*html*/`
|
|
||||||
<header class="search-menu">
|
|
||||||
<ul>
|
|
||||||
<li is="${save}"></li>
|
|
||||||
</ul>
|
|
||||||
<div><a href="/accounts/login/">Login</a> <a href="/accounts/signup/">Sign up</a> </div>
|
|
||||||
<div class="branding">
|
|
||||||
<img class="brand-icon" src="/static/images/logo.svg" width="40" height="40" alt="mwmbl logo">
|
|
||||||
<span class="brand-title">MWMBL</span>
|
|
||||||
</div>
|
|
||||||
<mwmbl-search-bar></mwmbl-search-bar>
|
|
||||||
</header>
|
|
||||||
<main>
|
|
||||||
<mwmbl-results></mwmbl-results>
|
|
||||||
</main>
|
|
||||||
<div is="${addResult}"></div>
|
|
||||||
<footer is="mwmbl-footer"></footer>
|
|
||||||
`;
|
|
||||||
|
|
||||||
export default define('app', class extends HTMLElement {
|
|
||||||
constructor() {
|
|
||||||
super();
|
|
||||||
this.__setup();
|
|
||||||
}
|
|
||||||
|
|
||||||
__setup() {
|
|
||||||
this.innerHTML = template();
|
|
||||||
}
|
|
||||||
});
|
|
|
@ -3,7 +3,7 @@ import config from "../../../config.js";
|
||||||
import {globalBus} from "../../utils/events.js";
|
import {globalBus} from "../../utils/events.js";
|
||||||
|
|
||||||
|
|
||||||
const FETCH_URL = `${config['publicApiURL']}crawler/fetch?`
|
// Same-origin endpoint that fetches a page and returns it as a rendered result.
// The server registers this route with a trailing slash ('app/fetch/'), so the
// slash is included here rather than relying on a slash-appending redirect.
const FETCH_URL = '/app/fetch/?'
|
||||||
|
|
||||||
|
|
||||||
const template = () => /*html*/`
|
const template = () => /*html*/`
|
||||||
|
@ -56,7 +56,7 @@ export default define('add-result', class extends HTMLDivElement {
|
||||||
const url = `${FETCH_URL}url=${encodeURIComponent(value)}&query=${encodeURIComponent(query)}`;
|
const url = `${FETCH_URL}url=${encodeURIComponent(value)}&query=${encodeURIComponent(query)}`;
|
||||||
const response = await fetch(url);
|
const response = await fetch(url);
|
||||||
if (response.status === 200) {
|
if (response.status === 200) {
|
||||||
const data = await response.json();
|
const data = await response.text();
|
||||||
console.log("Data", data);
|
console.log("Data", data);
|
||||||
|
|
||||||
const addResultEvent = new CustomEvent('curate-add-result', {detail: data});
|
const addResultEvent = new CustomEvent('curate-add-result', {detail: data});
|
||||||
|
|
|
@ -1,26 +1,7 @@
|
||||||
import define from '../../utils/define.js';
|
import define from '../../utils/define.js';
|
||||||
import escapeString from '../../utils/escapeString.js';
|
import escapeString from '../../utils/escapeString.js';
|
||||||
import { globalBus } from '../../utils/events.js';
|
import { globalBus } from '../../utils/events.js';
|
||||||
import deleteButton from "./delete-button.js";
|
|
||||||
import validateButton from "./validate-button.js";
|
|
||||||
import addButton from "./add-button.js";
|
|
||||||
|
|
||||||
const template = ({ data }) => /*html*/`
|
|
||||||
<div class="result-container">
|
|
||||||
<div class="curation-buttons">
|
|
||||||
<button class="curation-button curate-delete" is="${deleteButton}">✕</button>
|
|
||||||
<button class="curation-button curate-approve" is="${validateButton}">✓</button>
|
|
||||||
<button class="curation-button curate-add" is="${addButton}">+</button>
|
|
||||||
</div>
|
|
||||||
<div class="result-link">
|
|
||||||
<a href='${data.url}'>
|
|
||||||
<p class='link'>${data.url}</p>
|
|
||||||
<p class='title'>${data.title}</p>
|
|
||||||
<p class='extract'>${data.extract}</p>
|
|
||||||
</a>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
`;
|
|
||||||
|
|
||||||
export default define('result', class extends HTMLLIElement {
|
export default define('result', class extends HTMLLIElement {
|
||||||
constructor() {
|
constructor() {
|
||||||
|
@ -30,11 +11,6 @@ export default define('result', class extends HTMLLIElement {
|
||||||
}
|
}
|
||||||
|
|
||||||
__setup() {
|
__setup() {
|
||||||
this.innerHTML = template({ data: {
|
|
||||||
url: this.dataset.url,
|
|
||||||
title: this.__handleBold(JSON.parse(this.dataset.title)),
|
|
||||||
extract: this.__handleBold(JSON.parse(this.dataset.extract))
|
|
||||||
}});
|
|
||||||
this.__events();
|
this.__events();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,21 +1,13 @@
|
||||||
import define from '../../utils/define.js';
|
|
||||||
import {globalBus} from '../../utils/events.js';
|
import {globalBus} from '../../utils/events.js';
|
||||||
|
|
||||||
// Components
|
|
||||||
import result from '../molecules/result.js';
|
|
||||||
import emptyResult from '../molecules/empty-result.js';
|
|
||||||
import home from './home.js';
|
|
||||||
import escapeString from '../../utils/escapeString.js';
|
|
||||||
|
|
||||||
const template = () => /*html*/`
|
document.body.addEventListener('htmx:load', function(evt) {
|
||||||
<ul class='results'>
|
|
||||||
<li is='${home}'></li>
|
|
||||||
</ul>
|
|
||||||
`;
|
|
||||||
|
|
||||||
export default define('results', class extends HTMLElement {
|
});
|
||||||
|
|
||||||
|
|
||||||
|
class ResultsHandler {
|
||||||
constructor() {
|
constructor() {
|
||||||
super();
|
|
||||||
this.results = null;
|
this.results = null;
|
||||||
this.oldIndex = null;
|
this.oldIndex = null;
|
||||||
this.curating = false;
|
this.curating = false;
|
||||||
|
@ -23,50 +15,12 @@ export default define('results', class extends HTMLElement {
|
||||||
}
|
}
|
||||||
|
|
||||||
__setup() {
|
__setup() {
|
||||||
this.innerHTML = template();
|
|
||||||
this.results = this.querySelector('.results');
|
|
||||||
this.__events();
|
this.__events();
|
||||||
}
|
}
|
||||||
|
|
||||||
__events() {
|
__events() {
|
||||||
globalBus.on('search', (e) => {
|
document.body.addEventListener('htmx:load', e => {
|
||||||
this.results.innerHTML = '';
|
this.results = document.querySelector('.results');
|
||||||
let resultsHTML = '';
|
|
||||||
if (!e.detail.error) {
|
|
||||||
// If there is no details the input is empty
|
|
||||||
if (!e.detail.results) {
|
|
||||||
resultsHTML = /*html*/`
|
|
||||||
<li is='${home}'></li>
|
|
||||||
`;
|
|
||||||
}
|
|
||||||
// If the details array has results display them
|
|
||||||
else if (e.detail.results.length > 0) {
|
|
||||||
for(const resultData of e.detail.results) {
|
|
||||||
resultsHTML += /*html*/`
|
|
||||||
<li
|
|
||||||
is='${result}'
|
|
||||||
data-url='${escapeString(resultData.url)}'
|
|
||||||
data-title='${escapeString(JSON.stringify(resultData.title))}'
|
|
||||||
data-extract='${escapeString(JSON.stringify(resultData.extract))}'
|
|
||||||
></li>
|
|
||||||
`;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// If the details array is empty there is no result
|
|
||||||
else {
|
|
||||||
resultsHTML = /*html*/`
|
|
||||||
<li is='${emptyResult}'></li>
|
|
||||||
`;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
// If there is an error display an empty result
|
|
||||||
resultsHTML = /*html*/`
|
|
||||||
<li is='${emptyResult}'></li>
|
|
||||||
`;
|
|
||||||
}
|
|
||||||
// Bind HTML to the DOM
|
|
||||||
this.results.innerHTML = resultsHTML;
|
|
||||||
|
|
||||||
// Allow the user to re-order search results
|
// Allow the user to re-order search results
|
||||||
$(".results").sortable({
|
$(".results").sortable({
|
||||||
|
@ -142,15 +96,7 @@ export default define('results', class extends HTMLElement {
|
||||||
console.log("Add result", e);
|
console.log("Add result", e);
|
||||||
this.__beginCurating();
|
this.__beginCurating();
|
||||||
const resultData = e.detail;
|
const resultData = e.detail;
|
||||||
const resultHTML = /*html*/`
|
this.results.insertAdjacentHTML('afterbegin', resultData);
|
||||||
<li
|
|
||||||
is='${result}'
|
|
||||||
data-url='${escapeString(resultData.url)}'
|
|
||||||
data-title='${escapeString(JSON.stringify(resultData.title))}'
|
|
||||||
data-extract='${escapeString(JSON.stringify(resultData.extract))}'
|
|
||||||
></li>
|
|
||||||
`;
|
|
||||||
this.results.insertAdjacentHTML('afterbegin', resultHTML);
|
|
||||||
|
|
||||||
const newResults = this.__getResults();
|
const newResults = this.__getResults();
|
||||||
|
|
||||||
|
@ -236,4 +182,6 @@ export default define('results', class extends HTMLElement {
|
||||||
});
|
});
|
||||||
globalBus.dispatch(curationMoveEvent);
|
globalBus.dispatch(curationMoveEvent);
|
||||||
}
|
}
|
||||||
});
|
}
|
||||||
|
|
||||||
|
const resultsHandler = new ResultsHandler();
|
||||||
|
|
|
@ -48,6 +48,8 @@
|
||||||
<!-- <mwmbl-register></mwmbl-register>-->
|
<!-- <mwmbl-register></mwmbl-register>-->
|
||||||
<mwmbl-app></mwmbl-app>
|
<mwmbl-app></mwmbl-app>
|
||||||
<noscript>
|
<noscript>
|
||||||
|
<!-- https://stackoverflow.com/a/431554 -->
|
||||||
|
<style> .jsonly { display: none } </style>
|
||||||
<main class="noscript">
|
<main class="noscript">
|
||||||
<img class="brand-icon" src="/static/images/logo.svg" width="40" height="40" alt="mwmbl logo">
|
<img class="brand-icon" src="/static/images/logo.svg" width="40" height="40" alt="mwmbl logo">
|
||||||
<h1>
|
<h1>
|
||||||
|
@ -63,8 +65,46 @@
|
||||||
</p>
|
</p>
|
||||||
</main>
|
</main>
|
||||||
</noscript>
|
</noscript>
|
||||||
|
|
||||||
<!-- JavaScript entrypoint -->
|
<!-- JavaScript entrypoint -->
|
||||||
|
<script src="https://unpkg.com/htmx.org@1.9.6"></script>
|
||||||
<script src="./index.js" type="module"></script>
|
<script src="./index.js" type="module"></script>
|
||||||
|
|
||||||
|
<main class="jsonly">
  <header class="search-menu">
    <ul>
      <!-- This is static HTML, not a JS template literal: "${...}" placeholders are
           never interpolated here, so the custom element name is written out.
           NOTE(review): assumes save.js registers the element as "mwmbl-save" - confirm. -->
      <li is="mwmbl-save"></li>
    </ul>
    <div><a href="/accounts/login/">Login</a> <a href="/accounts/signup/">Sign up</a> </div>
    <div class="branding">
      <img class="brand-icon" src="/static/images/logo.svg" width="40" height="40" alt="mwmbl logo">
      <span class="brand-title">MWMBL</span>
    </div>
    <!-- htmx issues GET /app/search/?query=... shortly after each keystroke and
         swaps the returned <li> fragments into the ".results" list below. -->
    <form class="search-bar">
      <i class="ph-magnifying-glass-bold"></i>
      <input
        type='search'
        name='query'
        class='search-bar-input'
        placeholder='Search on mwmbl...'
        title='Use "CTRL+K" or "/" to focus.'
        autocomplete='off'
        hx-get="/app/search/"
        hx-trigger="keyup changed delay:100ms"
        hx-target=".results"
      >
    </form>
  </header>
  <main>
    <mwmbl-results>
      <ul class='results'>
        <!-- NOTE(review): assumes home.js registers the element as "mwmbl-home" - confirm. -->
        <li is="mwmbl-home"></li>
      </ul>
    </mwmbl-results>
  </main>
  <div is="mwmbl-add-result"></div>
  <footer is="mwmbl-footer"></footer>
</main>
|
||||||
</body>
|
</body>
|
||||||
|
|
||||||
</html>
|
</html>
|
|
@ -14,7 +14,6 @@
|
||||||
|
|
||||||
if (!redirected) {
|
if (!redirected) {
|
||||||
// Load components only after redirects are checked.
|
// Load components only after redirects are checked.
|
||||||
import('./components/app.js');
|
|
||||||
import('./components/login.js');
|
import('./components/login.js');
|
||||||
import('./components/register.js');
|
import('./components/register.js');
|
||||||
import("./components/organisms/search-bar.js");
|
import("./components/organisms/search-bar.js");
|
||||||
|
|
20
mwmbl/api.py
20
mwmbl/api.py
|
@ -1,28 +1,10 @@
|
||||||
from multiprocessing import Queue
|
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
from django.conf import settings
|
|
||||||
from ninja import NinjaAPI
|
from ninja import NinjaAPI
|
||||||
from ninja.security import django_auth
|
from ninja.security import django_auth
|
||||||
|
|
||||||
import mwmbl.crawler.app as crawler
|
import mwmbl.crawler.app as crawler
|
||||||
from mwmbl.indexer.batch_cache import BatchCache
|
|
||||||
from mwmbl.indexer.paths import INDEX_NAME, BATCH_DIR_NAME
|
|
||||||
from mwmbl.platform import curate
|
from mwmbl.platform import curate
|
||||||
|
from mwmbl.search_setup import queued_batches, index_path, ranker, batch_cache
|
||||||
from mwmbl.tinysearchengine import search
|
from mwmbl.tinysearchengine import search
|
||||||
from mwmbl.tinysearchengine.completer import Completer
|
|
||||||
from mwmbl.tinysearchengine.indexer import TinyIndex, Document
|
|
||||||
from mwmbl.tinysearchengine.rank import HeuristicRanker
|
|
||||||
|
|
||||||
|
|
||||||
queued_batches = Queue()
|
|
||||||
completer = Completer()
|
|
||||||
|
|
||||||
index_path = Path(settings.DATA_PATH) / INDEX_NAME
|
|
||||||
tiny_index = TinyIndex(item_factory=Document, index_path=index_path)
|
|
||||||
tiny_index.__enter__()
|
|
||||||
ranker = HeuristicRanker(tiny_index, completer)
|
|
||||||
batch_cache = BatchCache(Path(settings.DATA_PATH) / BATCH_DIR_NAME)
|
|
||||||
|
|
||||||
|
|
||||||
def create_api(version):
|
def create_api(version):
|
||||||
|
|
|
@ -13,7 +13,7 @@ class MwmblConfig(AppConfig):
|
||||||
|
|
||||||
def ready(self):
|
def ready(self):
|
||||||
# Imports here to avoid AppRegistryNotReady exception
|
# Imports here to avoid AppRegistryNotReady exception
|
||||||
from mwmbl.api import queued_batches
|
from mwmbl.search_setup import queued_batches
|
||||||
from mwmbl import background
|
from mwmbl import background
|
||||||
from mwmbl.indexer.paths import INDEX_NAME
|
from mwmbl.indexer.paths import INDEX_NAME
|
||||||
from mwmbl.indexer.update_urls import update_urls_continuously
|
from mwmbl.indexer.update_urls import update_urls_continuously
|
||||||
|
|
|
@ -8,12 +8,8 @@ from typing import Union
|
||||||
from uuid import uuid4
|
from uuid import uuid4
|
||||||
|
|
||||||
import boto3
|
import boto3
|
||||||
import justext
|
|
||||||
import requests
|
import requests
|
||||||
from fastapi import HTTPException
|
from fastapi import HTTPException
|
||||||
from justext.core import html_to_dom, ParagraphMaker, classify_paragraphs, revise_paragraph_classification, \
|
|
||||||
LENGTH_LOW_DEFAULT, STOPWORDS_LOW_DEFAULT, MAX_LINK_DENSITY_DEFAULT, NO_HEADINGS_DEFAULT, LENGTH_HIGH_DEFAULT, \
|
|
||||||
STOPWORDS_HIGH_DEFAULT, MAX_HEADING_DISTANCE_DEFAULT, DEFAULT_ENCODING, DEFAULT_ENC_ERRORS, preprocessor
|
|
||||||
from ninja import Router
|
from ninja import Router
|
||||||
from redis import Redis
|
from redis import Redis
|
||||||
|
|
||||||
|
@ -21,7 +17,6 @@ from mwmbl.crawler.batch import Batch, NewBatchRequest, HashedBatch
|
||||||
from mwmbl.crawler.stats import MwmblStats, StatsManager
|
from mwmbl.crawler.stats import MwmblStats, StatsManager
|
||||||
from mwmbl.crawler.urls import URLDatabase, FoundURL, URLStatus
|
from mwmbl.crawler.urls import URLDatabase, FoundURL, URLStatus
|
||||||
from mwmbl.database import Database
|
from mwmbl.database import Database
|
||||||
from mwmbl.format import format_result
|
|
||||||
from mwmbl.indexer.batch_cache import BatchCache
|
from mwmbl.indexer.batch_cache import BatchCache
|
||||||
from mwmbl.indexer.indexdb import IndexDatabase, BatchInfo, BatchStatus
|
from mwmbl.indexer.indexdb import IndexDatabase, BatchInfo, BatchStatus
|
||||||
from mwmbl.settings import (
|
from mwmbl.settings import (
|
||||||
|
@ -35,9 +30,7 @@ from mwmbl.settings import (
|
||||||
PUBLIC_URL_PREFIX,
|
PUBLIC_URL_PREFIX,
|
||||||
PUBLIC_USER_ID_LENGTH,
|
PUBLIC_USER_ID_LENGTH,
|
||||||
FILE_NAME_SUFFIX,
|
FILE_NAME_SUFFIX,
|
||||||
DATE_REGEX, NUM_EXTRACT_CHARS)
|
DATE_REGEX)
|
||||||
from mwmbl.tinysearchengine.indexer import Document
|
|
||||||
|
|
||||||
|
|
||||||
stats_manager = StatsManager(Redis.from_url(os.environ.get("REDIS_URL")))
|
stats_manager = StatsManager(Redis.from_url(os.environ.get("REDIS_URL")))
|
||||||
|
|
||||||
|
@ -57,32 +50,6 @@ def upload(data: bytes, name: str):
|
||||||
last_batch = None
|
last_batch = None
|
||||||
|
|
||||||
|
|
||||||
def justext_with_dom(html_text, stoplist, length_low=LENGTH_LOW_DEFAULT,
|
|
||||||
length_high=LENGTH_HIGH_DEFAULT, stopwords_low=STOPWORDS_LOW_DEFAULT,
|
|
||||||
stopwords_high=STOPWORDS_HIGH_DEFAULT, max_link_density=MAX_LINK_DENSITY_DEFAULT,
|
|
||||||
max_heading_distance=MAX_HEADING_DISTANCE_DEFAULT, no_headings=NO_HEADINGS_DEFAULT,
|
|
||||||
encoding=None, default_encoding=DEFAULT_ENCODING,
|
|
||||||
enc_errors=DEFAULT_ENC_ERRORS):
|
|
||||||
"""
|
|
||||||
Converts an HTML page into a list of classified paragraphs. Each paragraph
|
|
||||||
is represented as instance of class ˙˙justext.paragraph.Paragraph˙˙.
|
|
||||||
"""
|
|
||||||
dom = html_to_dom(html_text, default_encoding, encoding, enc_errors)
|
|
||||||
|
|
||||||
titles = dom.xpath("//title")
|
|
||||||
title = titles[0].text if len(titles) > 0 else None
|
|
||||||
|
|
||||||
dom = preprocessor(dom)
|
|
||||||
|
|
||||||
paragraphs = ParagraphMaker.make_paragraphs(dom)
|
|
||||||
|
|
||||||
classify_paragraphs(paragraphs, stoplist, length_low, length_high,
|
|
||||||
stopwords_low, stopwords_high, max_link_density, no_headings)
|
|
||||||
revise_paragraph_classification(paragraphs, max_heading_distance)
|
|
||||||
|
|
||||||
return paragraphs, title
|
|
||||||
|
|
||||||
|
|
||||||
def create_router(batch_cache: BatchCache, queued_batches: Queue) -> Router:
|
def create_router(batch_cache: BatchCache, queued_batches: Queue) -> Router:
|
||||||
router = Router(tags=["crawler"])
|
router = Router(tags=["crawler"])
|
||||||
|
|
||||||
|
@ -90,19 +57,6 @@ def create_router(batch_cache: BatchCache, queued_batches: Queue) -> Router:
|
||||||
# #
|
# #
|
||||||
# # url_db.create_tables()
|
# # url_db.create_tables()
|
||||||
|
|
||||||
@router.get('/fetch')
|
|
||||||
def fetch_url(request, url: str, query: str):
|
|
||||||
response = requests.get(url)
|
|
||||||
paragraphs, title = justext_with_dom(response.content, justext.get_stoplist("English"))
|
|
||||||
good_paragraphs = [p for p in paragraphs if p.class_type == 'good']
|
|
||||||
|
|
||||||
extract = ' '.join([p.text for p in good_paragraphs])
|
|
||||||
if len(extract) > NUM_EXTRACT_CHARS:
|
|
||||||
extract = extract[:NUM_EXTRACT_CHARS - 1] + '…'
|
|
||||||
|
|
||||||
result = Document(title=title, url=url, extract=extract, score=0.0)
|
|
||||||
return format_result(result, query)
|
|
||||||
|
|
||||||
@router.post('/batches/')
|
@router.post('/batches/')
|
||||||
def post_batch(request, batch: Batch):
|
def post_batch(request, batch: Batch):
|
||||||
if len(batch.items) > MAX_BATCH_SIZE:
|
if len(batch.items) > MAX_BATCH_SIZE:
|
||||||
|
|
19
mwmbl/search_setup.py
Normal file
19
mwmbl/search_setup.py
Normal file
|
@ -0,0 +1,19 @@
|
||||||
|
from multiprocessing import Queue
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from django.conf import settings
|
||||||
|
|
||||||
|
from mwmbl.indexer.batch_cache import BatchCache
|
||||||
|
from mwmbl.indexer.paths import INDEX_NAME, BATCH_DIR_NAME
|
||||||
|
from mwmbl.tinysearchengine.completer import Completer
|
||||||
|
from mwmbl.tinysearchengine.indexer import TinyIndex, Document
|
||||||
|
from mwmbl.tinysearchengine.rank import HeuristicRanker
|
||||||
|
|
||||||
|
queued_batches = Queue()
|
||||||
|
completer = Completer()
|
||||||
|
index_path = Path(settings.DATA_PATH) / INDEX_NAME
|
||||||
|
tiny_index = TinyIndex(item_factory=Document, index_path=index_path)
|
||||||
|
tiny_index.__enter__()
|
||||||
|
|
||||||
|
ranker = HeuristicRanker(tiny_index, completer)
|
||||||
|
batch_cache = BatchCache(Path(settings.DATA_PATH) / BATCH_DIR_NAME)
|
19
mwmbl/templates/results.html
Normal file
19
mwmbl/templates/results.html
Normal file
|
@ -0,0 +1,19 @@
|
||||||
|
{% load result_filters %}
|
||||||
|
{% for result in results %}
|
||||||
|
<li class="result" is="mwmbl-result">
|
||||||
|
<div class="result-container">
|
||||||
|
<div class="curation-buttons">
|
||||||
|
<button class="curation-button curate-delete" is="mwmbl-delete-button">✕</button>
|
||||||
|
<button class="curation-button curate-approve" is="mwmbl-validate-button">✓</button>
|
||||||
|
<button class="curation-button curate-add" is="mwmbl-add-button">+</button>
|
||||||
|
</div>
|
||||||
|
<div class="result-link">
|
||||||
|
<a href="{{result.url}}">
|
||||||
|
<p class='link'>{{result.url}}</p>
|
||||||
|
<p class='title'>{{result.title|strengthen}}</p>
|
||||||
|
<p class='extract'>{{result.extract|strengthen}}</p>
|
||||||
|
</a>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</li>
|
||||||
|
{% endfor %}
|
0
mwmbl/templatetags/__init__.py
Normal file
0
mwmbl/templatetags/__init__.py
Normal file
18
mwmbl/templatetags/result_filters.py
Normal file
18
mwmbl/templatetags/result_filters.py
Normal file
|
@ -0,0 +1,18 @@
|
||||||
|
from django.template import Library
|
||||||
|
from django.utils.html import conditional_escape
|
||||||
|
from django.utils.safestring import mark_safe
|
||||||
|
|
||||||
|
register = Library()
|
||||||
|
|
||||||
|
|
||||||
|
@register.filter(needs_autoescape=True)
def strengthen(spans, autoescape=True):
    """
    Join a sequence of text spans into one HTML string, wrapping bold spans
    in ``<strong>`` tags.

    Each span is indexed with ``"value"`` (the text to output) and
    ``"is_bold"`` (truthy when the text should be emphasised). When
    ``autoescape`` is true every value goes through ``conditional_escape``
    first; otherwise values are emitted unchanged. The joined string is
    returned via ``mark_safe`` so the ``<strong>`` tags added here are not
    escaped again by the template engine.
    """
    if autoescape:
        escape = conditional_escape
    else:
        def escape(text):
            return text

    def render(span):
        # Escape first, then decide on the wrapper, so only our own tags are raw markup.
        text = escape(span["value"])
        return f"<strong>{text}</strong>" if span["is_bold"] else text

    return mark_safe("".join(render(span) for span in spans))
|
|
@ -15,12 +15,10 @@ Including another URLconf
|
||||||
2. Add a URL to urlpatterns: path('blog/', include('blog.urls'))
|
2. Add a URL to urlpatterns: path('blog/', include('blog.urls'))
|
||||||
"""
|
"""
|
||||||
from django.contrib import admin
|
from django.contrib import admin
|
||||||
from django.contrib.auth import login, logout
|
|
||||||
from django.template.defaulttags import url
|
|
||||||
from django.urls import path, include
|
from django.urls import path, include
|
||||||
|
|
||||||
from mwmbl.api import api_original as api, api_v1
|
from mwmbl.api import api_original as api, api_v1
|
||||||
from mwmbl.views import signup, profile
|
from mwmbl.views import profile, search_results, fetch_url
|
||||||
|
|
||||||
urlpatterns = [
|
urlpatterns = [
|
||||||
path('admin/', admin.site.urls),
|
path('admin/', admin.site.urls),
|
||||||
|
@ -28,9 +26,7 @@ urlpatterns = [
|
||||||
path('api/v1/', api_v1.urls),
|
path('api/v1/', api_v1.urls),
|
||||||
path('accounts/', include('allauth.urls')),
|
path('accounts/', include('allauth.urls')),
|
||||||
|
|
||||||
# path("accounts/", include("django.contrib.auth.urls")),
|
|
||||||
# path('accounts/new/', signup, name='signup'),
|
|
||||||
path('accounts/profile/', profile, name='profile'),
|
path('accounts/profile/', profile, name='profile'),
|
||||||
# path('login/', login, {'template_name': 'login.html'}, name='login'),
|
path('app/search/', search_results, name="search_results"),
|
||||||
# path('logout/', logout, {'next_page': 'login'}, name='logout'),
|
path('app/fetch/', fetch_url, name="fetch_url")
|
||||||
]
|
]
|
||||||
|
|
|
@ -1,24 +1,66 @@
|
||||||
from django.contrib.auth import authenticate, login
|
import justext
|
||||||
|
import requests
|
||||||
from django.contrib.auth.decorators import login_required
|
from django.contrib.auth.decorators import login_required
|
||||||
from django.contrib.auth.forms import UserCreationForm
|
from django.shortcuts import render
|
||||||
from django.shortcuts import redirect, render
|
|
||||||
|
from mwmbl.format import format_result
|
||||||
|
from mwmbl.search_setup import ranker
|
||||||
|
|
||||||
|
from justext.core import html_to_dom, ParagraphMaker, classify_paragraphs, revise_paragraph_classification, \
|
||||||
|
LENGTH_LOW_DEFAULT, STOPWORDS_LOW_DEFAULT, MAX_LINK_DENSITY_DEFAULT, NO_HEADINGS_DEFAULT, LENGTH_HIGH_DEFAULT, \
|
||||||
|
STOPWORDS_HIGH_DEFAULT, MAX_HEADING_DISTANCE_DEFAULT, DEFAULT_ENCODING, DEFAULT_ENC_ERRORS, preprocessor
|
||||||
|
|
||||||
|
from mwmbl.settings import NUM_EXTRACT_CHARS
|
||||||
|
from mwmbl.tinysearchengine.indexer import Document
|
||||||
|
|
||||||
|
|
||||||
def signup(request):
|
def justext_with_dom(html_text, stoplist, length_low=LENGTH_LOW_DEFAULT,
|
||||||
if request.method == 'POST':
|
length_high=LENGTH_HIGH_DEFAULT, stopwords_low=STOPWORDS_LOW_DEFAULT,
|
||||||
form = UserCreationForm(request.POST)
|
stopwords_high=STOPWORDS_HIGH_DEFAULT, max_link_density=MAX_LINK_DENSITY_DEFAULT,
|
||||||
if form.is_valid():
|
max_heading_distance=MAX_HEADING_DISTANCE_DEFAULT, no_headings=NO_HEADINGS_DEFAULT,
|
||||||
form.save()
|
encoding=None, default_encoding=DEFAULT_ENCODING,
|
||||||
username = form.cleaned_data.get('username')
|
enc_errors=DEFAULT_ENC_ERRORS):
|
||||||
raw_password = form.cleaned_data.get('password1')
|
"""
|
||||||
user = authenticate(username=username, password=raw_password)
|
Converts an HTML page into a list of classified paragraphs. Each paragraph
|
||||||
login(request, user)
|
is represented as an instance of class ``justext.paragraph.Paragraph``.
|
||||||
return redirect('/')
|
"""
|
||||||
else:
|
dom = html_to_dom(html_text, default_encoding, encoding, enc_errors)
|
||||||
form = UserCreationForm()
|
|
||||||
return render(request, 'signup.html', {'form': form})
|
titles = dom.xpath("//title")
|
||||||
|
title = titles[0].text if len(titles) > 0 else None
|
||||||
|
|
||||||
|
dom = preprocessor(dom)
|
||||||
|
|
||||||
|
paragraphs = ParagraphMaker.make_paragraphs(dom)
|
||||||
|
|
||||||
|
classify_paragraphs(paragraphs, stoplist, length_low, length_high,
|
||||||
|
stopwords_low, stopwords_high, max_link_density, no_headings)
|
||||||
|
revise_paragraph_classification(paragraphs, max_heading_distance)
|
||||||
|
|
||||||
|
return paragraphs, title
|
||||||
|
|
||||||
|
|
||||||
@login_required
|
@login_required
|
||||||
def profile(request):
|
def profile(request):
|
||||||
return render(request, 'profile.html')
|
return render(request, 'profile.html')
|
||||||
|
|
||||||
|
|
||||||
|
def search_results(request):
    """
    Render the search results for the ``query`` GET parameter.

    The query is passed to ``ranker.search`` and the returned results are
    rendered with the ``results.html`` template.

    A request without a ``query`` parameter is treated like an empty query
    instead of raising ``MultiValueDictKeyError`` (which surfaced as HTTP 500).
    """
    query = request.GET.get("query", "")
    results = ranker.search(query)
    return render(request, "results.html", {"results": results})
|
||||||
|
|
||||||
|
|
||||||
|
def fetch_url(request):
    """
    Download the page named by the ``url`` GET parameter and render it as a
    single search result.

    The page is run through ``justext_with_dom``; the text of the paragraphs
    classified as ``'good'`` is joined into an extract, truncated to
    ``NUM_EXTRACT_CHARS`` characters (ending in an ellipsis when cut). The
    resulting ``Document`` is formatted against the ``query`` GET parameter
    and rendered with ``results.html``.

    Returns HTTP 400 when ``url`` is missing or is not an absolute http(s)
    URL, and HTTP 502 when the remote page cannot be fetched.
    """
    # Local imports keep this view self-contained; both modules are already
    # dependencies of this file (stdlib and Django).
    from urllib.parse import urlsplit
    from django.http import HttpResponse, HttpResponseBadRequest

    # Upper bound, in seconds, on waiting for the remote server. Without it
    # requests may block this worker indefinitely.
    timeout_seconds = 10

    url = request.GET.get("url", "")
    query = request.GET.get("query", "")

    try:
        scheme = urlsplit(url).scheme
    except ValueError:
        scheme = ""
    # Only web URLs make sense here; this also keeps schemes such as
    # "javascript:" out of the href rendered by the results template.
    if scheme not in ("http", "https"):
        return HttpResponseBadRequest("url must be an absolute http(s) URL")

    # NOTE(review): this fetches an arbitrary user-supplied URL from the
    # server (SSRF risk, e.g. internal addresses). The scheme check above does
    # not address that; consider restricting hosts or requiring authentication.
    try:
        response = requests.get(url, timeout=timeout_seconds)
    except requests.RequestException:
        return HttpResponse("Unable to fetch the requested URL", status=502)

    paragraphs, title = justext_with_dom(response.content, justext.get_stoplist("English"))
    good_paragraphs = [p for p in paragraphs if p.class_type == 'good']

    extract = ' '.join([p.text for p in good_paragraphs])
    if len(extract) > NUM_EXTRACT_CHARS:
        extract = extract[:NUM_EXTRACT_CHARS - 1] + '…'

    result = Document(title=title, url=url, extract=extract, score=0.0)
    return render(request, "results.html", {"results": [format_result(result, query)]})
|
||||||
|
|
Loading…
Reference in a new issue