Compare commits
No commits in common. "main" and "add-term-info-to-index" have entirely different histories.
10 changed files with 59 additions and 36 deletions
@@ -3,4 +3,3 @@ Contributions are very welcome!
Please join the discussion at https://matrix.to/#/#mwmbl:matrix.org and let us know what you're planning to do.

See https://book.mwmbl.org/page/developers/ for a guide to development.
mwmbl/api.py (new file, 27 lines)
@@ -0,0 +1,27 @@
from ninja import NinjaAPI
from ninja.security import django_auth

import mwmbl.crawler.app as crawler
from mwmbl.platform import curate
from mwmbl.search_setup import queued_batches, index_path, ranker, batch_cache
from mwmbl.tinysearchengine import search


def create_api(version):
    # Set csrf to True to allow cookie-based authentication
    api = NinjaAPI(version=version, csrf=True)

    search_router = search.create_router(ranker)
    api.add_router("/search/", search_router)

    crawler_router = crawler.create_router(batch_cache=batch_cache, queued_batches=queued_batches)
    api.add_router("/crawler/", crawler_router)

    curation_router = curate.create_router(index_path)
    api.add_router("/curation/", curation_router, auth=django_auth)
    return api


# Workaround because Django-Ninja doesn't allow using multiple URLs for the same thing
api_original = create_api("0.1")
api_v1 = create_api("1.0.0")
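The factory above exists because django-ninja will not serve one NinjaAPI instance from two URL prefixes, hence one instance per API version. As a rough illustration of what the composed API answers to once mounted, here is a sketch assuming the api/v1/ mount shown in mwmbl/urls.py below; the query value and response shape are illustrative, not taken from the diff:

# Hedged sketch: exercising the versioned API with Django's test client.
# Assumes a configured Django test environment for this project.
from django.test import Client

client = Client()
response = client.get("/api/v1/search/", {"s": "mwmbl"})
print(response.status_code)  # 200 when the index is available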
mwmbl/crawler/app.py
@@ -10,7 +10,7 @@ from uuid import uuid4
import boto3
import requests
from fastapi import HTTPException
from ninja import NinjaAPI
from ninja import Router
from redis import Redis

from mwmbl.crawler.batch import Batch, NewBatchRequest, HashedBatch
@@ -50,8 +50,12 @@ def upload(data: bytes, name: str):
last_batch = None


def create_router(batch_cache: BatchCache, queued_batches: Queue, version: str) -> NinjaAPI:
    router = NinjaAPI(urls_namespace=f"crawler-{version}")
def create_router(batch_cache: BatchCache, queued_batches: Queue) -> Router:
    router = Router(tags=["crawler"])

    # TODO: # ensure tables are created before crawler code is used:
    # #
    # # url_db.create_tables()

    @router.post('/batches/')
    def post_batch(request, batch: Batch):
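This is the recurring pattern of the whole change: each module previously built its own NinjaAPI (which needs a unique urls_namespace), and now returns a plain Router that only gains URLs when attached to the parent API. A minimal sketch of that pattern, using a hypothetical router rather than mwmbl's real one:

# Hedged sketch of the NinjaAPI -> Router refactor shown above.
from ninja import NinjaAPI, Router

router = Router(tags=["example"])  # tags group endpoints in the OpenAPI docs

@router.post("/batches/")
def post_batch(request):
    # A Router defines endpoints but owns no URLs until it is attached.
    return {"status": "ok"}

api = NinjaAPI(version="1.0.0")
api.add_router("/crawler/", router)  # endpoint served at /crawler/batches/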
mwmbl/platform/curate.py
@@ -2,7 +2,7 @@ from logging import getLogger
from typing import Any
from urllib.parse import parse_qs

from ninja import Router, NinjaAPI
from ninja import Router

from mwmbl.indexer.update_urls import get_datetime_from_timestamp
from mwmbl.models import UserCuration
@@ -19,8 +19,8 @@ MAX_CURATED_SCORE = 1_111_111.0
logger = getLogger(__name__)


def create_router(index_path: str, version: str) -> NinjaAPI:
    router = NinjaAPI(urls_namespace=f"curate-{version}", csrf=True)
def create_router(index_path: str) -> Router:
    router = Router(tags=["user"])

    @router.post("/begin")
    def user_begin_curate(request, curate_begin: make_curation_type(CurateBegin)):
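Worth noting: the old curation NinjaAPI enabled csrf=True itself, while the Router version relies on the parent API in mwmbl/api.py, which sets csrf=True and attaches django_auth at mount time, so every curation route requires an authenticated session. A short sketch of that arrangement, with a hypothetical endpoint:

# Hedged sketch: session authentication applied when the router is mounted.
from ninja import NinjaAPI, Router
from ninja.security import django_auth

router = Router(tags=["user"])

@router.post("/begin")
def begin(request):
    # request.user is guaranteed authenticated by django_auth below.
    return {"user": request.user.username}

api = NinjaAPI(csrf=True)  # CSRF protection for the cookie-based auth
api.add_router("/curation/", router, auth=django_auth)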
@@ -32,7 +32,7 @@ SCORE_FOR_SAME_DOMAIN = 0.01
EXTRA_LINK_MULTIPLIER = 0.001
UNKNOWN_DOMAIN_MULTIPLIER = 0.001
EXCLUDED_DOMAINS = {'web.archive.org', 'forums.giantitp.com', 'www.crutchfield.com', 'plus.google.com'}
DOMAIN_BLACKLIST_REGEX = re.compile(r"porn|xxx|jksu\.org|lwhyl\.org$|rgcd\.cn$|hzqwyou\.cn$|omgoat\.org$")
DOMAIN_BLACKLIST_REGEX = re.compile(r"porn|xxx|jksu\.org|lwhyl\.org$|rgcd\.cn$|hzqwyou\.cn$")
CORE_DOMAINS = {
    'github.com',
    'en.wikipedia.org',
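The blacklist regex mixes anchored and unanchored alternatives, which is easy to misread: porn, xxx, and jksu\.org match anywhere in a domain, while lwhyl\.org, rgcd\.cn, and hzqwyou\.cn only match at the end of the string. A quick sanity check (the example domains are made up):

import re

DOMAIN_BLACKLIST_REGEX = re.compile(r"porn|xxx|jksu\.org|lwhyl\.org$|rgcd\.cn$|hzqwyou\.cn$")

assert DOMAIN_BLACKLIST_REGEX.search("www.hzqwyou.cn")           # anchored match at the end
assert DOMAIN_BLACKLIST_REGEX.search("pornexample.com")          # "porn" matches anywhere
assert not DOMAIN_BLACKLIST_REGEX.search("rgcd.cn.example.com")  # anchored, so no match here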
@@ -1,3 +1,8 @@
from mwmbl.settings_prod import *
from mwmbl.settings_common import *

DEBUG = False
ALLOWED_HOSTS = ["api.mwmbl.org", "mwmbl.org"]

DATA_PATH = "/app/storage"
RUN_BACKGROUND_PROCESSES = True
NUM_PAGES = 10240000
@@ -9,6 +9,7 @@ https://docs.djangoproject.com/en/4.2/topics/settings/
For the full list of settings and their values, see
https://docs.djangoproject.com/en/4.2/ref/settings/
"""

from pathlib import Path

# Build paths inside the project like this: BASE_DIR / 'subdir'.
mwmbl/tinysearchengine/search.py
@@ -1,6 +1,6 @@
from logging import getLogger

from ninja import NinjaAPI
from ninja import Router

from mwmbl.tinysearchengine.rank import HeuristicRanker

@@ -10,8 +10,8 @@ logger = getLogger(__name__)
SCORE_THRESHOLD = 0.25


def create_router(ranker: HeuristicRanker, version: str) -> NinjaAPI:
    router = NinjaAPI(urls_namespace=f"search-{version}")
def create_router(ranker: HeuristicRanker) -> Router:
    router = Router(tags=["search"])

    @router.get("")
    def search(request, s: str):
mwmbl/urls.py
@@ -17,27 +17,15 @@ Including another URLconf
from django.contrib import admin
from django.urls import path, include

import mwmbl.crawler.app as crawler
from mwmbl.platform import curate
from mwmbl.search_setup import queued_batches, index_path, ranker, batch_cache
from mwmbl.tinysearchengine import search
from mwmbl.api import api_v1
from mwmbl.views import home_fragment, fetch_url, index

urlpatterns = [
    path('admin/', admin.site.urls),
    path('api/v1/', api_v1.urls),
    path('accounts/', include('allauth.urls')),

    path('', index, name="index"),
    path('', index, name="home"),
    path('app/home/', home_fragment, name="home"),
    path('app/fetch/', fetch_url, name="fetch_url"),

    # TODO: this is the old API, deprecated and to be removed once all clients have moved over
    path("search/", search.create_router(ranker, "0.1").urls),
    path("crawler/", crawler.create_router(batch_cache=batch_cache, queued_batches=queued_batches, version="0.1").urls),
    path("curation/", curate.create_router(index_path, version="0.1").urls),

    # New API
    path("api/v1/search/", search.create_router(ranker, "1.0.0").urls),
    path("api/v1/crawler/", crawler.create_router(batch_cache=batch_cache, queued_batches=queued_batches, version="1.0.0").urls),
    path("api/v1/curation/", curate.create_router(index_path, version="1.0.0").urls),
    path('app/fetch/', fetch_url, name="fetch_url")
]
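The net effect in the URLconf: six path() entries that each constructed a throwaway router are replaced by a single mount of api_v1, whose routers were already attached in mwmbl/api.py. A sketch of the resulting mount; NinjaAPI.urls bundles the API's url patterns so Django's path() can include them directly:

# Hedged sketch of the consolidated mount implied by the change above.
from django.urls import path
from mwmbl.api import api_v1

urlpatterns = [
    # One entry now serves /api/v1/search/, /api/v1/crawler/... and
    # /api/v1/curation/..., via the routers added in mwmbl/api.py.
    path("api/v1/", api_v1.urls),
]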
mwmbl/views.py
@@ -1,7 +1,7 @@
from dataclasses import dataclass
from datetime import datetime
from itertools import groupby
from urllib.parse import urlparse, parse_qs, urlencode, urlunparse, ParseResult
from urllib.parse import urlparse, parse_qs

import justext
import requests
@@ -66,13 +66,12 @@ def home_fragment(request):
        "query": query,
        "activity": activity,
    })

    # Encode the new query string
    if query:
        new_query_string = urlencode({"q": query}, doseq=True)
        new_url = "/?" + new_query_string
    else:
        new_url = "/"
    current_url = request.htmx.current_url
    # Replace query string with new query
    stripped_url = current_url[:current_url.index("?")] if "?" in current_url else current_url
    query_string = "?q=" + query if len(query) > 0 else ""
    new_url = stripped_url + query_string
    # Set the htmx replace header
    response["HX-Replace-Url"] = new_url
    return response
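The replacement home_fragment logic derives the new browser URL from htmx's current URL rather than rebuilding it from "/", which preserves whatever path the user is on; note it also splices the query in raw instead of passing it through urlencode. A standalone check of that string manipulation (the helper name and test URLs are hypothetical, the two expressions are copied from the lines above):

# Hedged sketch: the query-string replacement extracted into a helper
# so it can be exercised without a request object.
def replace_query(current_url: str, query: str) -> str:
    stripped_url = current_url[:current_url.index("?")] if "?" in current_url else current_url
    query_string = "?q=" + query if len(query) > 0 else ""
    return stripped_url + query_string

assert replace_query("https://mwmbl.org/?q=old+terms", "rust") == "https://mwmbl.org/?q=rust"
assert replace_query("https://mwmbl.org/", "") == "https://mwmbl.org/"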