Implement curation API using Django Ninja

This commit is contained in:
Daoud Clarke 2023-10-25 16:39:42 +01:00
parent bd017079d5
commit bb9e6aa4bd
10 changed files with 227 additions and 199 deletions

8
mwmbl/admin.py Normal file
View file

@ -0,0 +1,8 @@
from django.contrib.admin import ModelAdmin
from django.contrib.auth.admin import UserAdmin
from django.contrib import admin
from mwmbl.models import MwmblUser, UserCuration
admin.site.register(MwmblUser, UserAdmin)
admin.site.register(UserCuration, ModelAdmin)

View file

@ -7,6 +7,7 @@ from ninja import NinjaAPI
import mwmbl.crawler.app as crawler
from mwmbl.indexer.batch_cache import BatchCache
from mwmbl.indexer.paths import INDEX_NAME, BATCH_DIR_NAME
from mwmbl.platform import curate
from mwmbl.tinysearchengine import search
from mwmbl.tinysearchengine.completer import Completer
from mwmbl.tinysearchengine.indexer import TinyIndex, Document
@ -24,13 +25,17 @@ batch_cache = BatchCache(Path(settings.DATA_PATH) / BATCH_DIR_NAME)
def create_api(version):
api = NinjaAPI(version=version)
# Set csrf to True to all cookie-based authentication
api = NinjaAPI(version=version, csrf=True)
search_router = search.create_router(ranker)
api.add_router("/search/", search_router)
crawler_router = crawler.create_router(batch_cache=batch_cache, queued_batches=queued_batches)
api.add_router("/crawler/", crawler_router)
curation_router = curate.create_router(index_path)
api.add_router("/curation/", curation_router)
return api

View file

@ -6,19 +6,20 @@ from pathlib import Path
from django.apps import AppConfig
from django.conf import settings
from mwmbl.api import queued_batches
from mwmbl import background
from mwmbl.indexer.paths import INDEX_NAME
from mwmbl.indexer.update_urls import update_urls_continuously
from mwmbl.tinysearchengine.indexer import TinyIndex, Document, PAGE_SIZE
from mwmbl.url_queue import update_queue_continuously
class MwmblConfig(AppConfig):
name = "mwmbl"
verbose_name = "Mwmbl Application"
def ready(self):
# Imports here to avoid AppRegistryNotReady exception
from mwmbl.api import queued_batches
from mwmbl import background
from mwmbl.indexer.paths import INDEX_NAME
from mwmbl.indexer.update_urls import update_urls_continuously
from mwmbl.tinysearchengine.indexer import TinyIndex, Document, PAGE_SIZE
from mwmbl.url_queue import update_queue_continuously
index_path = Path(settings.DATA_PATH) / INDEX_NAME
try:
existing_index = TinyIndex(item_factory=Document, index_path=index_path)

View file

@ -0,0 +1,58 @@
# Generated by Django 4.2.6 on 2023-10-25 11:55
from django.conf import settings
import django.contrib.auth.models
import django.contrib.auth.validators
from django.db import migrations, models
import django.db.models.deletion
import django.utils.timezone
class Migration(migrations.Migration):
initial = True
dependencies = [
('auth', '0012_alter_user_first_name_max_length'),
]
operations = [
migrations.CreateModel(
name='MwmblUser',
fields=[
('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('password', models.CharField(max_length=128, verbose_name='password')),
('last_login', models.DateTimeField(blank=True, null=True, verbose_name='last login')),
('is_superuser', models.BooleanField(default=False, help_text='Designates that this user has all permissions without explicitly assigning them.', verbose_name='superuser status')),
('username', models.CharField(error_messages={'unique': 'A user with that username already exists.'}, help_text='Required. 150 characters or fewer. Letters, digits and @/./+/-/_ only.', max_length=150, unique=True, validators=[django.contrib.auth.validators.UnicodeUsernameValidator()], verbose_name='username')),
('first_name', models.CharField(blank=True, max_length=150, verbose_name='first name')),
('last_name', models.CharField(blank=True, max_length=150, verbose_name='last name')),
('email', models.EmailField(blank=True, max_length=254, verbose_name='email address')),
('is_staff', models.BooleanField(default=False, help_text='Designates whether the user can log into this admin site.', verbose_name='staff status')),
('is_active', models.BooleanField(default=True, help_text='Designates whether this user should be treated as active. Unselect this instead of deleting accounts.', verbose_name='active')),
('date_joined', models.DateTimeField(default=django.utils.timezone.now, verbose_name='date joined')),
('groups', models.ManyToManyField(blank=True, help_text='The groups this user belongs to. A user will get all permissions granted to each of their groups.', related_name='user_set', related_query_name='user', to='auth.group', verbose_name='groups')),
('user_permissions', models.ManyToManyField(blank=True, help_text='Specific permissions for this user.', related_name='user_set', related_query_name='user', to='auth.permission', verbose_name='user permissions')),
],
options={
'verbose_name': 'user',
'verbose_name_plural': 'users',
'abstract': False,
},
managers=[
('objects', django.contrib.auth.models.UserManager()),
],
),
migrations.CreateModel(
name='UserCuration',
fields=[
('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('timestamp', models.DateTimeField()),
('url', models.CharField(max_length=300)),
('results', models.JSONField()),
('curation_type', models.CharField(max_length=20)),
('curation', models.JSONField()),
('user', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL)),
],
),
]

View file

15
mwmbl/models.py Normal file
View file

@ -0,0 +1,15 @@
from django.db import models
from django.contrib.auth.models import AbstractUser
class MwmblUser(AbstractUser):
pass
class UserCuration(models.Model):
user = models.ForeignKey(MwmblUser, on_delete=models.CASCADE)
timestamp = models.DateTimeField()
url = models.CharField(max_length=300)
results = models.JSONField()
curation_type = models.CharField(max_length=20)
curation = models.JSONField()

82
mwmbl/platform/curate.py Normal file
View file

@ -0,0 +1,82 @@
import json
from urllib.parse import urljoin, parse_qs
import requests
from ninja import Router
from ninja.security import django_auth
from mwmbl.indexer.update_urls import get_datetime_from_timestamp
from mwmbl.models import UserCuration
from mwmbl.platform.data import CurateBegin, CurateMove, CurateDelete, CurateAdd, CurateValidate, Curation
from mwmbl.tinysearchengine.indexer import TinyIndex, Document
from mwmbl.tokenizer import tokenize
RESULT_URL = "https://mwmbl.org/?q="
MAX_CURATED_SCORE = 1_111_111.0
def create_router(index_path: str) -> Router:
router = Router(tags=["user"])
@router.post("/begin", auth=django_auth)
def user_begin_curate(request, curate_begin: CurateBegin):
return _curate(request, "curate_begin", curate_begin)
@router.post("/move", auth=django_auth)
def user_move_result(request, curate_move: Curation[CurateMove]):
return _curate(request, "curate_move", curate_move)
@router.post("/delete", auth=django_auth)
def user_delete_result(request, curate_delete: Curation[CurateDelete]):
return _curate(request, "curate_delete", curate_delete)
@router.post("/add", auth=django_auth)
def user_add_result(request, curate_add: Curation[CurateAdd]):
return _curate(request, "curate_add", curate_add)
@router.post("/validate", auth=django_auth)
def user_add_result(request, curate_validate: Curation[CurateValidate]):
return _curate(request, "curate_validate", curate_validate)
def _curate(request, curation_type: str, curation: Curation):
user_curation = UserCuration(
user=request.user,
timestamp=get_datetime_from_timestamp(curation.timestamp),
url=curation.url,
results=curation.dict()["results"],
curation_type=curation_type,
curation=curation.curation.dict(),
)
user_curation.save()
with TinyIndex(Document, index_path, 'w') as indexer:
query_string = parse_qs(curation.url)
if len(query_string) > 1:
raise ValueError(f"Should be one query string in the URL: {curation.url}")
queries = next(iter(query_string.values()))
if len(queries) > 1:
raise ValueError(f"Should be one query value in the URL: {curation.url}")
query = queries[0]
print("Query", query)
tokens = tokenize(query)
print("Tokens", tokens)
term = " ".join(tokens)
print("Key", term)
documents = [
Document(result.title, result.url, result.extract, MAX_CURATED_SCORE - i, term, result.curated)
for i, result in enumerate(curation.results)
]
page_index = indexer.get_key_page_index(term)
print("Page index", page_index)
print("Storing documents", documents)
indexer.store_in_page(page_index, documents)
return {"curation": "ok"}
return router

45
mwmbl/platform/data.py Normal file
View file

@ -0,0 +1,45 @@
from datetime import datetime
from typing import TypeVar, Generic
from ninja import Schema
class Result(Schema):
url: str
title: str
extract: str
curated: bool
class CurateBegin(Schema):
pass
class CurateMove(Schema):
old_index: int
new_index: int
class CurateDelete(Schema):
delete_index: int
class CurateAdd(Schema):
insert_index: int
url: str
class CurateValidate(Schema):
validate_index: int
is_validated: bool
T = TypeVar('T', CurateBegin, CurateAdd, CurateDelete, CurateMove, CurateValidate)
class Curation(Schema, Generic[T]):
timestamp: int
url: str
results: list[Result]
curation: T

View file

@ -1,190 +0,0 @@
import json
import os
from typing import TypeVar, Generic
from urllib.parse import urljoin, parse_qs
import requests
from fastapi import APIRouter, Response
from pydantic import BaseModel
from mwmbl.tinysearchengine.indexer import TinyIndex, Document
from mwmbl.tokenizer import tokenize
LEMMY_URL = os.environ["LEMMY_URL"]
RESULT_URL = "https://mwmbl.org/?q="
MAX_CURATED_SCORE = 1_111_111.0
class Register(BaseModel):
username: str
email: str
password: str
password_verify: str
class Login(BaseModel):
username_or_email: str
password: str
class Result(BaseModel):
url: str
title: str
extract: str
curated: bool
class BeginCurate(BaseModel):
auth: str
url: str
results: list[Result]
class CurateMove(BaseModel):
old_index: int
new_index: int
class CurateDelete(BaseModel):
delete_index: int
class CurateAdd(BaseModel):
insert_index: int
url: str
class CurateValidate(BaseModel):
validate_index: int
is_validated: bool
T = TypeVar('T', CurateAdd, CurateDelete, CurateMove, CurateValidate)
class Curation(BaseModel, Generic[T]):
auth: str
curation_id: int
url: str
results: list[Result]
curation: T
def create_router(index_path: str) -> APIRouter:
router = APIRouter(prefix="/user", tags=["user"])
# TODO: reinstate
# community_id = get_community_id()
community_id = 0
@router.post("/register")
def user_register(register: Register) -> Response:
lemmy_register = {
"username": register.username,
"email": register.email,
"password": register.password,
"password_verify": register.password_verify,
"answer": "not applicable",
"captcha_answer": None,
"captcha_uuid": None,
"honeypot": None,
"show_nsfw": False,
}
request = requests.post(urljoin(LEMMY_URL, "api/v3/user/register"), json=lemmy_register)
if request.status_code != 200:
return Response(content=request.content, status_code=request.status_code, media_type="text/json")
@router.post("/login")
def user_login(login: Login) -> Response:
request = requests.post(urljoin(LEMMY_URL, "api/v3/user/login"), json=login.dict())
return Response(content=request.content, status_code=request.status_code, media_type="text/json")
@router.post("/curation/begin")
def user_begin_curate(begin_curate: BeginCurate):
results = begin_curate.dict()["results"]
body = json.dumps({"original_results": results}, indent=2)
create_post = {
"auth": begin_curate.auth,
"body": body,
"community_id": community_id,
"honeypot": None,
"language_id": None,
"name": begin_curate.url,
"nsfw": None,
"url": begin_curate.url,
}
request = requests.post(urljoin(LEMMY_URL, "api/v3/post"), json=create_post)
if request.status_code != 200:
return Response(content=request.content, status_code=request.status_code, media_type="text/json")
data = request.json()
curation_id = data["post_view"]["post"]["id"]
return {"curation_id": curation_id}
@router.post("/curation/move")
def user_move_result(curate_move: Curation[CurateMove]):
return _curate("curate_move", curate_move)
@router.post("/curation/delete")
def user_delete_result(curate_delete: Curation[CurateDelete]):
return _curate("curate_delete", curate_delete)
@router.post("/curation/add")
def user_add_result(curate_add: Curation[CurateAdd]):
return _curate("curate_add", curate_add)
@router.post("/curation/validate")
def user_add_result(curate_validate: Curation[CurateValidate]):
return _curate("curate_validate", curate_validate)
def _curate(curation_type: str, curation: Curation):
content = json.dumps({
"curation_type": curation_type,
"curation": curation.curation.dict(),
}, indent=2)
create_comment = {
"auth": curation.auth,
"content": json.dumps(content, indent=2),
"form_id": None,
"language_id": None,
"parent_id": None,
"post_id": curation.curation_id,
}
request = requests.post(urljoin(LEMMY_URL, "api/v3/comment"), json=create_comment)
with TinyIndex(Document, index_path, 'w') as indexer:
query_string = parse_qs(curation.url)
if len(query_string) > 1:
raise ValueError(f"Should be one query string in the URL: {curation.url}")
queries = next(iter(query_string.values()))
if len(queries) > 1:
raise ValueError(f"Should be one query value in the URL: {curation.url}")
query = queries[0]
print("Query", query)
tokens = tokenize(query)
print("Tokens", tokens)
term = " ".join(tokens)
print("Key", term)
documents = [
Document(result.title, result.url, result.extract, MAX_CURATED_SCORE - i, term, result.curated)
for i, result in enumerate(curation.results)
]
page_index = indexer.get_key_page_index(term)
print("Page index", page_index)
print("Storing documents", documents)
indexer.store_in_page(page_index, documents)
return Response(content=request.content, status_code=request.status_code, media_type="text/json")
return router
def get_community_id() -> str:
request = requests.get(urljoin(LEMMY_URL, "api/v3/community?name=main"))
community = request.json()
return community["community_view"]["community"]["id"]

View file

@ -119,7 +119,6 @@ USE_TZ = True
STATIC_URL = 'static/'
STATICFILES_DIRS = [str(Path(__file__).parent.parent / "front-end" / "dist")]
print("Static files", STATICFILES_DIRS)
# Default primary key field type
# https://docs.djangoproject.com/en/4.2/ref/settings/#default-auto-field
@ -134,5 +133,10 @@ AUTHENTICATION_BACKENDS = [
'allauth.account.auth_backends.AuthenticationBackend',
]
AUTH_USER_MODEL = "mwmbl.MwmblUser"
ACCOUNT_EMAIL_REQUIRED = True
ACCOUNT_EMAIL_VERIFICATION = "mandatory"