Implement curation API using Django Ninja
This commit is contained in:
parent
bd017079d5
commit
bb9e6aa4bd
10 changed files with 227 additions and 199 deletions
8
mwmbl/admin.py
Normal file
8
mwmbl/admin.py
Normal file
|
@ -0,0 +1,8 @@
|
|||
from django.contrib.admin import ModelAdmin
|
||||
from django.contrib.auth.admin import UserAdmin
|
||||
from django.contrib import admin
|
||||
|
||||
from mwmbl.models import MwmblUser, UserCuration
|
||||
|
||||
# Make the custom user model editable in the Django admin, reusing Django's
# stock UserAdmin for it.
admin.site.register(MwmblUser, UserAdmin)
# Expose stored curation records through the default ModelAdmin.
admin.site.register(UserCuration, ModelAdmin)
|
|
@ -7,6 +7,7 @@ from ninja import NinjaAPI
|
|||
import mwmbl.crawler.app as crawler
|
||||
from mwmbl.indexer.batch_cache import BatchCache
|
||||
from mwmbl.indexer.paths import INDEX_NAME, BATCH_DIR_NAME
|
||||
from mwmbl.platform import curate
|
||||
from mwmbl.tinysearchengine import search
|
||||
from mwmbl.tinysearchengine.completer import Completer
|
||||
from mwmbl.tinysearchengine.indexer import TinyIndex, Document
|
||||
|
@ -24,13 +25,17 @@ batch_cache = BatchCache(Path(settings.DATA_PATH) / BATCH_DIR_NAME)
|
|||
|
||||
|
||||
def create_api(version):
|
||||
api = NinjaAPI(version=version)
|
||||
# Set csrf to True to allow cookie-based authentication
|
||||
api = NinjaAPI(version=version, csrf=True)
|
||||
|
||||
search_router = search.create_router(ranker)
|
||||
api.add_router("/search/", search_router)
|
||||
|
||||
crawler_router = crawler.create_router(batch_cache=batch_cache, queued_batches=queued_batches)
|
||||
api.add_router("/crawler/", crawler_router)
|
||||
|
||||
curation_router = curate.create_router(index_path)
|
||||
api.add_router("/curation/", curation_router)
|
||||
return api
|
||||
|
||||
|
||||
|
|
|
@ -6,19 +6,20 @@ from pathlib import Path
|
|||
from django.apps import AppConfig
|
||||
from django.conf import settings
|
||||
|
||||
from mwmbl.api import queued_batches
|
||||
from mwmbl import background
|
||||
from mwmbl.indexer.paths import INDEX_NAME
|
||||
from mwmbl.indexer.update_urls import update_urls_continuously
|
||||
from mwmbl.tinysearchengine.indexer import TinyIndex, Document, PAGE_SIZE
|
||||
from mwmbl.url_queue import update_queue_continuously
|
||||
|
||||
|
||||
class MwmblConfig(AppConfig):
|
||||
name = "mwmbl"
|
||||
verbose_name = "Mwmbl Application"
|
||||
|
||||
def ready(self):
|
||||
# Imports here to avoid AppRegistryNotReady exception
|
||||
from mwmbl.api import queued_batches
|
||||
from mwmbl import background
|
||||
from mwmbl.indexer.paths import INDEX_NAME
|
||||
from mwmbl.indexer.update_urls import update_urls_continuously
|
||||
from mwmbl.tinysearchengine.indexer import TinyIndex, Document, PAGE_SIZE
|
||||
from mwmbl.url_queue import update_queue_continuously
|
||||
|
||||
index_path = Path(settings.DATA_PATH) / INDEX_NAME
|
||||
try:
|
||||
existing_index = TinyIndex(item_factory=Document, index_path=index_path)
|
||||
|
|
58
mwmbl/migrations/0001_initial.py
Normal file
58
mwmbl/migrations/0001_initial.py
Normal file
|
@ -0,0 +1,58 @@
|
|||
# Generated by Django 4.2.6 on 2023-10-25 11:55
|
||||
|
||||
from django.conf import settings
|
||||
import django.contrib.auth.models
|
||||
import django.contrib.auth.validators
|
||||
from django.db import migrations, models
|
||||
import django.db.models.deletion
|
||||
import django.utils.timezone
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Initial schema migration: creates the MwmblUser and UserCuration models.

    MwmblUser carries the user fields listed below plus many-to-many links to
    auth groups and permissions; UserCuration holds one submitted curation and
    points at ``settings.AUTH_USER_MODEL`` through its ``user`` foreign key.
    """

    initial = True

    # MwmblUser links to auth.group and auth.permission, so the auth app's
    # migrations have to be applied before this one.
    dependencies = [
        ('auth', '0012_alter_user_first_name_max_length'),
    ]

    operations = [
        migrations.CreateModel(
            name='MwmblUser',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('password', models.CharField(max_length=128, verbose_name='password')),
                ('last_login', models.DateTimeField(blank=True, null=True, verbose_name='last login')),
                ('is_superuser', models.BooleanField(default=False, help_text='Designates that this user has all permissions without explicitly assigning them.', verbose_name='superuser status')),
                ('username', models.CharField(error_messages={'unique': 'A user with that username already exists.'}, help_text='Required. 150 characters or fewer. Letters, digits and @/./+/-/_ only.', max_length=150, unique=True, validators=[django.contrib.auth.validators.UnicodeUsernameValidator()], verbose_name='username')),
                ('first_name', models.CharField(blank=True, max_length=150, verbose_name='first name')),
                ('last_name', models.CharField(blank=True, max_length=150, verbose_name='last name')),
                ('email', models.EmailField(blank=True, max_length=254, verbose_name='email address')),
                ('is_staff', models.BooleanField(default=False, help_text='Designates whether the user can log into this admin site.', verbose_name='staff status')),
                ('is_active', models.BooleanField(default=True, help_text='Designates whether this user should be treated as active. Unselect this instead of deleting accounts.', verbose_name='active')),
                ('date_joined', models.DateTimeField(default=django.utils.timezone.now, verbose_name='date joined')),
                ('groups', models.ManyToManyField(blank=True, help_text='The groups this user belongs to. A user will get all permissions granted to each of their groups.', related_name='user_set', related_query_name='user', to='auth.group', verbose_name='groups')),
                ('user_permissions', models.ManyToManyField(blank=True, help_text='Specific permissions for this user.', related_name='user_set', related_query_name='user', to='auth.permission', verbose_name='user permissions')),
            ],
            options={
                'verbose_name': 'user',
                'verbose_name_plural': 'users',
                'abstract': False,
            },
            managers=[
                ('objects', django.contrib.auth.models.UserManager()),
            ],
        ),
        migrations.CreateModel(
            name='UserCuration',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('timestamp', models.DateTimeField()),
                ('url', models.CharField(max_length=300)),
                ('results', models.JSONField()),
                ('curation_type', models.CharField(max_length=20)),
                ('curation', models.JSONField()),
                # Deleting a user cascades to the curations they submitted.
                ('user', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL)),
            ],
        ),
    ]
|
0
mwmbl/migrations/__init__.py
Normal file
0
mwmbl/migrations/__init__.py
Normal file
15
mwmbl/models.py
Normal file
15
mwmbl/models.py
Normal file
|
@ -0,0 +1,15 @@
|
|||
from django.db import models
|
||||
from django.contrib.auth.models import AbstractUser
|
||||
|
||||
|
||||
# Project-specific user model. It adds nothing to AbstractUser yet; it is the
# model that UserCuration.user points at.
class MwmblUser(AbstractUser):
    pass
|
||||
|
||||
|
||||
# One curation action submitted by a user through the curation API.
class UserCuration(models.Model):
    # Submitting user; deleting the user cascades to their curations.
    user = models.ForeignKey(MwmblUser, on_delete=models.CASCADE)
    # When the curation was made.
    timestamp = models.DateTimeField()
    # URL supplied with the curation; limited to 300 characters.
    url = models.CharField(max_length=300)
    # The result list sent with the curation, stored as JSON.
    results = models.JSONField()
    # Kind of action, e.g. "curate_move" or "curate_delete" (max 20 chars).
    curation_type = models.CharField(max_length=20)
    # The action's own payload, stored as JSON.
    curation = models.JSONField()
|
82
mwmbl/platform/curate.py
Normal file
82
mwmbl/platform/curate.py
Normal file
|
@ -0,0 +1,82 @@
|
|||
import json
|
||||
from urllib.parse import urljoin, parse_qs
|
||||
|
||||
import requests
|
||||
from ninja import Router
|
||||
from ninja.security import django_auth
|
||||
|
||||
from mwmbl.indexer.update_urls import get_datetime_from_timestamp
|
||||
from mwmbl.models import UserCuration
|
||||
from mwmbl.platform.data import CurateBegin, CurateMove, CurateDelete, CurateAdd, CurateValidate, Curation
|
||||
from mwmbl.tinysearchengine.indexer import TinyIndex, Document
|
||||
from mwmbl.tokenizer import tokenize
|
||||
|
||||
|
||||
# NOTE(review): not referenced by the curation router in this module —
# confirm whether it is still needed.
RESULT_URL = "https://mwmbl.org/?q="
# Score given to the first curated result; each following result gets one less
# (MAX_CURATED_SCORE - position).
MAX_CURATED_SCORE = 1_111_111.0
|
||||
|
||||
|
||||
def create_router(index_path: str) -> Router:
    """Build the Ninja router that exposes the curation endpoints.

    Every operation requires Django session authentication (``django_auth``),
    records the submitted curation as a ``UserCuration`` row for the requesting
    user, and stores the submitted results in the index at ``index_path``.

    Args:
        index_path: Location of the TinyIndex that curated results are written
            to.

    Returns:
        A router with POST ``/begin``, ``/move``, ``/delete``, ``/add`` and
        ``/validate`` operations, each answering ``{"curation": "ok"}``.
    """
    router = Router(tags=["user"])

    # The body has to be the full Curation envelope: _curate reads timestamp,
    # url, results and curation from it, and a bare CurateBegin has none of them.
    @router.post("/begin", auth=django_auth)
    def user_begin_curate(request, curate_begin: Curation[CurateBegin]):
        return _curate(request, "curate_begin", curate_begin)

    @router.post("/move", auth=django_auth)
    def user_move_result(request, curate_move: Curation[CurateMove]):
        return _curate(request, "curate_move", curate_move)

    @router.post("/delete", auth=django_auth)
    def user_delete_result(request, curate_delete: Curation[CurateDelete]):
        return _curate(request, "curate_delete", curate_delete)

    @router.post("/add", auth=django_auth)
    def user_add_result(request, curate_add: Curation[CurateAdd]):
        return _curate(request, "curate_add", curate_add)

    # Named after its own operation so it does not reuse the /add handler's name.
    @router.post("/validate", auth=django_auth)
    def user_validate_result(request, curate_validate: Curation[CurateValidate]):
        return _curate(request, "curate_validate", curate_validate)

    def _curate(request, curation_type: str, curation: Curation):
        """Persist one curation and write its results into the index.

        Args:
            request: Authenticated request; ``request.user`` owns the record.
            curation_type: Label stored with the record, e.g. ``"curate_move"``.
            curation: The submitted envelope (timestamp, url, results, payload).

        Returns:
            ``{"curation": "ok"}`` once the results have been stored.

        Raises:
            ValueError: If ``curation.url`` does not carry exactly one query
                parameter with exactly one value.
        """
        user_curation = UserCuration(
            user=request.user,
            timestamp=get_datetime_from_timestamp(curation.timestamp),
            url=curation.url,
            results=curation.dict()["results"],
            curation_type=curation_type,
            curation=curation.curation.dict(),
        )
        user_curation.save()

        # parse_qs is applied to the whole URL, so the single key is everything
        # before the first "=" (e.g. "https://mwmbl.org/?q"); only its value,
        # the search query, is used. An empty result means the URL carried no
        # query at all, which is rejected like any other malformed URL.
        query_string = parse_qs(curation.url)
        if len(query_string) != 1:
            raise ValueError(f"Should be one query string in the URL: {curation.url}")

        queries = next(iter(query_string.values()))
        if len(queries) > 1:
            raise ValueError(f"Should be one query value in the URL: {curation.url}")

        query = queries[0]
        print("Query", query)
        tokens = tokenize(query)
        print("Tokens", tokens)
        term = " ".join(tokens)
        print("Key", term)

        # Earlier results get higher scores so the curated order is kept.
        documents = [
            Document(result.title, result.url, result.extract, MAX_CURATED_SCORE - i, term, result.curated)
            for i, result in enumerate(curation.results)
        ]

        # Open the index for writing only after the input has been validated.
        with TinyIndex(Document, index_path, 'w') as indexer:
            page_index = indexer.get_key_page_index(term)
            print("Page index", page_index)
            print("Storing documents", documents)
            indexer.store_in_page(page_index, documents)

        return {"curation": "ok"}

    return router
|
||||
|
||||
|
45
mwmbl/platform/data.py
Normal file
45
mwmbl/platform/data.py
Normal file
|
@ -0,0 +1,45 @@
|
|||
from datetime import datetime
|
||||
from typing import TypeVar, Generic
|
||||
|
||||
from ninja import Schema
|
||||
|
||||
|
||||
# One search result as submitted with a curation. The curation endpoints turn
# each Result into an index Document from these four fields.
class Result(Schema):
    url: str
    title: str
    extract: str
    # Passed through unchanged to the stored Document.
    curated: bool
|
||||
|
||||
|
||||
# Payload for the "begin" curation action; it carries no fields of its own.
class CurateBegin(Schema):
    pass
|
||||
|
||||
|
||||
# Payload for the "move" curation action. The server stores it as JSON without
# interpreting it.
class CurateMove(Schema):
    # presumably positions in the submitted results list — TODO confirm
    old_index: int
    new_index: int
|
||||
|
||||
|
||||
# Payload for the "delete" curation action. The server stores it as JSON
# without interpreting it.
class CurateDelete(Schema):
    # presumably the position of the removed result — TODO confirm
    delete_index: int
|
||||
|
||||
|
||||
# Payload for the "add" curation action. The server stores it as JSON without
# interpreting it.
class CurateAdd(Schema):
    # presumably where the new result was inserted — TODO confirm
    insert_index: int
    # presumably the URL of the added result — TODO confirm
    url: str
|
||||
|
||||
|
||||
# Payload for the "validate" curation action. The server stores it as JSON
# without interpreting it.
class CurateValidate(Schema):
    # presumably the position of the result being (un)validated — TODO confirm
    validate_index: int
    is_validated: bool
|
||||
|
||||
|
||||
# Constrained type variable: a Curation's payload must be one of these five
# action schemas.
T = TypeVar('T', CurateBegin, CurateAdd, CurateDelete, CurateMove, CurateValidate)
|
||||
|
||||
|
||||
# Envelope sent to every curation endpoint, generic over the action payload.
class Curation(Schema, Generic[T]):
    # Converted to a datetime before being stored (units not visible here —
    # TODO confirm seconds vs. milliseconds).
    timestamp: int
    # URL whose single query-string value is used as the search query.
    url: str
    # Results in curated order; earlier entries are given higher scores.
    results: list[Result]
    # The action-specific payload, stored as JSON alongside the record.
    curation: T
|
||||
|
|
@ -1,190 +0,0 @@
|
|||
import json
|
||||
import os
|
||||
from typing import TypeVar, Generic
|
||||
from urllib.parse import urljoin, parse_qs
|
||||
|
||||
import requests
|
||||
from fastapi import APIRouter, Response
|
||||
from pydantic import BaseModel
|
||||
|
||||
from mwmbl.tinysearchengine.indexer import TinyIndex, Document
|
||||
from mwmbl.tokenizer import tokenize
|
||||
|
||||
|
||||
LEMMY_URL = os.environ["LEMMY_URL"]
|
||||
RESULT_URL = "https://mwmbl.org/?q="
|
||||
MAX_CURATED_SCORE = 1_111_111.0
|
||||
|
||||
|
||||
class Register(BaseModel):
|
||||
username: str
|
||||
email: str
|
||||
password: str
|
||||
password_verify: str
|
||||
|
||||
|
||||
class Login(BaseModel):
|
||||
username_or_email: str
|
||||
password: str
|
||||
|
||||
|
||||
class Result(BaseModel):
|
||||
url: str
|
||||
title: str
|
||||
extract: str
|
||||
curated: bool
|
||||
|
||||
|
||||
class BeginCurate(BaseModel):
|
||||
auth: str
|
||||
url: str
|
||||
results: list[Result]
|
||||
|
||||
|
||||
class CurateMove(BaseModel):
|
||||
old_index: int
|
||||
new_index: int
|
||||
|
||||
|
||||
class CurateDelete(BaseModel):
|
||||
delete_index: int
|
||||
|
||||
|
||||
class CurateAdd(BaseModel):
|
||||
insert_index: int
|
||||
url: str
|
||||
|
||||
|
||||
class CurateValidate(BaseModel):
|
||||
validate_index: int
|
||||
is_validated: bool
|
||||
|
||||
|
||||
T = TypeVar('T', CurateAdd, CurateDelete, CurateMove, CurateValidate)
|
||||
|
||||
|
||||
class Curation(BaseModel, Generic[T]):
|
||||
auth: str
|
||||
curation_id: int
|
||||
url: str
|
||||
results: list[Result]
|
||||
curation: T
|
||||
|
||||
|
||||
def create_router(index_path: str) -> APIRouter:
|
||||
router = APIRouter(prefix="/user", tags=["user"])
|
||||
|
||||
# TODO: reinstate
|
||||
# community_id = get_community_id()
|
||||
community_id = 0
|
||||
|
||||
@router.post("/register")
|
||||
def user_register(register: Register) -> Response:
|
||||
lemmy_register = {
|
||||
"username": register.username,
|
||||
"email": register.email,
|
||||
"password": register.password,
|
||||
"password_verify": register.password_verify,
|
||||
"answer": "not applicable",
|
||||
"captcha_answer": None,
|
||||
"captcha_uuid": None,
|
||||
"honeypot": None,
|
||||
"show_nsfw": False,
|
||||
}
|
||||
request = requests.post(urljoin(LEMMY_URL, "api/v3/user/register"), json=lemmy_register)
|
||||
if request.status_code != 200:
|
||||
return Response(content=request.content, status_code=request.status_code, media_type="text/json")
|
||||
|
||||
@router.post("/login")
|
||||
def user_login(login: Login) -> Response:
|
||||
request = requests.post(urljoin(LEMMY_URL, "api/v3/user/login"), json=login.dict())
|
||||
return Response(content=request.content, status_code=request.status_code, media_type="text/json")
|
||||
|
||||
@router.post("/curation/begin")
|
||||
def user_begin_curate(begin_curate: BeginCurate):
|
||||
results = begin_curate.dict()["results"]
|
||||
body = json.dumps({"original_results": results}, indent=2)
|
||||
create_post = {
|
||||
"auth": begin_curate.auth,
|
||||
"body": body,
|
||||
"community_id": community_id,
|
||||
"honeypot": None,
|
||||
"language_id": None,
|
||||
"name": begin_curate.url,
|
||||
"nsfw": None,
|
||||
"url": begin_curate.url,
|
||||
}
|
||||
request = requests.post(urljoin(LEMMY_URL, "api/v3/post"), json=create_post)
|
||||
if request.status_code != 200:
|
||||
return Response(content=request.content, status_code=request.status_code, media_type="text/json")
|
||||
data = request.json()
|
||||
curation_id = data["post_view"]["post"]["id"]
|
||||
return {"curation_id": curation_id}
|
||||
|
||||
@router.post("/curation/move")
|
||||
def user_move_result(curate_move: Curation[CurateMove]):
|
||||
return _curate("curate_move", curate_move)
|
||||
|
||||
@router.post("/curation/delete")
|
||||
def user_delete_result(curate_delete: Curation[CurateDelete]):
|
||||
return _curate("curate_delete", curate_delete)
|
||||
|
||||
@router.post("/curation/add")
|
||||
def user_add_result(curate_add: Curation[CurateAdd]):
|
||||
return _curate("curate_add", curate_add)
|
||||
|
||||
@router.post("/curation/validate")
|
||||
def user_add_result(curate_validate: Curation[CurateValidate]):
|
||||
return _curate("curate_validate", curate_validate)
|
||||
|
||||
def _curate(curation_type: str, curation: Curation):
|
||||
content = json.dumps({
|
||||
"curation_type": curation_type,
|
||||
"curation": curation.curation.dict(),
|
||||
}, indent=2)
|
||||
create_comment = {
|
||||
"auth": curation.auth,
|
||||
"content": json.dumps(content, indent=2),
|
||||
"form_id": None,
|
||||
"language_id": None,
|
||||
"parent_id": None,
|
||||
"post_id": curation.curation_id,
|
||||
}
|
||||
request = requests.post(urljoin(LEMMY_URL, "api/v3/comment"), json=create_comment)
|
||||
|
||||
with TinyIndex(Document, index_path, 'w') as indexer:
|
||||
query_string = parse_qs(curation.url)
|
||||
if len(query_string) > 1:
|
||||
raise ValueError(f"Should be one query string in the URL: {curation.url}")
|
||||
|
||||
queries = next(iter(query_string.values()))
|
||||
if len(queries) > 1:
|
||||
raise ValueError(f"Should be one query value in the URL: {curation.url}")
|
||||
|
||||
query = queries[0]
|
||||
print("Query", query)
|
||||
tokens = tokenize(query)
|
||||
print("Tokens", tokens)
|
||||
term = " ".join(tokens)
|
||||
print("Key", term)
|
||||
|
||||
documents = [
|
||||
Document(result.title, result.url, result.extract, MAX_CURATED_SCORE - i, term, result.curated)
|
||||
for i, result in enumerate(curation.results)
|
||||
]
|
||||
page_index = indexer.get_key_page_index(term)
|
||||
print("Page index", page_index)
|
||||
print("Storing documents", documents)
|
||||
indexer.store_in_page(page_index, documents)
|
||||
|
||||
return Response(content=request.content, status_code=request.status_code, media_type="text/json")
|
||||
|
||||
return router
|
||||
|
||||
|
||||
def get_community_id() -> str:
|
||||
request = requests.get(urljoin(LEMMY_URL, "api/v3/community?name=main"))
|
||||
community = request.json()
|
||||
return community["community_view"]["community"]["id"]
|
||||
|
||||
|
|
@ -119,7 +119,6 @@ USE_TZ = True
|
|||
|
||||
STATIC_URL = 'static/'
|
||||
STATICFILES_DIRS = [str(Path(__file__).parent.parent / "front-end" / "dist")]
|
||||
print("Static files", STATICFILES_DIRS)
|
||||
|
||||
# Default primary key field type
|
||||
# https://docs.djangoproject.com/en/4.2/ref/settings/#default-auto-field
|
||||
|
@ -134,5 +133,10 @@ AUTHENTICATION_BACKENDS = [
|
|||
'allauth.account.auth_backends.AuthenticationBackend',
|
||||
]
|
||||
|
||||
|
||||
AUTH_USER_MODEL = "mwmbl.MwmblUser"
|
||||
|
||||
|
||||
ACCOUNT_EMAIL_REQUIRED = True
|
||||
ACCOUNT_EMAIL_VERIFICATION = "mandatory"
|
||||
|
||||
|
|
Loading…
Reference in a new issue