def create_api(version):
    """Build the NinjaAPI instance for ``version`` and mount its routers.

    The search, crawler and curation routers are created and attached in that
    order under ``/search/``, ``/crawler/`` and ``/curation/``.
    """
    # CSRF checking is enabled to allow cookie-based authentication.
    api = NinjaAPI(version=version, csrf=True)

    api.add_router("/search/", search.create_router(ranker))
    api.add_router(
        "/crawler/",
        crawler.create_router(batch_cache=batch_cache, queued_batches=queued_batches),
    )
    api.add_router("/curation/", curate.create_router(index_path))

    return api
update_queue_continuously - class MwmblConfig(AppConfig): name = "mwmbl" verbose_name = "Mwmbl Application" def ready(self): + # Imports here to avoid AppRegistryNotReady exception + from mwmbl.api import queued_batches + from mwmbl import background + from mwmbl.indexer.paths import INDEX_NAME + from mwmbl.indexer.update_urls import update_urls_continuously + from mwmbl.tinysearchengine.indexer import TinyIndex, Document, PAGE_SIZE + from mwmbl.url_queue import update_queue_continuously + index_path = Path(settings.DATA_PATH) / INDEX_NAME try: existing_index = TinyIndex(item_factory=Document, index_path=index_path) diff --git a/mwmbl/migrations/0001_initial.py b/mwmbl/migrations/0001_initial.py new file mode 100644 index 0000000..1dc8d43 --- /dev/null +++ b/mwmbl/migrations/0001_initial.py @@ -0,0 +1,58 @@ +# Generated by Django 4.2.6 on 2023-10-25 11:55 + +from django.conf import settings +import django.contrib.auth.models +import django.contrib.auth.validators +from django.db import migrations, models +import django.db.models.deletion +import django.utils.timezone + + +class Migration(migrations.Migration): + + initial = True + + dependencies = [ + ('auth', '0012_alter_user_first_name_max_length'), + ] + + operations = [ + migrations.CreateModel( + name='MwmblUser', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('password', models.CharField(max_length=128, verbose_name='password')), + ('last_login', models.DateTimeField(blank=True, null=True, verbose_name='last login')), + ('is_superuser', models.BooleanField(default=False, help_text='Designates that this user has all permissions without explicitly assigning them.', verbose_name='superuser status')), + ('username', models.CharField(error_messages={'unique': 'A user with that username already exists.'}, help_text='Required. 150 characters or fewer. 
Letters, digits and @/./+/-/_ only.', max_length=150, unique=True, validators=[django.contrib.auth.validators.UnicodeUsernameValidator()], verbose_name='username')), + ('first_name', models.CharField(blank=True, max_length=150, verbose_name='first name')), + ('last_name', models.CharField(blank=True, max_length=150, verbose_name='last name')), + ('email', models.EmailField(blank=True, max_length=254, verbose_name='email address')), + ('is_staff', models.BooleanField(default=False, help_text='Designates whether the user can log into this admin site.', verbose_name='staff status')), + ('is_active', models.BooleanField(default=True, help_text='Designates whether this user should be treated as active. Unselect this instead of deleting accounts.', verbose_name='active')), + ('date_joined', models.DateTimeField(default=django.utils.timezone.now, verbose_name='date joined')), + ('groups', models.ManyToManyField(blank=True, help_text='The groups this user belongs to. A user will get all permissions granted to each of their groups.', related_name='user_set', related_query_name='user', to='auth.group', verbose_name='groups')), + ('user_permissions', models.ManyToManyField(blank=True, help_text='Specific permissions for this user.', related_name='user_set', related_query_name='user', to='auth.permission', verbose_name='user permissions')), + ], + options={ + 'verbose_name': 'user', + 'verbose_name_plural': 'users', + 'abstract': False, + }, + managers=[ + ('objects', django.contrib.auth.models.UserManager()), + ], + ), + migrations.CreateModel( + name='UserCuration', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('timestamp', models.DateTimeField()), + ('url', models.CharField(max_length=300)), + ('results', models.JSONField()), + ('curation_type', models.CharField(max_length=20)), + ('curation', models.JSONField()), + ('user', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, 
def _extract_query(url: str) -> str:
    """Return the single search query carried by a curation URL.

    The query component of ``url`` is parsed; when ``url`` has no query
    component the whole string is parsed instead, so inputs that were accepted
    before this helper existed keep working.

    Raises:
        ValueError: if the URL does not contain exactly one query parameter
            with exactly one value.
    """
    # Local import: only ``urljoin`` and ``parse_qs`` are imported at file level.
    from urllib.parse import urlsplit

    query_params = parse_qs(urlsplit(url).query or url)
    # ``!= 1`` also rejects a URL without any parameter, which previously
    # escaped as a StopIteration from ``next(iter(...))``.
    if len(query_params) != 1:
        raise ValueError(f"Should be one query string in the URL: {url}")

    (queries,) = query_params.values()
    if len(queries) != 1:
        raise ValueError(f"Should be one query value in the URL: {url}")

    return queries[0]


def create_router(index_path: str) -> Router:
    """Create the router exposing the authenticated curation endpoints.

    Each endpoint requires ``django_auth`` authentication, stores a
    ``UserCuration`` row for the requesting user and writes the submitted
    results into the index page for the curated query.

    Args:
        index_path: Path of the TinyIndex that curated results are stored in.

    Returns:
        A router with POST endpoints ``/begin``, ``/move``, ``/delete``,
        ``/add`` and ``/validate``.
    """
    import logging  # local import keeps this block self-contained

    logger = logging.getLogger(__name__)
    router = Router(tags=["user"])

    @router.post("/begin", auth=django_auth)
    def user_begin_curate(request, curate_begin: Curation[CurateBegin]):
        # CurateBegin itself has no fields, while _curate reads timestamp, url,
        # results and curation, so the payload must be the Curation envelope
        # just like the other endpoints.
        return _curate(request, "curate_begin", curate_begin)

    @router.post("/move", auth=django_auth)
    def user_move_result(request, curate_move: Curation[CurateMove]):
        return _curate(request, "curate_move", curate_move)

    @router.post("/delete", auth=django_auth)
    def user_delete_result(request, curate_delete: Curation[CurateDelete]):
        return _curate(request, "curate_delete", curate_delete)

    @router.post("/add", auth=django_auth)
    def user_add_result(request, curate_add: Curation[CurateAdd]):
        return _curate(request, "curate_add", curate_add)

    @router.post("/validate", auth=django_auth)
    def user_validate_result(request, curate_validate: Curation[CurateValidate]):
        # Distinct name: this handler used to reuse ``user_add_result``.
        return _curate(request, "curate_validate", curate_validate)

    def _curate(request, curation_type: str, curation: Curation):
        """Record a curation and store its results in the index.

        Args:
            request: The authenticated request; ``request.user`` owns the record.
            curation_type: Label saved with the record (e.g. ``"curate_move"``).
            curation: The submitted curation payload.

        Returns:
            ``{"curation": "ok"}`` once the results have been stored.

        Raises:
            ValueError: if ``curation.url`` does not carry exactly one query.
        """
        # Validate the URL first so a malformed request neither leaves a saved
        # curation without a matching index update nor opens the index for writing.
        query = _extract_query(curation.url)
        tokens = tokenize(query)
        term = " ".join(tokens)
        logger.debug("Curating query %r (tokens %r, key %r)", query, tokens, term)

        user_curation = UserCuration(
            user=request.user,
            timestamp=get_datetime_from_timestamp(curation.timestamp),
            url=curation.url,
            results=curation.dict()["results"],
            curation_type=curation_type,
            curation=curation.curation.dict(),
        )
        user_curation.save()

        # Scores count down from MAX_CURATED_SCORE in the submitted order.
        documents = [
            Document(result.title, result.url, result.extract, MAX_CURATED_SCORE - i, term, result.curated)
            for i, result in enumerate(curation.results)
        ]

        with TinyIndex(Document, index_path, 'w') as indexer:
            page_index = indexer.get_key_page_index(term)
            logger.debug("Storing %d documents in page %s", len(documents), page_index)
            indexer.store_in_page(page_index, documents)

        return {"curation": "ok"}

    return router
CurateMove(Schema): + old_index: int + new_index: int + + +class CurateDelete(Schema): + delete_index: int + + +class CurateAdd(Schema): + insert_index: int + url: str + + +class CurateValidate(Schema): + validate_index: int + is_validated: bool + + +T = TypeVar('T', CurateBegin, CurateAdd, CurateDelete, CurateMove, CurateValidate) + + +class Curation(Schema, Generic[T]): + timestamp: int + url: str + results: list[Result] + curation: T + diff --git a/mwmbl/platform/user.py b/mwmbl/platform/user.py deleted file mode 100644 index bbdcb0e..0000000 --- a/mwmbl/platform/user.py +++ /dev/null @@ -1,190 +0,0 @@ -import json -import os -from typing import TypeVar, Generic -from urllib.parse import urljoin, parse_qs - -import requests -from fastapi import APIRouter, Response -from pydantic import BaseModel - -from mwmbl.tinysearchengine.indexer import TinyIndex, Document -from mwmbl.tokenizer import tokenize - - -LEMMY_URL = os.environ["LEMMY_URL"] -RESULT_URL = "https://mwmbl.org/?q=" -MAX_CURATED_SCORE = 1_111_111.0 - - -class Register(BaseModel): - username: str - email: str - password: str - password_verify: str - - -class Login(BaseModel): - username_or_email: str - password: str - - -class Result(BaseModel): - url: str - title: str - extract: str - curated: bool - - -class BeginCurate(BaseModel): - auth: str - url: str - results: list[Result] - - -class CurateMove(BaseModel): - old_index: int - new_index: int - - -class CurateDelete(BaseModel): - delete_index: int - - -class CurateAdd(BaseModel): - insert_index: int - url: str - - -class CurateValidate(BaseModel): - validate_index: int - is_validated: bool - - -T = TypeVar('T', CurateAdd, CurateDelete, CurateMove, CurateValidate) - - -class Curation(BaseModel, Generic[T]): - auth: str - curation_id: int - url: str - results: list[Result] - curation: T - - -def create_router(index_path: str) -> APIRouter: - router = APIRouter(prefix="/user", tags=["user"]) - - # TODO: reinstate - # community_id = get_community_id() - 
community_id = 0 - - @router.post("/register") - def user_register(register: Register) -> Response: - lemmy_register = { - "username": register.username, - "email": register.email, - "password": register.password, - "password_verify": register.password_verify, - "answer": "not applicable", - "captcha_answer": None, - "captcha_uuid": None, - "honeypot": None, - "show_nsfw": False, - } - request = requests.post(urljoin(LEMMY_URL, "api/v3/user/register"), json=lemmy_register) - if request.status_code != 200: - return Response(content=request.content, status_code=request.status_code, media_type="text/json") - - @router.post("/login") - def user_login(login: Login) -> Response: - request = requests.post(urljoin(LEMMY_URL, "api/v3/user/login"), json=login.dict()) - return Response(content=request.content, status_code=request.status_code, media_type="text/json") - - @router.post("/curation/begin") - def user_begin_curate(begin_curate: BeginCurate): - results = begin_curate.dict()["results"] - body = json.dumps({"original_results": results}, indent=2) - create_post = { - "auth": begin_curate.auth, - "body": body, - "community_id": community_id, - "honeypot": None, - "language_id": None, - "name": begin_curate.url, - "nsfw": None, - "url": begin_curate.url, - } - request = requests.post(urljoin(LEMMY_URL, "api/v3/post"), json=create_post) - if request.status_code != 200: - return Response(content=request.content, status_code=request.status_code, media_type="text/json") - data = request.json() - curation_id = data["post_view"]["post"]["id"] - return {"curation_id": curation_id} - - @router.post("/curation/move") - def user_move_result(curate_move: Curation[CurateMove]): - return _curate("curate_move", curate_move) - - @router.post("/curation/delete") - def user_delete_result(curate_delete: Curation[CurateDelete]): - return _curate("curate_delete", curate_delete) - - @router.post("/curation/add") - def user_add_result(curate_add: Curation[CurateAdd]): - return 
_curate("curate_add", curate_add) - - @router.post("/curation/validate") - def user_add_result(curate_validate: Curation[CurateValidate]): - return _curate("curate_validate", curate_validate) - - def _curate(curation_type: str, curation: Curation): - content = json.dumps({ - "curation_type": curation_type, - "curation": curation.curation.dict(), - }, indent=2) - create_comment = { - "auth": curation.auth, - "content": json.dumps(content, indent=2), - "form_id": None, - "language_id": None, - "parent_id": None, - "post_id": curation.curation_id, - } - request = requests.post(urljoin(LEMMY_URL, "api/v3/comment"), json=create_comment) - - with TinyIndex(Document, index_path, 'w') as indexer: - query_string = parse_qs(curation.url) - if len(query_string) > 1: - raise ValueError(f"Should be one query string in the URL: {curation.url}") - - queries = next(iter(query_string.values())) - if len(queries) > 1: - raise ValueError(f"Should be one query value in the URL: {curation.url}") - - query = queries[0] - print("Query", query) - tokens = tokenize(query) - print("Tokens", tokens) - term = " ".join(tokens) - print("Key", term) - - documents = [ - Document(result.title, result.url, result.extract, MAX_CURATED_SCORE - i, term, result.curated) - for i, result in enumerate(curation.results) - ] - page_index = indexer.get_key_page_index(term) - print("Page index", page_index) - print("Storing documents", documents) - indexer.store_in_page(page_index, documents) - - return Response(content=request.content, status_code=request.status_code, media_type="text/json") - - return router - - -def get_community_id() -> str: - request = requests.get(urljoin(LEMMY_URL, "api/v3/community?name=main")) - community = request.json() - return community["community_view"]["community"]["id"] - - diff --git a/mwmbl/settings_common.py b/mwmbl/settings_common.py index 14a1e5a..e223c1b 100644 --- a/mwmbl/settings_common.py +++ b/mwmbl/settings_common.py @@ -119,7 +119,6 @@ USE_TZ = True STATIC_URL = 
'static/' STATICFILES_DIRS = [str(Path(__file__).parent.parent / "front-end" / "dist")] -print("Static files", STATICFILES_DIRS) # Default primary key field type # https://docs.djangoproject.com/en/4.2/ref/settings/#default-auto-field @@ -134,5 +133,10 @@ AUTHENTICATION_BACKENDS = [ 'allauth.account.auth_backends.AuthenticationBackend', ] + +AUTH_USER_MODEL = "mwmbl.MwmblUser" + + ACCOUNT_EMAIL_REQUIRED = True ACCOUNT_EMAIL_VERIFICATION = "mandatory" +