Merge branch 'master' into mwmbl-package

This commit is contained in:
nitred 2021-12-29 00:25:37 +01:00
commit be40a15b27
5 changed files with 1316 additions and 45 deletions

155
.gitignore vendored
View file

@ -1,3 +1,158 @@
./data
.idea
*~
### Python .gitignore source: https://github.com/github/gitignore/commit/3b6d9b05997558c7d1b376429a153e11610b8195
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

View file

@ -3,8 +3,11 @@ html {
background: #dcdced;
}
body {
font-size: 1.2rem;
}
p {
font-size: 25px;
width: 100%;
white-space: nowrap;
overflow: hidden;
@ -19,11 +22,12 @@ div {
.url {
margin-top: 0px;
font-size: 20px;
font-size: 1rem;
}
#container {
width: 1024px;
.container {
width: 100%;
max-width: 1024px;
margin: 0 auto;
}
@ -34,7 +38,7 @@ div {
outline: none;
font-size: 50px;
font-size: inherit;
border: 2px solid #ccc;
border-width: 4px;

View file

@ -1,7 +1,8 @@
<html>
<head>
<meta name="referrer" content="no-referrer">
<title>Stoatally Different</title>
<title>Stoatally Different</title>
<meta name="viewport" content="width=device-width, initial-scale=1">
<link href="/index.css" rel="stylesheet">
<link rel="search"
type="application/opensearchdescription+xml"
@ -10,7 +11,7 @@
<script src="/index.js"></script>
</head>
<body>
<div id="container">
<div class="container">
<form autocomplete="off" id="search-form">
<input type="search" id="search" name="s" value="" autofocus/>
</form>

1149
poetry.lock generated

File diff suppressed because it is too large Load diff

View file

@ -11,9 +11,43 @@ zstandard = "^0.16.0"
mmh3 = "^3.0.0"
fastapi = "^0.70.1"
uvicorn = "^0.16.0"
# Optional dependencies do not get installed by default. Look under tool.poetry.extras section
# to see which extras to use.
botocore = {version= "==1.23.20", optional = true}
boto3 = {version= "==1.20.20", optional = true}
ujson = {version= "==4.3.0", optional = true}
warcio = {version= "==1.7.4", optional = true}
idna = {version= "==3.3", optional = true}
beautifulsoup4 = {version= "==4.10.0", optional = true}
lxml = {version= "==4.6.4", optional = true}
jusText = {version= "==3.0.0", optional = true}
langdetect = {version= "==1.0.9", optional = true}
pyarrow = {version= "==6.0.0", optional = true}
pyspark = {version= "==3.2.0", optional = true}
Levenshtein = {version= "==0.16.0", optional = true}
# en-core-web-sm requires a compatible version of spacy
spacy = {version= "==3.2.1", optional = true}
en-core-web-sm = {url = "https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.2.0/en_core_web_sm-3.2.0.tar.gz", optional = true}
# [tool.poetry.dependencies.en_core_web_sm]
# url= "https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.2.0/en_core_web_sm-3.2.0-py3-none-any.whl"
# Extras bundle the optional dependencies declared above so that they can be
# installed together, e.g. `poetry install --extras indexer`.
[tool.poetry.extras]
indexer = [
    "botocore",
    "boto3",
    "ujson",
    "warcio",
    "idna",
    "beautifulsoup4",
    "lxml",
    "jusText",
    "langdetect",
    "pyarrow",
    "pyspark",
    "Levenshtein",
    # Keep spacy on a release that is compatible with the en-core-web-sm model.
    "spacy",
    "en-core-web-sm",
]
[tool.poetry.dev-dependencies]
# botocore = "^1.23.20"