commit
617666e3b7
31 changed files with 516 additions and 934 deletions
|
@ -39,4 +39,5 @@ COPY data /app/data
|
|||
COPY config /app/config
|
||||
|
||||
# Using the mwmbl-tinysearchengine binary/entrypoint which comes packaged with mwmbl
|
||||
# TODO: fix the arguments for the recent changes
|
||||
CMD ["/venv/bin/mwmbl-tinysearchengine", "--config", "config/tinysearchengine.yaml"]
|
||||
|
|
|
@ -7,7 +7,7 @@ import json
|
|||
from collections import defaultdict, Counter
|
||||
from urllib.parse import urlparse
|
||||
|
||||
CRAWL_GLOB = "../../data/mwmbl/b2/*/*/*/*/*/*.json.gz"
|
||||
from mwmbl.indexer.paths import CRAWL_GLOB
|
||||
|
||||
|
||||
def get_urls():
|
||||
|
|
|
@ -1,29 +0,0 @@
|
|||
"""
|
||||
Make a curl script for testing performance
|
||||
"""
|
||||
import os
|
||||
from itertools import islice
|
||||
from urllib.parse import quote
|
||||
|
||||
from mwmbl.indexer.paths import DATA_DIR
|
||||
from mwmbl.indexer.wiki import get_wiki_titles_and_urls
|
||||
|
||||
URL_TEMPLATE = "http://localhost:8000/complete?q={}"
|
||||
CURL_FILE = os.path.join(DATA_DIR, "urls.curl")
|
||||
|
||||
|
||||
def get_urls():
    """Yield one completion-endpoint URL per wiki title, for the first 100 titles.

    Each title is lower-cased and percent-encoded before it is substituted
    into ``URL_TEMPLATE``.
    """
    first_hundred = islice(get_wiki_titles_and_urls(), 100)
    for title, _url in first_hundred:
        yield URL_TEMPLATE.format(quote(title.lower()))
|
||||
|
||||
|
||||
def run():
    """Write every URL produced by ``get_urls`` to ``CURL_FILE``, one ``url="..."`` line each."""
    with open(CURL_FILE, 'wt') as output_file:
        output_file.writelines(f'url="{url}"\n' for url in get_urls())
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
run()
|
|
@ -1,120 +0,0 @@
|
|||
"""
|
||||
Test the performance of the search in terms of compression and speed.
|
||||
"""
|
||||
import os
|
||||
from datetime import datetime
|
||||
|
||||
import numpy as np
|
||||
from spacy.lang.en import English
|
||||
from starlette.testclient import TestClient
|
||||
|
||||
from mwmbl.tinysearchengine import create_app
|
||||
from mwmbl.indexer.fsqueue import ZstdJsonSerializer
|
||||
from mwmbl.indexer.index import index_titles_urls_and_extracts
|
||||
from mwmbl.tinysearchengine.indexer import TinyIndex, TinyIndexer, Document
|
||||
from mwmbl.indexer.paths import TEST_INDEX_PATH, DATA_DIR, TEST_TERMS_PATH
|
||||
|
||||
NUM_DOCUMENTS = 30000
|
||||
NUM_PAGES_FOR_STATS = 10
|
||||
TEST_PAGE_SIZE = 512
|
||||
TEST_NUM_PAGES = 1024
|
||||
TEST_DATA_PATH = os.path.join(DATA_DIR, 'test-urls.zstd')
|
||||
RECALL_AT_K = 3
|
||||
|
||||
NUM_QUERY_CHARS = 10
|
||||
|
||||
|
||||
def get_test_pages():
    """Load ``TEST_DATA_PATH`` and return ``(title, url)`` pairs for rows that have a title.

    Rows whose ``title`` is ``None`` are dropped.
    """
    serializer = ZstdJsonSerializer()
    with open(TEST_DATA_PATH, 'rb') as data_file:
        rows = serializer.deserialize(data_file.read())
        pairs = []
        for row in rows:
            if row['title'] is None:
                continue
            pairs.append((row['title'], row['url']))
        return pairs
|
||||
|
||||
|
||||
def query_test():
    """Query ``/complete`` with a prefix of every test title and print recall and timing.

    For each ``(title, url)`` pair the first ``NUM_QUERY_CHARS`` characters of
    the title are sent as the query. A query counts as a hit when ``url``
    occurs in one of the first ``RECALL_AT_K`` entries of the second element
    of the JSON response. Misses are printed for inspection.

    The average query time is computed over the number of queries actually
    executed, and nothing is divided when there were no queries.
    """
    titles_and_urls = get_test_pages()
    print(f"Got {len(titles_and_urls)} titles and URLs")
    # Not referenced below. NOTE(review): kept in case constructing the index
    # validates or opens the test index as a side effect -- confirm and drop.
    tiny_index = TinyIndex(Document, TEST_INDEX_PATH, TEST_NUM_PAGES, TEST_PAGE_SIZE)

    app = create_app.create()
    client = TestClient(app)

    start = datetime.now()
    hits = 0
    count = 0
    for title, url in titles_and_urls:
        query = title[:NUM_QUERY_CHARS]
        response = client.get('/complete', params={'q': query})
        assert response.status_code == 200
        data = response.json()

        # An empty response is a miss; otherwise look for the URL in the top K.
        hit = bool(data) and any(url in candidate for candidate in data[1][:RECALL_AT_K])

        if hit:
            hits += 1
        else:
            print("Miss", data, title, url, sep='\n')

        count += 1

    end = datetime.now()
    print(f"Hits: {hits} out of {count}")
    if count == 0:
        # Recall and per-query time are undefined without any queries.
        print("No queries were run")
        return
    print(f"Recall at {RECALL_AT_K}: {hits/count}")
    print("Query time:", (end - start).total_seconds() / count)
|
||||
|
||||
|
||||
def page_stats(indexer: TinyIndexer):
    """Return size statistics for the largest pages in the test index.

    Every page index in ``range(TEST_NUM_PAGES)`` is fetched with
    ``indexer.get_page``; pages that come back as ``None`` are ignored.

    Returns:
        ``(mean, std, pages)`` where ``mean`` and ``std`` are computed over the
        lengths of the ``NUM_PAGES_FOR_STATS`` longest pages and ``pages`` is a
        tuple of those pages, longest first.

    Raises:
        ValueError: if the index contains no pages at all.
    """
    pages_and_sizes = []
    for i in range(TEST_NUM_PAGES):
        page = indexer.get_page(i)
        if page is not None:
            pages_and_sizes.append((len(page), page))

    if not pages_and_sizes:
        raise ValueError("Index contains no pages to compute statistics for")

    # Sort on the size only, so pages of equal size are never compared with
    # each other (their contents may not be orderable).
    largest = sorted(pages_and_sizes, key=lambda pair: pair[0], reverse=True)[:NUM_PAGES_FOR_STATS]
    big_page_sizes, big_pages = zip(*largest)
    return np.mean(big_page_sizes), np.std(big_page_sizes), big_pages
|
||||
|
||||
|
||||
def performance_test():
    """Rebuild the test index from scratch, print indexing statistics, then run ``query_test``.

    Any existing file at ``TEST_INDEX_PATH`` is removed first so that the
    index is always created fresh.
    """
    nlp = English()
    try:
        os.remove(TEST_INDEX_PATH)
    except FileNotFoundError:
        print("No test index found, creating")

    # NOTE(review): the extent of this ``with`` block is reconstructed --
    # confirm that the reporting and ``query_test()`` belong inside it.
    with TinyIndexer(Document, TEST_INDEX_PATH, TEST_NUM_PAGES, TEST_PAGE_SIZE) as indexer:
        titles_and_urls = get_test_pages()

        started = datetime.now()
        index_titles_urls_and_extracts(indexer, nlp, titles_and_urls, TEST_TERMS_PATH)
        finished = datetime.now()

        index_time = (finished - started).total_seconds()
        index_size = os.path.getsize(TEST_INDEX_PATH)
        page_size_mean, page_size_std, big_pages = page_stats(indexer)

        report = (
            ("Indexed pages:", NUM_DOCUMENTS),
            ("Index time:", index_time),
            ("Index size:", index_size),
            ("Mean docs per page:", page_size_mean),
            ("Std err of docs per page:", page_size_std),
        )
        for label, value in report:
            print(label, value)
        print("Big pages")
        print_pages(big_pages)

        query_test()
|
||||
|
||||
|
||||
def print_pages(pages):
    """Print each page, then its ``(title, url)`` entries one per line, then a blank line."""
    for page in pages:
        print("Page", page)
        for entry in page:
            title, url = entry
            print(title, url)
        print()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
performance_test()
|
10
mwmbl/indexer/batch.py
Normal file
10
mwmbl/indexer/batch.py
Normal file
|
@ -0,0 +1,10 @@
|
|||
from itertools import islice
|
||||
from typing import Iterator
|
||||
|
||||
|
||||
def grouper(n: int, iterator: Iterator):
    """Yield successive tuples of up to ``n`` items taken from ``iterator``.

    The last tuple may be shorter than ``n``; nothing is yielded for an empty
    input.

    Args:
        n: maximum number of items per chunk.
        iterator: the items to batch. Any iterable is accepted.
    """
    # ``islice`` restarts from the beginning each time it is handed a
    # re-iterable object such as a list, which would yield the first chunk
    # forever. Taking a single iterator up front makes every call to
    # ``islice`` continue where the previous one stopped.
    source = iter(iterator)
    while True:
        chunk = tuple(islice(source, n))
        if not chunk:
            return
        yield chunk
|
|
@ -1,15 +0,0 @@
|
|||
#!/bin/bash -xe

# Remove numpy before installing the pinned versions below. The uninstall is
# issued three times, exactly as before -- presumably to clear more than one
# installed copy; TODO confirm.
for _ in 1 2 3; do
    sudo python3 -m pip uninstall numpy -y
done

# Pinned Python dependencies.
sudo python3 -m pip install \
    boto3==1.19.7 \
    botocore==1.22.7 \
    jusText==3.0.0 \
    langdetect==1.0.9 \
    lxml==4.6.3 \
    numpy==1.21.3 \
    pandas==1.2.5 \
    pyarrow==6.0.0 \
    spacy==2.3.5 \
    warcio==1.7.4 \
    zstandard==0.16.0

sudo python3 -m spacy download en_core_web_sm

# Print the resulting package set for the non-root interpreter.
echo "========================"
echo "Normal python pip freeze"
python3 -m pip freeze
|
|
@ -1,45 +0,0 @@
|
|||
"""
|
||||
Crawl the web
|
||||
"""
|
||||
import gzip
|
||||
import hashlib
|
||||
import os
|
||||
import sys
|
||||
from traceback import print_tb, print_exc
|
||||
|
||||
import pandas as pd
|
||||
import requests
|
||||
|
||||
from .paths import DATA_DIR, HN_TOP_PATH, CRAWL_PREFIX
|
||||
|
||||
|
||||
def crawl():
    """Fetch every URL in the ``url`` column of ``HN_TOP_PATH`` and store it gzip-compressed.

    The output file name is the MD5 hex digest of the URL, prefixed with
    ``CRAWL_PREFIX`` and placed in ``DATA_DIR``. URLs whose output file
    already exists are skipped. A failed fetch is reported and the crawl
    carries on with the next URL. Each output file holds the URL on its first
    line followed by the page text.
    """
    urls = pd.read_csv(HN_TOP_PATH)['url']
    for url in urls:
        digest = hashlib.md5(url.encode('utf8')).hexdigest()
        path = os.path.join(DATA_DIR, f"{CRAWL_PREFIX}{digest}.html.gz")
        if os.path.isfile(path):
            print("Path already exists, skipping", url)
            continue

        print("Fetching", url)
        try:
            html = fetch(url)
        except Exception:
            # Best effort: log the traceback and move on to the next URL.
            print_exc(file=sys.stderr)
            print("Unable to fetch", url)
        else:
            with gzip.open(path, 'wt') as output:
                output.write(url + '\n')
                output.write(html)
|
||||
|
||||
|
||||
def fetch(url):
    """Return the decoded body of a GET request to ``url``, using a 10 second timeout."""
    return requests.get(url, timeout=10).text
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
crawl()
|
42
mwmbl/indexer/dedupe.py
Normal file
42
mwmbl/indexer/dedupe.py
Normal file
|
@ -0,0 +1,42 @@
|
|||
"""
|
||||
Dedupe pages that have been crawled more than once and prepare them for indexing
|
||||
"""
|
||||
import glob
|
||||
import gzip
|
||||
import json
|
||||
|
||||
from mwmbl.indexer.batch import grouper
|
||||
from mwmbl.indexer.fsqueue import FSQueue, GzipJsonBlobSerializer
|
||||
from mwmbl.indexer.paths import CRAWL_GLOB, TINYSEARCH_DATA_DIR
|
||||
|
||||
BATCH_SIZE = 100
|
||||
|
||||
|
||||
def get_deduped_pages():
    """Yield crawled items, skipping any whose URL has already been yielded.

    Files matching ``CRAWL_GLOB`` are read in reverse lexicographic path
    order, so when a URL occurs in several files the item from the path that
    sorts last is the one kept. Each file is expected to be gzip-compressed
    JSON with an ``items`` list whose entries carry a ``url`` key.
    """
    seen_urls = set()
    for path in sorted(glob.glob(CRAWL_GLOB), reverse=True):
        # Close each file as soon as it has been parsed instead of leaving
        # one open handle per crawl file behind.
        with gzip.open(path) as crawl_file:
            data = json.load(crawl_file)

        for item in data['items']:
            url = item['url']
            if url in seen_urls:
                continue

            seen_urls.add(url)
            yield item
|
||||
|
||||
|
||||
def queue_deduped_items(deduped_pages):
    """Put ``deduped_pages`` on the ``mwmbl-search-items`` queue in batches of ``BATCH_SIZE``.

    Each queue entry is a dict with a single ``items`` key holding one batch.
    """
    queue = FSQueue(TINYSEARCH_DATA_DIR, 'mwmbl-search-items', GzipJsonBlobSerializer())
    for batch in grouper(BATCH_SIZE, deduped_pages):
        queue.put({'items': batch})
|
||||
|
||||
|
||||
def run():
    """Dedupe the crawled pages and queue them for indexing."""
    queue_deduped_items(get_deduped_pages())
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
run()
|
|
@ -1,20 +0,0 @@
|
|||
# Build the single-file Spark job and upload it with the bootstrap script.
cat hn-top-domains-filtered.py extract.py > runextract.py

for script in runextract.py bootstrap.sh; do
    aws s3 cp "$script" s3://tinysearch/code/
done

# JSON arguments for the cluster, kept in variables so the command below stays readable.
EC2_ATTRIBUTES='{"InstanceProfile":"EMR_EC2_DefaultRole","SubnetId":"subnet-03c33360c68f73a48"}'
BOOTSTRAP_ACTIONS='{"Path": "s3://tinysearch/code/bootstrap.sh"}'
STEPS='[{"Args":["spark-submit","--deploy-mode","cluster","s3n://tinysearch/code/runextract.py"],"Type":"CUSTOM_JAR","ActionOnFailure":"CONTINUE","Jar":"command-runner.jar","Properties":"","Name":"Spark application"}]'
INSTANCE_GROUPS='[{"InstanceCount":2,"EbsConfiguration":{"EbsBlockDeviceConfigs":[{"VolumeSpecification":{"SizeInGB":32,"VolumeType":"gp2"},"VolumesPerInstance":1}]},"InstanceGroupType":"CORE","InstanceType":"m4.large","Name":"Core Instance Group"},{"InstanceCount":1,"EbsConfiguration":{"EbsBlockDeviceConfigs":[{"VolumeSpecification":{"SizeInGB":32,"VolumeType":"gp2"},"VolumesPerInstance":1}]},"InstanceGroupType":"MASTER","InstanceType":"m4.large","Name":"Master Instance Group"}]'
CONFIGURATIONS='[{"Classification":"spark","Properties":{}}]'

# Launch a cluster that runs the uploaded job and terminates itself afterwards.
aws emr create-cluster \
    --applications Name=Spark Name=Zeppelin \
    --ec2-attributes "$EC2_ATTRIBUTES" \
    --service-role EMR_DefaultRole \
    --enable-debugging \
    --release-label emr-5.33.1 \
    --log-uri 's3n://tinysearch/pyspark-logs/' \
    --bootstrap-actions "$BOOTSTRAP_ACTIONS" \
    --steps "$STEPS" \
    --name 'TinySearch' \
    --instance-groups "$INSTANCE_GROUPS" \
    --configurations "$CONFIGURATIONS" \
    --scale-down-behavior TERMINATE_AT_TASK_COMPLETION --region us-east-1 \
    --auto-terminate
|
|
@ -9,7 +9,7 @@ import bs4
|
|||
import requests
|
||||
|
||||
from mwmbl.indexer.fsqueue import FSQueue, ZstdJsonSerializer
|
||||
from mwmbl.indexer.paths import DATA_DIR, DOMAINS_QUEUE_NAME, DOMAINS_TITLES_QUEUE_NAME
|
||||
from mwmbl.indexer.paths import TINYSEARCH_DATA_DIR, DOMAINS_QUEUE_NAME, DOMAINS_TITLES_QUEUE_NAME
|
||||
|
||||
NUM_PROCESSES = 10
|
||||
|
||||
|
@ -33,8 +33,8 @@ def get_redirect_no_cookies(url, max_redirects=5):
|
|||
|
||||
|
||||
def get_domain_titles():
|
||||
domains_queue = FSQueue(DATA_DIR, DOMAINS_QUEUE_NAME, ZstdJsonSerializer())
|
||||
titles_queue = FSQueue(DATA_DIR, DOMAINS_TITLES_QUEUE_NAME, ZstdJsonSerializer())
|
||||
domains_queue = FSQueue(TINYSEARCH_DATA_DIR, DOMAINS_QUEUE_NAME, ZstdJsonSerializer())
|
||||
titles_queue = FSQueue(TINYSEARCH_DATA_DIR, DOMAINS_TITLES_QUEUE_NAME, ZstdJsonSerializer())
|
||||
while True:
|
||||
items_id, items = domains_queue.get()
|
||||
titles = retrieve_titles(items)
|
||||
|
|
|
@ -5,7 +5,7 @@ import csv
|
|||
import gzip
|
||||
|
||||
from mwmbl.indexer.fsqueue import FSQueue, ZstdJsonSerializer
|
||||
from mwmbl.indexer.paths import DOMAINS_PATH, DOMAINS_QUEUE_NAME, DATA_DIR
|
||||
from mwmbl.indexer.paths import DOMAINS_PATH, DOMAINS_QUEUE_NAME, TINYSEARCH_DATA_DIR
|
||||
|
||||
BATCH_SIZE = 250
|
||||
|
||||
|
@ -18,7 +18,7 @@ def get_domains():
|
|||
|
||||
|
||||
def queue_domains():
|
||||
queue = FSQueue(DATA_DIR, DOMAINS_QUEUE_NAME, ZstdJsonSerializer())
|
||||
queue = FSQueue(TINYSEARCH_DATA_DIR, DOMAINS_QUEUE_NAME, ZstdJsonSerializer())
|
||||
queued = 0
|
||||
batch = []
|
||||
for rank, domain in get_domains():
|
||||
|
|
|
@ -1,73 +0,0 @@
|
|||
"""
|
||||
Extract content from HTML files and store it as compressed JSON
|
||||
"""
|
||||
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from pyspark.sql import SparkSession
|
||||
from pyspark.sql.functions import col
|
||||
from pyspark.sql.types import StructType, StructField, StringType, LongType, IntegerType
|
||||
|
||||
RECORDS_PATH = 's3://tinysearch/outputs/records'
|
||||
OUTPUT_PATH = 's3://tinysearch/outputs/index'
|
||||
|
||||
|
||||
# Schema of index rows: a hashed term plus its serialized data and top entry.
index_schema = StructType([
    StructField("term_hash", LongType(), nullable=False),
    StructField("data", StringType(), nullable=False),
    StructField("top", StringType(), nullable=False),
])

# Schema of extracted pages.
output_schema = StructType([
    StructField("uri", StringType(), nullable=False),
    StructField("title", StringType(), nullable=False),
    StructField("extract", StringType(), nullable=False),
])

# Schema of the records that locate a page inside a WARC file.
record_schema = StructType([
    StructField("url", StringType(), nullable=False),
    StructField("warc_filename", StringType(), nullable=False),
    StructField("warc_record_offset", IntegerType(), nullable=False),
    StructField("warc_record_length", IntegerType(), nullable=False),
])

# Session shared by the functions in this module; created at import time.
spark = (
    SparkSession.builder
    .appName("Python Spark SQL basic example")
    .config("spark.some.config.option", "some-value")
    .getOrCreate()
)
|
||||
|
||||
|
||||
def run():
    """Select a sample of Common Crawl index records for the known domains and save them.

    The ``CC-MAIN-2021-43`` WARC subset of the Common Crawl index is sampled
    at 1%, filtered to hosts that are keys of ``DOMAINS``, and written as
    gzip-compressed JSON to ``RECORDS_PATH``, overwriting earlier output.
    """
    cc_index = spark.read.load('s3://commoncrawl/cc-index/table/cc-main/warc/')
    cc_index.createOrReplaceTempView('ccindex')

    query = (
        "SELECT url, warc_filename, warc_record_offset,\n"
        "warc_record_length\n"
        "FROM ccindex\n"
        "WHERE crawl = 'CC-MAIN-2021-43'\n"
        "AND subset = 'warc'\n"
    )
    records = spark.sql(query).sample(fraction=0.01)
    records = records.filter(col('url_host_name').isin(list(DOMAINS.keys())))

    writer = records.write.option('compression', 'gzip').format('json').mode('overwrite')
    writer.save(RECORDS_PATH)
|
||||
|
||||
|
||||
def get_domain_rating(url):
    """Return the ``DOMAINS`` entry for the network location of ``url``, or ``None`` if absent."""
    return DOMAINS.get(urlparse(url).netloc)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
run()
|
|
@ -1,63 +0,0 @@
|
|||
import gzip
|
||||
import json
|
||||
import os
|
||||
from glob import glob
|
||||
from multiprocessing import Process, Lock
|
||||
|
||||
from .extract_process import fetch_process_warc_records
|
||||
from .fsqueue import FSQueue, GzipJsonRowSerializer
|
||||
from .paths import DATA_DIR
|
||||
|
||||
ARCHIVE_INFO_GLOB = 'outputs/records/*.gz'
|
||||
|
||||
NUM_PROCESSES = 8
|
||||
|
||||
|
||||
def get_records():
    """Yield one parsed JSON object per line of every gzip file matching ``ARCHIVE_INFO_GLOB``."""
    for path in glob(ARCHIVE_INFO_GLOB):
        with gzip.open(path) as data_file:
            yield from map(json.loads, data_file)
|
||||
|
||||
|
||||
def process(record):
    """Print ``record`` and return the list of results extracted from it."""
    print("Record", record)
    return [*fetch_process_warc_records([record])]
|
||||
|
||||
|
||||
def run(lock: Lock):
    """Worker loop: take record batches from the ``records`` queue until it is empty.

    Every batch is turned into search items that are put on the
    ``search-items`` queue (only when there are any), after which the batch is
    marked done. ``lock`` guards the call that takes the next batch from the
    input queue.
    """
    serializer = GzipJsonRowSerializer()
    input_queue = FSQueue(DATA_DIR, 'records', serializer)
    output_queue = FSQueue(DATA_DIR, 'search-items', GzipJsonRowSerializer())

    while True:
        with lock:
            queue_item = input_queue.get()

        if queue_item is None:
            print("All finished, stopping:", os.getpid())
            return

        item_id, records = queue_item
        print("Got item: ", item_id, os.getpid())

        search_items = []
        for record in records:
            search_items.extend(fetch_process_warc_records([record]))

        if search_items:
            output_queue.put(search_items)
        input_queue.done(item_id)
|
||||
|
||||
|
||||
def run_multiprocessing():
    """Unlock the ``records`` queue, then run ``NUM_PROCESSES`` workers and wait for all of them."""
    FSQueue(DATA_DIR, 'records', GzipJsonRowSerializer()).unlock_all()

    lock = Lock()
    workers = []
    for _ in range(NUM_PROCESSES):
        worker = Process(target=run, args=(lock,))
        worker.start()
        workers.append(worker)

    for worker in workers:
        worker.join()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
run_multiprocessing()
|
|
@ -1,137 +0,0 @@
|
|||
from io import BytesIO
|
||||
|
||||
import boto3
|
||||
from justext import get_stoplist
|
||||
from justext.core import LENGTH_LOW_DEFAULT, LENGTH_HIGH_DEFAULT, STOPWORDS_LOW_DEFAULT, STOPWORDS_HIGH_DEFAULT, \
|
||||
MAX_LINK_DENSITY_DEFAULT, MAX_HEADING_DISTANCE_DEFAULT, NO_HEADINGS_DEFAULT, DEFAULT_ENCODING, DEFAULT_ENC_ERRORS, \
|
||||
preprocessor, html_to_dom, ParagraphMaker, classify_paragraphs, revise_paragraph_classification
|
||||
from langdetect import detect
|
||||
from lxml.etree import ParserError
|
||||
from warcio import ArchiveIterator
|
||||
|
||||
MAX_URI_LENGTH = 150
|
||||
NUM_CHARS_TO_ANALYSE = 1000
|
||||
NUM_TITLE_CHARS = 65
|
||||
NUM_EXTRACT_CHARS = 155
|
||||
|
||||
|
||||
def fetch_process_warc_records(rows):
    """Fetch the WARC records described by ``rows`` and yield what ``process_record`` extracts.

    Each row supplies ``warc_filename``, ``warc_record_offset`` and
    ``warc_record_length``. The matching byte range is downloaded from the
    ``commoncrawl`` S3 bucket and every record found in it is passed to
    ``process_record``.
    """
    s3client = boto3.client('s3')
    for row in rows:
        warc_path = row['warc_filename']
        offset = int(row['warc_record_offset'])
        length = int(row['warc_record_length'])
        last_byte = offset + length - 1  # HTTP ranges are inclusive at both ends
        response = s3client.get_object(
            Bucket='commoncrawl',
            Key=warc_path,
            Range='bytes={}-{}'.format(offset, last_byte),
        )
        record_stream = BytesIO(response["Body"].read())
        for record in ArchiveIterator(record_stream):
            yield from process_record(record)
|
||||
|
||||
|
||||
def is_html(record):
    """Return True when the record's detected or declared MIME type is HTML.

    The ``WARC-Identified-Payload-Type`` record header is checked first for an
    exact match; otherwise the HTTP ``content-type`` header is searched for
    either HTML type as a substring.
    """
    html_types = ('text/html', 'application/xhtml+xml')
    payload_key = 'WARC-Identified-Payload-Type'

    rec_headers = record.rec_headers
    if payload_key in rec_headers and rec_headers[payload_key] in html_types:
        return True

    content_type = record.http_headers.get_header('content-type', None)
    if not content_type:
        return False
    return any(html_type in content_type for html_type in html_types)
|
||||
|
||||
|
||||
def justext(html_text, stoplist, length_low=LENGTH_LOW_DEFAULT,
            length_high=LENGTH_HIGH_DEFAULT, stopwords_low=STOPWORDS_LOW_DEFAULT,
            stopwords_high=STOPWORDS_HIGH_DEFAULT, max_link_density=MAX_LINK_DENSITY_DEFAULT,
            max_heading_distance=MAX_HEADING_DISTANCE_DEFAULT, no_headings=NO_HEADINGS_DEFAULT,
            encoding=None, default_encoding=DEFAULT_ENCODING,
            enc_errors=DEFAULT_ENC_ERRORS, preprocessor=preprocessor):
    """
    Convert an HTML page into classified paragraphs and also return its title.

    Returns a ``(paragraphs, title)`` pair. ``title`` is the text of the
    first ``<title>`` element, or ``None`` when the page has none.
    """
    dom = html_to_dom(html_text, default_encoding, encoding, enc_errors)
    print("Parsed HTML")

    # ``find`` returns None when there is no <title>; reading ``.text`` on it
    # raises AttributeError, which is treated as "no title".
    try:
        title_element = dom.find(".//title")
        title = title_element.text
    except AttributeError:
        title = None

    paragraphs = ParagraphMaker.make_paragraphs(preprocessor(dom))
    print("Got paragraphs")

    classify_paragraphs(
        paragraphs, stoplist, length_low, length_high,
        stopwords_low, stopwords_high, max_link_density, no_headings,
    )
    revise_paragraph_classification(paragraphs, max_heading_distance)

    return paragraphs, title
|
||||
|
||||
|
||||
def process_record(record):
    """Yield a single ``(uri, title, extract)`` tuple for an English HTML response record.

    Nothing is yielded when the record is not an HTML response, its URI is
    longer than ``MAX_URI_LENGTH``, it has no content or title, it cannot be
    decoded or parsed, its non-boilerplate text is shorter than
    ``NUM_EXTRACT_CHARS``, or the detected language is not ``en``.
    """
    # Skip WARC request/metadata records and anything that is not HTML.
    if record.rec_type != 'response' or not is_html(record):
        return

    uri = record.rec_headers.get_header('WARC-Target-URI')
    uri_length = len(uri)
    if uri_length > MAX_URI_LENGTH:
        print("URI too long", uri_length)
        return

    content = record.content_stream().read().strip()
    if not content:
        return

    try:
        all_paragraphs, full_title = justext(content, get_stoplist('English'))
    except UnicodeDecodeError:
        print("Unable to decode unicode")
        return
    except ParserError:
        print("Unable to parse")
        return

    if full_title is None:
        print("Missing title")
        return

    # Truncate long titles and mark the cut with an ellipsis.
    if len(full_title) > NUM_TITLE_CHARS:
        title = full_title[:NUM_TITLE_CHARS] + '…'
    else:
        title = full_title

    main_text = '\n'.join(p.text for p in all_paragraphs if not p.is_boilerplate)
    text = main_text[:NUM_CHARS_TO_ANALYSE]
    print("Paragraphs", text)

    if len(text) < NUM_EXTRACT_CHARS:
        return

    language = detect(text)
    print("Got language", language)
    if language != 'en':
        return

    yield uri, title, text[:NUM_EXTRACT_CHARS]
|
|
@ -7,7 +7,7 @@ import json
|
|||
import os
|
||||
from abc import ABC
|
||||
from enum import Enum
|
||||
from typing import Union
|
||||
from typing import Union, Any
|
||||
from uuid import uuid4
|
||||
from pathlib import Path
|
||||
|
||||
|
@ -59,10 +59,10 @@ class GzipJsonRowSerializer(Serializer):
|
|||
|
||||
|
||||
class GzipJsonBlobSerializer(Serializer):
|
||||
def serialize(self, items: list[object]) -> bytes:
|
||||
raise NotImplementedError("Serializer not needed - blob is generated by browser extension")
|
||||
def serialize(self, items: Any) -> bytes:
|
||||
return gzip.compress(json.dumps(items).encode('utf8'))
|
||||
|
||||
def deserialize(self, serialized_items: bytes) -> list[object]:
|
||||
def deserialize(self, serialized_items: bytes) -> Any:
|
||||
data = gzip.decompress(serialized_items).decode('utf8')
|
||||
return json.loads(data)
|
||||
|
||||
|
|
|
@ -2,16 +2,15 @@
|
|||
Create a search index
|
||||
"""
|
||||
from collections import Counter
|
||||
from itertools import islice
|
||||
from typing import Iterator, Iterable
|
||||
from typing import Iterable
|
||||
from urllib.parse import unquote
|
||||
|
||||
import pandas as pd
|
||||
|
||||
# NUM_PAGES = 8192
|
||||
# PAGE_SIZE = 512
|
||||
from mwmbl.tinysearchengine.indexer import Document, TokenizedDocument, TinyIndex
|
||||
|
||||
DEFAULT_SCORE = 0
|
||||
|
||||
HTTP_START = 'http://'
|
||||
HTTPS_START = 'https://'
|
||||
BATCH_SIZE = 100
|
||||
|
@ -44,7 +43,7 @@ def prepare_url_for_tokenizing(url: str):
|
|||
return url
|
||||
|
||||
|
||||
def get_pages(nlp, titles_urls_and_extracts) -> Iterable[TokenizedDocument]:
|
||||
def get_pages(nlp, titles_urls_and_extracts, link_counts) -> Iterable[TokenizedDocument]:
|
||||
for i, (title_cleaned, url, extract) in enumerate(titles_urls_and_extracts):
|
||||
title_tokens = tokenize(nlp, title_cleaned)
|
||||
prepared_url = prepare_url_for_tokenizing(unquote(url))
|
||||
|
@ -52,26 +51,19 @@ def get_pages(nlp, titles_urls_and_extracts) -> Iterable[TokenizedDocument]:
|
|||
extract_tokens = tokenize(nlp, extract)
|
||||
print("Extract tokens", extract_tokens)
|
||||
tokens = title_tokens | url_tokens | extract_tokens
|
||||
yield TokenizedDocument(tokens=list(tokens), url=url, title=title_cleaned, extract=extract)
|
||||
score = link_counts.get(url, DEFAULT_SCORE)
|
||||
yield TokenizedDocument(tokens=list(tokens), url=url, title=title_cleaned, extract=extract, score=score)
|
||||
|
||||
if i % 1000 == 0:
|
||||
print("Processed", i)
|
||||
|
||||
|
||||
def grouper(n: int, iterator: Iterator):
    """Yield successive tuples of up to ``n`` items taken from ``iterator``.

    The last tuple may be shorter than ``n``; nothing is yielded for an empty
    input.

    Args:
        n: maximum number of items per chunk.
        iterator: the items to batch. Any iterable is accepted.
    """
    # ``islice`` restarts from the beginning each time it is handed a
    # re-iterable object such as a list, which would yield the first chunk
    # forever. Taking a single iterator up front makes every call to
    # ``islice`` continue where the previous one stopped.
    source = iter(iterator)
    while True:
        chunk = tuple(islice(source, n))
        if not chunk:
            return
        yield chunk
|
||||
|
||||
|
||||
def index_titles_urls_and_extracts(indexer: TinyIndex, nlp, titles_urls_and_extracts, terms_path):
|
||||
def index_titles_urls_and_extracts(indexer: TinyIndex, nlp, titles_urls_and_extracts, link_counts, terms_path):
|
||||
terms = Counter()
|
||||
pages = get_pages(nlp, titles_urls_and_extracts)
|
||||
pages = get_pages(nlp, titles_urls_and_extracts, link_counts)
|
||||
for page in pages:
|
||||
for token in page.tokens:
|
||||
indexer.index(token, Document(url=page.url, title=page.title, extract=page.extract))
|
||||
indexer.index(token, Document(url=page.url, title=page.title, extract=page.extract, score=page.score))
|
||||
terms.update([t.lower() for t in page.tokens])
|
||||
|
||||
term_df = pd.DataFrame({
|
||||
|
|
|
@ -1,13 +1,14 @@
|
|||
"""
|
||||
Index data crawled through the Mwmbl crawler.
|
||||
"""
|
||||
import json
|
||||
from logging import getLogger
|
||||
|
||||
import spacy
|
||||
|
||||
from mwmbl.indexer.fsqueue import FSQueue, GzipJsonBlobSerializer, FSQueueError
|
||||
from mwmbl.indexer.index import index_titles_urls_and_extracts
|
||||
from mwmbl.indexer.paths import INDEX_PATH, MWMBL_CRAWL_TERMS_PATH, DATA_DIR
|
||||
from mwmbl.indexer.paths import INDEX_PATH, MWMBL_CRAWL_TERMS_PATH, TINYSEARCH_DATA_DIR, LINK_COUNT_PATH
|
||||
from mwmbl.tinysearchengine.indexer import TinyIndex, Document, NUM_PAGES, PAGE_SIZE
|
||||
|
||||
|
||||
|
@ -16,16 +17,16 @@ logger = getLogger(__name__)
|
|||
|
||||
def index_mwmbl_crawl_data():
|
||||
nlp = spacy.load("en_core_web_sm")
|
||||
titles_urls_and_extracts = get_mwmbl_crawl_titles_urls_and_extracts()
|
||||
link_counts = json.load(open(LINK_COUNT_PATH))
|
||||
|
||||
TinyIndex.create(Document, INDEX_PATH, NUM_PAGES, PAGE_SIZE)
|
||||
|
||||
with TinyIndex(Document, INDEX_PATH, 'w') as indexer:
|
||||
titles_urls_and_extracts = get_mwmbl_crawl_titles_urls_and_extracts()
|
||||
index_titles_urls_and_extracts(indexer, nlp, titles_urls_and_extracts, MWMBL_CRAWL_TERMS_PATH)
|
||||
index_titles_urls_and_extracts(indexer, nlp, titles_urls_and_extracts, link_counts, MWMBL_CRAWL_TERMS_PATH)
|
||||
|
||||
|
||||
def get_mwmbl_crawl_titles_urls_and_extracts():
|
||||
input_queue = FSQueue(DATA_DIR, 'mwmbl-search-items', GzipJsonBlobSerializer())
|
||||
input_queue = FSQueue(TINYSEARCH_DATA_DIR, 'mwmbl-search-items', GzipJsonBlobSerializer())
|
||||
input_queue.unlock_all()
|
||||
while True:
|
||||
try:
|
||||
|
|
|
@ -1,47 +0,0 @@
|
|||
import gzip
|
||||
from glob import glob
|
||||
|
||||
import bs4
|
||||
from spacy.lang.en import English
|
||||
|
||||
from .index import tokenize
|
||||
from mwmbl.tinysearchengine.indexer import TinyIndexer, NUM_PAGES, PAGE_SIZE
|
||||
from .paths import INDEX_PATH, CRAWL_GLOB
|
||||
|
||||
|
||||
def run():
    """Index every crawled HTML file matching ``CRAWL_GLOB``.

    Each file holds the page URL on its first line followed by the HTML.
    Pages the indexer reports as already indexed are skipped. The title comes
    from the ``<title>`` element, falling back to the first 80 characters of
    the cleaned text.
    """
    # TODO: item_factory argument is unfilled.
    indexer = TinyIndexer(INDEX_PATH, NUM_PAGES, PAGE_SIZE)
    indexer.create()
    nlp = English()

    for path in glob(CRAWL_GLOB):
        print("Path", path)
        with gzip.open(path, 'rt') as html_file:
            first_line, content = html_file.readline(), html_file.read()
        url = first_line.strip()

        if indexer.document_indexed(url):
            print("Page exists, skipping", url)
            continue

        cleaned_text = clean(content)
        try:
            title = bs4.BeautifulSoup(content, features="lxml").find('title').string
        except AttributeError:
            # No <title> element was found.
            title = cleaned_text[:80]

        tokens = tokenize(nlp, cleaned_text)
        for label, value in (("URL", url), ("Tokens", tokens), ("Title", title)):
            print(label, value)
        indexer.index(tokens, url, title)
|
||||
|
||||
|
||||
def clean(content):
    """Return the non-boilerplate paragraphs of ``content`` joined by single spaces."""
    # NOTE(review): ``justext`` is not imported in the visible part of this
    # file; this call raises NameError unless it is imported elsewhere.
    paragraphs = justext.justext(content, justext.get_stoplist("English"))
    pars = [par.text for par in paragraphs if not par.is_boilerplate]
    return ' '.join(pars)


# The entry point must come after ``clean`` is defined: ``run`` calls it, and
# with the guard placed first the script failed with NameError when executed.
if __name__ == '__main__':
    run()
|
|
@ -1,31 +0,0 @@
|
|||
"""
|
||||
Index items in the file-system queue
|
||||
"""
|
||||
from spacy.lang.en import English
|
||||
|
||||
from .fsqueue import FSQueue, ZstdJsonSerializer
|
||||
from .index import index_titles_urls_and_extracts
|
||||
from mwmbl.tinysearchengine.indexer import TinyIndexer, NUM_PAGES, PAGE_SIZE
|
||||
from .paths import DATA_DIR, DOMAINS_TITLES_QUEUE_NAME, INDEX_PATH
|
||||
|
||||
|
||||
def get_queue_items():
    """Yield ``(title, url)`` for every queued domain item that has a title.

    All items of the ``DOMAINS_TITLES_QUEUE_NAME`` queue are unlocked first.
    Iteration stops once the queue returns no further batch.
    """
    titles_queue = FSQueue(DATA_DIR, DOMAINS_TITLES_QUEUE_NAME, ZstdJsonSerializer())
    titles_queue.unlock_all()
    while True:
        queue_item = titles_queue.get()
        # Other consumers of FSQueue treat a ``None`` result as "queue is
        # empty"; unpacking it directly would raise TypeError here instead of
        # ending the iteration.
        if queue_item is None:
            return

        _items_id, items = queue_item
        for item in items:
            if item['title'] is None:
                continue
            yield item['title'], item['url']
|
||||
|
||||
|
||||
def index_queue_items():
    """Index every ``(title, url)`` pair produced by ``get_queue_items``."""
    nlp = English()
    with TinyIndexer(INDEX_PATH, NUM_PAGES, PAGE_SIZE) as indexer:
        index_titles_urls_and_extracts(indexer, nlp, get_queue_items())
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
index_queue_items()
|
|
@ -1,49 +0,0 @@
|
|||
"""
|
||||
Index data downloaded from Common Crawl
|
||||
"""
|
||||
import logging
|
||||
import sys
|
||||
from logging import getLogger
|
||||
|
||||
import spacy
|
||||
|
||||
from .fsqueue import FSQueue, GzipJsonRowSerializer, FSQueueError
|
||||
from .index import index_titles_urls_and_extracts
|
||||
from mwmbl.tinysearchengine.indexer import TinyIndexer, NUM_PAGES, PAGE_SIZE, Document
|
||||
from .paths import INDEX_PATH, DATA_DIR, COMMON_CRAWL_TERMS_PATH
|
||||
|
||||
|
||||
logging.basicConfig(level=logging.DEBUG, stream=sys.stdout)
|
||||
logger = getLogger(__name__)
|
||||
|
||||
|
||||
def index_common_craw_data():
    """Index the queued Common Crawl search items using the ``en_core_web_sm`` pipeline."""
    nlp = spacy.load("en_core_web_sm")

    with TinyIndexer(Document, INDEX_PATH, NUM_PAGES, PAGE_SIZE) as indexer:
        index_titles_urls_and_extracts(
            indexer,
            nlp,
            get_common_crawl_titles_urls_and_extracts(),
            COMMON_CRAWL_TERMS_PATH,
        )
|
||||
|
||||
|
||||
def get_common_crawl_titles_urls_and_extracts():
    """Yield ``(title, url, extract)`` for every entry in the ``search-items`` queue.

    Batches that fail to load are logged and marked as errored; each batch is
    marked done once all of its entries have been yielded. Iteration ends when
    the queue returns no further batch.
    """
    queue = FSQueue(DATA_DIR, 'search-items', GzipJsonRowSerializer())
    queue.unlock_all()
    while True:
        try:
            batch = queue.get()
        except FSQueueError as e:
            logger.exception(f'Error with item {e.item_id}')
            queue.error(e.item_id)
            continue

        if batch is None:
            logger.info('Not more items to process, stopping')
            return

        item_id, items = batch
        logger.info(f'Processing item {item_id}')
        # Stored entries are (url, title, extract); callers expect the title first.
        for url, title, extract in items:
            yield title, url, extract
        queue.done(item_id)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
index_common_craw_data()
|
|
@ -7,7 +7,7 @@ import json
|
|||
from collections import defaultdict
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from analyse.analyse_crawled_domains import CRAWL_GLOB
|
||||
from mwmbl.indexer.paths import CRAWL_GLOB, LINK_COUNT_PATH
|
||||
|
||||
|
||||
def get_urls():
|
||||
|
@ -30,9 +30,9 @@ def collect_links(urls):
|
|||
def run():
|
||||
url_links = get_urls()
|
||||
collected = collect_links(url_links)
|
||||
top = sorted(collected.items(), key=lambda x: len(x[1]), reverse=True)[:1000]
|
||||
for url, items in top:
|
||||
print("URL", url, len(items))
|
||||
link_counts = {url: len(links) for url, links in collected.items()}
|
||||
with open(LINK_COUNT_PATH, 'w') as output_file:
|
||||
json.dump(link_counts, output_file, indent=2)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
|
@ -3,24 +3,26 @@ from pathlib import Path
|
|||
|
||||
HOME = os.getenv('HOME')
|
||||
|
||||
DATA_DIR = Path(os.environ['HOME']) / 'data' / 'tinysearch'
|
||||
COMMON_CRAWL_TERMS_PATH = DATA_DIR / 'common-craw-terms.csv'
|
||||
DATA_DIR = Path(os.environ['HOME']) / 'data'
|
||||
TINYSEARCH_DATA_DIR = DATA_DIR / 'tinysearch'
|
||||
COMMON_CRAWL_TERMS_PATH = TINYSEARCH_DATA_DIR / 'common-craw-terms.csv'
|
||||
|
||||
HN_TOP_PATH = os.path.join(DATA_DIR, 'hn-top.csv')
|
||||
CRAWL_PREFIX = 'crawl_'
|
||||
CRAWL_GLOB = os.path.join(DATA_DIR, f"{CRAWL_PREFIX}*")
|
||||
TEST_INDEX_PATH = os.path.join(DATA_DIR, 'index-test.tinysearch')
|
||||
TEST_TERMS_PATH = os.path.join(DATA_DIR, 'index-terms.csv')
|
||||
WIKI_DATA_PATH = os.path.join(DATA_DIR, 'enwiki-20210301-pages-articles1.xml-p1p41242.bz2')
|
||||
WIKI_TITLES_PATH = os.path.join(DATA_DIR, 'abstract-titles-sorted.txt.gz')
|
||||
TEST_INDEX_PATH = os.path.join(TINYSEARCH_DATA_DIR, 'index-test.tinysearch')
|
||||
TEST_TERMS_PATH = os.path.join(TINYSEARCH_DATA_DIR, 'index-terms.csv')
|
||||
WIKI_DATA_PATH = os.path.join(TINYSEARCH_DATA_DIR, 'enwiki-20210301-pages-articles1.xml-p1p41242.bz2')
|
||||
WIKI_TITLES_PATH = os.path.join(TINYSEARCH_DATA_DIR, 'abstract-titles-sorted.txt.gz')
|
||||
|
||||
URLS_PATH = DATA_DIR / 'urls.sqlite3'
|
||||
URLS_PATH = TINYSEARCH_DATA_DIR / 'urls.sqlite3'
|
||||
DOMAINS_QUEUE_NAME = 'domains-queue-fs'
|
||||
DOMAINS_TITLES_QUEUE_NAME = 'domains-title-queue-fs'
|
||||
DOMAINS_PATH = os.path.join(DATA_DIR, 'top10milliondomains.csv.gz')
|
||||
DOMAINS_PATH = os.path.join(TINYSEARCH_DATA_DIR, 'top10milliondomains.csv.gz')
|
||||
|
||||
LOCAL_DATA_DIR = Path(__file__).parent.parent.parent / 'data'
|
||||
INDEX_PATH = LOCAL_DATA_DIR / 'index.tinysearch'
|
||||
MWMBL_CRAWL_TERMS_PATH = LOCAL_DATA_DIR / 'mwmbl-crawl-terms.csv'
|
||||
|
||||
TOP_DOMAINS_JSON_PATH = DATA_DIR / 'hn-top-domains.json'
|
||||
TOP_DOMAINS_JSON_PATH = TINYSEARCH_DATA_DIR / 'hn-top-domains.json'
|
||||
|
||||
MWMBL_DATA_DIR = DATA_DIR / "mwmbl"
|
||||
CRAWL_GLOB = str(MWMBL_DATA_DIR / "b2") + "/*/*/*/*/*/*.json.gz"
|
||||
LINK_COUNT_PATH = MWMBL_DATA_DIR / 'crawl-counts.json'
|
||||
|
|
|
@ -1,41 +0,0 @@
|
|||
"""
|
||||
Index Wikipedia
|
||||
"""
|
||||
import gzip
|
||||
import html
|
||||
from urllib.parse import quote
|
||||
|
||||
from spacy.lang.en import English
|
||||
|
||||
from .index import index_titles_urls_and_extracts
|
||||
from mwmbl.tinysearchengine.indexer import TinyIndexer, NUM_PAGES, PAGE_SIZE
|
||||
from .paths import WIKI_TITLES_PATH, INDEX_PATH
|
||||
|
||||
TEXT_TAGS = ['mediawiki', 'page', 'revision', 'text']
|
||||
TITLE_START = '<title>Wikipedia: '
|
||||
TITLE_END = '</title>\n'
|
||||
|
||||
|
||||
def index_wiki():
    """Index Wikipedia titles and URLs into the index at ``INDEX_PATH``.

    Uses the blank spaCy ``English`` pipeline and feeds the (title, url)
    pairs from ``get_wiki_titles_and_urls`` to
    ``index_titles_urls_and_extracts``.
    """
    # NOTE(review): the Common Crawl indexer in this change constructs
    # TinyIndexer with an item factory as first argument and passes a terms
    # path to index_titles_urls_and_extracts; this call does neither --
    # confirm it still matches the current signatures.
    english = English()
    with TinyIndexer(INDEX_PATH, NUM_PAGES, PAGE_SIZE) as indexer:
        index_titles_urls_and_extracts(indexer, english, get_wiki_titles_and_urls())
|
||||
|
||||
|
||||
def get_wiki_titles_and_urls():
    """Yield ``(title, url)`` pairs parsed from the gzipped titles file.

    The file at ``WIKI_TITLES_PATH`` is opened in text mode and its first
    line is discarded. Every remaining line must start with ``TITLE_START``
    and end with ``TITLE_END`` (checked with ``assert``); the text in between
    is HTML-unescaped to give the title. The URL is the English Wikipedia
    article prefix followed by the title with spaces replaced by underscores
    and percent-quoted.
    """
    prefix_size, suffix_size = len(TITLE_START), len(TITLE_END)
    with gzip.open(WIKI_TITLES_PATH, 'rt') as wiki_titles_file:
        # Discard the first line; it is not parsed as a title.
        wiki_titles_file.readline()
        for line in wiki_titles_file:
            assert line.startswith(TITLE_START)
            assert line.endswith(TITLE_END)
            title = html.unescape(line[prefix_size:-suffix_size])
            page_name = quote(title.replace(' ', '_'))
            yield title, 'https://en.wikipedia.org/wiki/' + page_name
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
index_wiki()
|
|
@ -7,7 +7,7 @@ import uvicorn
|
|||
from mwmbl.tinysearchengine import create_app
|
||||
from mwmbl.tinysearchengine.completer import Completer
|
||||
from mwmbl.tinysearchengine.indexer import TinyIndex, Document
|
||||
from mwmbl.tinysearchengine.rank import Ranker
|
||||
from mwmbl.tinysearchengine.rank import HeuristicRanker
|
||||
|
||||
logging.basicConfig()
|
||||
|
||||
|
@ -37,7 +37,7 @@ def main():
|
|||
completer = Completer(terms)
|
||||
|
||||
with TinyIndex(item_factory=Document, index_path=args.index) as tiny_index:
|
||||
ranker = Ranker(tiny_index, completer)
|
||||
ranker = HeuristicRanker(tiny_index, completer)
|
||||
|
||||
# Initialize FastApi instance
|
||||
app = create_app.create(ranker)
|
||||
|
|
|
@ -10,7 +10,7 @@ from starlette.middleware.cors import CORSMiddleware
|
|||
from mwmbl.tinysearchengine.completer import Completer
|
||||
from mwmbl.tinysearchengine.hn_top_domains_filtered import DOMAINS
|
||||
from mwmbl.tinysearchengine.indexer import TinyIndex, Document
|
||||
from mwmbl.tinysearchengine.rank import Ranker
|
||||
from mwmbl.tinysearchengine.rank import HeuristicRanker
|
||||
|
||||
logger = getLogger(__name__)
|
||||
|
||||
|
@ -18,7 +18,7 @@ logger = getLogger(__name__)
|
|||
SCORE_THRESHOLD = 0.25
|
||||
|
||||
|
||||
def create(ranker: Ranker):
|
||||
def create(ranker: HeuristicRanker):
|
||||
app = FastAPI()
|
||||
|
||||
# Allow CORS requests from any site
|
||||
|
|
|
@ -12,7 +12,7 @@ VERSION = 1
|
|||
METADATA_CONSTANT = b'mwmbl-tiny-search'
|
||||
METADATA_SIZE = 4096
|
||||
|
||||
NUM_PAGES = 76800
|
||||
NUM_PAGES = 128000
|
||||
PAGE_SIZE = 4096
|
||||
|
||||
|
||||
|
@ -21,6 +21,7 @@ class Document:
|
|||
title: str
|
||||
url: str
|
||||
extract: str
|
||||
score: float
|
||||
|
||||
|
||||
@dataclass
|
||||
|
|
53
mwmbl/tinysearchengine/ltr.py
Normal file
53
mwmbl/tinysearchengine/ltr.py
Normal file
|
@ -0,0 +1,53 @@
|
|||
"""
|
||||
Learning to rank predictor
|
||||
"""
|
||||
from pandas import DataFrame, Series
|
||||
from sklearn.base import BaseEstimator, RegressorMixin, TransformerMixin
|
||||
|
||||
from mwmbl.tinysearchengine.rank import get_match_features, get_domain_score, score_match
|
||||
|
||||
|
||||
class ThresholdPredictor(BaseEstimator, RegressorMixin):
    """Regressor facade over a classifier trained on thresholded targets.

    Targets are binarised as ``y > threshold`` before fitting, and
    predictions are taken from the classifier's ``predict_proba`` output.
    """

    def __init__(self, threshold: float, classifier: BaseEstimator):
        self.threshold = threshold
        self.classifier = classifier

    def fit(self, X, y) -> BaseEstimator:
        """Fit the wrapped classifier on ``y > self.threshold`` and return ``self``."""
        self.classifier.fit(X, y > self.threshold)
        return self

    def predict(self, X):
        """Return the second ``predict_proba`` column for two-column output.

        For any other number of columns the full probability matrix is
        returned unchanged.
        """
        probabilities = self.classifier.predict_proba(X)
        has_two_columns = probabilities.shape[1] == 2
        return probabilities[:, 1] if has_two_columns else probabilities
|
||||
|
||||
|
||||
def get_match_features_as_series(item: Series):
    """Build the feature ``Series`` for one query/result row.

    ``item`` must provide ``query``, ``title``, ``extract``, ``url`` and
    ``score``. The query is lower-cased and split on whitespace into terms.
    For each of title, extract and url, three match features are stored
    under ``<feature>_<part>`` keys, followed by ``num_terms``,
    ``num_chars``, ``domain_score`` and ``item_score``.
    """
    terms = item['query'].lower().split()
    features = {}
    for part in ('title', 'extract', 'url'):
        # NOTE(review): is_complete is fixed to True and is_url to False for
        # every part, including 'url' -- confirm this is intended.
        last_char, matched, possible = get_match_features(terms, item[part], True, False)
        features.update({
            f'last_match_char_{part}': last_char,
            f'match_length_{part}': matched,
            f'total_possible_match_length_{part}': possible,
        })

    features.update(
        num_terms=len(terms),
        num_chars=len(' '.join(terms)),
        domain_score=get_domain_score(item['url']),
        item_score=item['score'],
    )
    return Series(features)
|
||||
|
||||
|
||||
class FeatureExtractor(BaseEstimator, TransformerMixin):
    """Transformer that turns query/result rows into match-feature columns."""

    def fit(self, X, y=None):
        """Nothing is learned from the data; return ``self`` unchanged."""
        return self

    def transform(self, X: DataFrame, y=None):
        """Apply ``get_match_features_as_series`` to every row of ``X``.

        The resulting column index is printed to stdout before the feature
        frame is returned.
        """
        feature_frame = X.apply(get_match_features_as_series, axis=1)
        print("Features", feature_frame.columns)
        return feature_frame
|
||||
|
||||
|
35
mwmbl/tinysearchengine/ltr_rank.py
Normal file
35
mwmbl/tinysearchengine/ltr_rank.py
Normal file
|
@ -0,0 +1,35 @@
|
|||
import numpy as np
|
||||
from pandas import DataFrame
|
||||
from sklearn.base import BaseEstimator
|
||||
|
||||
from mwmbl.tinysearchengine.completer import Completer
|
||||
from mwmbl.tinysearchengine.indexer import Document, TinyIndex
|
||||
from mwmbl.tinysearchengine.rank import Ranker, order_results
|
||||
|
||||
|
||||
class LTRRanker(Ranker):
    """Ranker that re-orders the top heuristic results with a learned model.

    The module-level ``order_results`` heuristic produces a shortlist of at
    most ``top_n`` pages. ``model.predict`` then scores a frame with
    ``query``, ``url``, ``title``, ``extract`` and ``score`` columns, and the
    shortlist is returned in descending order of predicted score.
    """

    def __init__(self, model: BaseEstimator, tiny_index: TinyIndex, completer: Completer):
        super().__init__(tiny_index, completer)
        self.model = model
        # Only this many heuristic results are passed on to the model.
        self.top_n = 20

    def order_results(self, terms, pages: list[Document], is_complete):
        """Return the shortlisted pages ordered by the model's prediction.

        Args:
            terms: Query terms; joined with single spaces to form the
                ``query`` column.
            pages: Candidate documents to rank.
            is_complete: Forwarded unchanged to the heuristic ordering.

        Returns:
            The shortlisted pages, highest predicted score first, or ``[]``
            when there is nothing to rank.
        """
        if len(pages) == 0:
            return []

        top_pages = order_results(terms, pages, is_complete)[:self.top_n]
        if not top_pages:
            # The heuristic filters by score, so it can drop every page;
            # do not hand an empty frame to the model.
            return []

        query = ' '.join(terms)
        data = {
            'query': [query] * len(top_pages),
            'url': [page.url for page in top_pages],
            'title': [page.title for page in top_pages],
            'extract': [page.extract for page in top_pages],
            'score': [page.score for page in top_pages],
        }

        dataframe = DataFrame(data)
        print("Ordering results", dataframe)
        predictions = self.model.predict(dataframe)
        # argsort is ascending; reverse it to put the best prediction first.
        indexes = np.argsort(predictions)[::-1]
        return [top_pages[i] for i in indexes]
|
|
@ -1,4 +1,5 @@
|
|||
import re
|
||||
from abc import abstractmethod
|
||||
from logging import getLogger
|
||||
from operator import itemgetter
|
||||
from pathlib import Path
|
||||
|
@ -14,27 +15,49 @@ from mwmbl.tinysearchengine.indexer import TinyIndex, Document
|
|||
logger = getLogger(__name__)
|
||||
|
||||
|
||||
SCORE_THRESHOLD = 0.25
|
||||
SCORE_THRESHOLD = 0.0
|
||||
|
||||
|
||||
def _get_query_regex(terms, is_complete):
|
||||
def _get_query_regex(terms, is_complete, is_url):
|
||||
if not terms:
|
||||
return ''
|
||||
|
||||
word_sep = r'\b' if is_url else ''
|
||||
if is_complete:
|
||||
term_patterns = [rf'\b{term}\b' for term in terms]
|
||||
term_patterns = [rf'{word_sep}{re.escape(term)}{word_sep}' for term in terms]
|
||||
else:
|
||||
term_patterns = [rf'\b{term}\b' for term in terms[:-1]] + [rf'\b{terms[-1]}']
|
||||
term_patterns = [rf'{word_sep}{re.escape(term)}{word_sep}' for term in terms[:-1]] + [
|
||||
rf'{word_sep}{re.escape(terms[-1])}']
|
||||
pattern = '|'.join(term_patterns)
|
||||
return pattern
|
||||
|
||||
|
||||
def _score_result(terms, result: Document, is_complete: bool):
|
||||
domain = urlparse(result.url).netloc
|
||||
domain_score = DOMAINS.get(domain, 0.0)
|
||||
def _score_result(terms, result: Document, is_complete: bool, max_score: float):
|
||||
domain_score = get_domain_score(result.url)
|
||||
|
||||
result_string = f"{result.title.strip()} {result.extract.strip()}"
|
||||
query_regex = _get_query_regex(terms, is_complete)
|
||||
last_match_char, match_length, total_possible_match_length = get_match_features(
|
||||
terms, result_string, is_complete, False)
|
||||
|
||||
match_score = score_match(last_match_char, match_length, total_possible_match_length)
|
||||
score = 0.01 * domain_score + 0.99 * match_score
|
||||
# score = (0.1 + 0.9*match_score) * (0.1 + 0.9*(result.score / max_score))
|
||||
# score = 0.01 * match_score + 0.99 * (result.score / max_score)
|
||||
return score
|
||||
|
||||
|
||||
def score_match(last_match_char, match_length, total_possible_match_length):
    """Combine match features into a single score.

    The score is ``(match_length + 1 / last_match_char)`` divided by
    ``(total_possible_match_length + 1)``, so longer matches and matches
    ending earlier in the text score higher.

    Args:
        last_match_char: End offset of the last distinct match. Values below
            1 are treated as 1.
        match_length: Total length of the distinct matched strings.
        total_possible_match_length: Total length of all query terms.

    Returns:
        The match score as a float.
    """
    # A zero-width match at the very start of the text ends at offset 0,
    # which would otherwise raise ZeroDivisionError.
    end_offset = last_match_char if last_match_char > 0 else 1
    return (match_length + 1. / end_offset) / (total_possible_match_length + 1)
|
||||
|
||||
|
||||
def get_domain_score(url):
    """Return the ``DOMAINS`` score for the URL's network location.

    The lookup key is ``urlparse(url).netloc``; a location missing from
    ``DOMAINS`` scores ``0.0``.
    """
    return DOMAINS.get(urlparse(url).netloc, 0.0)
|
||||
|
||||
|
||||
def get_match_features(terms, result_string, is_complete, is_url):
|
||||
query_regex = _get_query_regex(terms, is_complete, is_url)
|
||||
matches = list(re.finditer(query_regex, result_string, flags=re.IGNORECASE))
|
||||
match_strings = {x.group(0).lower() for x in matches}
|
||||
match_length = sum(len(x) for x in match_strings)
|
||||
|
@ -48,12 +71,15 @@ def _score_result(terms, result: Document, is_complete: bool):
|
|||
seen_matches.add(value)
|
||||
|
||||
total_possible_match_length = sum(len(x) for x in terms)
|
||||
score = 0.1*domain_score + 0.9*(match_length + 1./last_match_char) / (total_possible_match_length + 1)
|
||||
return score
|
||||
return last_match_char, match_length, total_possible_match_length
|
||||
|
||||
|
||||
def _order_results(terms: list[str], results: list[Document], is_complete: bool):
|
||||
results_and_scores = [(_score_result(terms, result, is_complete), result) for result in results]
|
||||
def order_results(terms: list[str], results: list[Document], is_complete: bool) -> list[Document]:
|
||||
if len(results) == 0:
|
||||
return []
|
||||
|
||||
max_score = max(result.score for result in results)
|
||||
results_and_scores = [(_score_result(terms, result, is_complete, max_score), result) for result in results]
|
||||
ordered_results = sorted(results_and_scores, key=itemgetter(0), reverse=True)
|
||||
filtered_results = [result for score, result in ordered_results if score > SCORE_THRESHOLD]
|
||||
return filtered_results
|
||||
|
@ -64,11 +90,15 @@ class Ranker:
|
|||
self.tiny_index = tiny_index
|
||||
self.completer = completer
|
||||
|
||||
@abstractmethod
|
||||
def order_results(self, terms, pages, is_complete):
|
||||
pass
|
||||
|
||||
def search(self, s: str):
|
||||
results, terms = self._get_results(s)
|
||||
results, terms = self.get_results(s)
|
||||
|
||||
is_complete = s.endswith(' ')
|
||||
pattern = _get_query_regex(terms, is_complete)
|
||||
pattern = _get_query_regex(terms, is_complete, False)
|
||||
formatted_results = []
|
||||
for result in results:
|
||||
formatted_result = {}
|
||||
|
@ -89,14 +119,14 @@ class Ranker:
|
|||
return formatted_results
|
||||
|
||||
def complete(self, q: str):
|
||||
ordered_results, terms = self._get_results(q)
|
||||
ordered_results, terms = self.get_results(q)
|
||||
results = [item.title.replace("\n", "") + ' — ' +
|
||||
item.url.replace("\n", "") for item in ordered_results]
|
||||
if len(results) == 0:
|
||||
return []
|
||||
return [q, results]
|
||||
|
||||
def _get_results(self, q):
|
||||
def get_results(self, q):
|
||||
terms = [x.lower() for x in q.replace('.', ' ').split()]
|
||||
is_complete = q.endswith(' ')
|
||||
if len(terms) > 0 and not is_complete:
|
||||
|
@ -115,5 +145,11 @@ class Ranker:
|
|||
pages.append(item)
|
||||
seen_items.add(item.title)
|
||||
|
||||
ordered_results = _order_results(terms, pages, is_complete)
|
||||
ordered_results = self.order_results(terms, pages, is_complete)
|
||||
return ordered_results, terms
|
||||
|
||||
|
||||
class HeuristicRanker(Ranker):
    """Ranker whose ordering is the module-level heuristic ``order_results``."""

    def order_results(self, terms, pages, is_complete):
        """Delegate to the module-level ``order_results`` with the same arguments."""
        ranked_pages = order_results(terms, pages, is_complete)
        return ranked_pages
|
||||
|
||||
|
|
459
poetry.lock
generated
459
poetry.lock
generated
|
@ -65,7 +65,7 @@ lxml = ["lxml"]
|
|||
|
||||
[[package]]
|
||||
name = "blis"
|
||||
version = "0.7.5"
|
||||
version = "0.7.6"
|
||||
description = "The Blis BLAS-like linear algebra library, as a self-contained C-extension."
|
||||
category = "main"
|
||||
optional = true
|
||||
|
@ -135,7 +135,7 @@ pycparser = "*"
|
|||
|
||||
[[package]]
|
||||
name = "charset-normalizer"
|
||||
version = "2.0.11"
|
||||
version = "2.0.12"
|
||||
description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet."
|
||||
category = "main"
|
||||
optional = true
|
||||
|
@ -146,7 +146,7 @@ unicode_backport = ["unicodedata2"]
|
|||
|
||||
[[package]]
|
||||
name = "click"
|
||||
version = "8.0.3"
|
||||
version = "8.0.4"
|
||||
description = "Composable command line interface toolkit"
|
||||
category = "main"
|
||||
optional = false
|
||||
|
@ -185,7 +185,6 @@ spacy = ">=3.2.0,<3.3.0"
|
|||
[package.source]
|
||||
type = "url"
|
||||
url = "https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.2.0/en_core_web_sm-3.2.0.tar.gz"
|
||||
|
||||
[[package]]
|
||||
name = "fastapi"
|
||||
version = "0.70.1"
|
||||
|
@ -250,6 +249,14 @@ category = "main"
|
|||
optional = true
|
||||
python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*"
|
||||
|
||||
[[package]]
|
||||
name = "joblib"
|
||||
version = "1.1.0"
|
||||
description = "Lightweight pipelining with Python functions"
|
||||
category = "main"
|
||||
optional = false
|
||||
python-versions = ">=3.6"
|
||||
|
||||
[[package]]
|
||||
name = "justext"
|
||||
version = "3.0.0"
|
||||
|
@ -310,11 +317,11 @@ source = ["Cython (>=0.29.7)"]
|
|||
|
||||
[[package]]
|
||||
name = "markupsafe"
|
||||
version = "2.0.1"
|
||||
version = "2.1.1"
|
||||
description = "Safely add untrusted strings to HTML/XML markup."
|
||||
category = "main"
|
||||
optional = true
|
||||
python-versions = ">=3.6"
|
||||
python-versions = ">=3.7"
|
||||
|
||||
[[package]]
|
||||
name = "mmh3"
|
||||
|
@ -334,11 +341,11 @@ python-versions = "*"
|
|||
|
||||
[[package]]
|
||||
name = "numpy"
|
||||
version = "1.21.1"
|
||||
version = "1.22.3"
|
||||
description = "NumPy is the fundamental package for array computing with Python."
|
||||
category = "main"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
python-versions = ">=3.8"
|
||||
|
||||
[[package]]
|
||||
name = "packaging"
|
||||
|
@ -353,7 +360,7 @@ pyparsing = ">=2.0.2,<3.0.5 || >3.0.5"
|
|||
|
||||
[[package]]
|
||||
name = "pandas"
|
||||
version = "1.4.0"
|
||||
version = "1.4.1"
|
||||
description = "Powerful data structures for data analysis, time series, and statistics"
|
||||
category = "main"
|
||||
optional = false
|
||||
|
@ -489,11 +496,11 @@ sql = ["pandas (>=0.23.2)", "pyarrow (>=1.0.0)"]
|
|||
|
||||
[[package]]
|
||||
name = "pytest"
|
||||
version = "7.0.1"
|
||||
version = "7.1.1"
|
||||
description = "pytest: simple powerful testing with Python"
|
||||
category = "dev"
|
||||
optional = false
|
||||
python-versions = ">=3.6"
|
||||
python-versions = ">=3.7"
|
||||
|
||||
[package.dependencies]
|
||||
atomicwrites = {version = ">=1.0", markers = "sys_platform == \"win32\""}
|
||||
|
@ -521,7 +528,7 @@ six = ">=1.5"
|
|||
|
||||
[[package]]
|
||||
name = "pytz"
|
||||
version = "2021.3"
|
||||
version = "2022.1"
|
||||
description = "World timezone definitions, modern and historical"
|
||||
category = "main"
|
||||
optional = false
|
||||
|
@ -566,7 +573,7 @@ use_chardet_on_py3 = ["chardet (>=3.0.2,<5)"]
|
|||
|
||||
[[package]]
|
||||
name = "s3transfer"
|
||||
version = "0.5.0"
|
||||
version = "0.5.2"
|
||||
description = "An Amazon S3 Transfer Manager"
|
||||
category = "main"
|
||||
optional = true
|
||||
|
@ -578,6 +585,37 @@ botocore = ">=1.12.36,<2.0a.0"
|
|||
[package.extras]
|
||||
crt = ["botocore[crt] (>=1.20.29,<2.0a.0)"]
|
||||
|
||||
[[package]]
|
||||
name = "scikit-learn"
|
||||
version = "1.0.2"
|
||||
description = "A set of python modules for machine learning and data mining"
|
||||
category = "main"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
|
||||
[package.dependencies]
|
||||
joblib = ">=0.11"
|
||||
numpy = ">=1.14.6"
|
||||
scipy = ">=1.1.0"
|
||||
threadpoolctl = ">=2.0.0"
|
||||
|
||||
[package.extras]
|
||||
benchmark = ["matplotlib (>=2.2.3)", "pandas (>=0.25.0)", "memory-profiler (>=0.57.0)"]
|
||||
docs = ["matplotlib (>=2.2.3)", "scikit-image (>=0.14.5)", "pandas (>=0.25.0)", "seaborn (>=0.9.0)", "memory-profiler (>=0.57.0)", "sphinx (>=4.0.1)", "sphinx-gallery (>=0.7.0)", "numpydoc (>=1.0.0)", "Pillow (>=7.1.2)", "sphinx-prompt (>=1.3.0)", "sphinxext-opengraph (>=0.4.2)"]
|
||||
examples = ["matplotlib (>=2.2.3)", "scikit-image (>=0.14.5)", "pandas (>=0.25.0)", "seaborn (>=0.9.0)"]
|
||||
tests = ["matplotlib (>=2.2.3)", "scikit-image (>=0.14.5)", "pandas (>=0.25.0)", "pytest (>=5.0.1)", "pytest-cov (>=2.9.0)", "flake8 (>=3.8.2)", "black (>=21.6b0)", "mypy (>=0.770)", "pyamg (>=4.0.0)"]
|
||||
|
||||
[[package]]
|
||||
name = "scipy"
|
||||
version = "1.8.0"
|
||||
description = "SciPy: Scientific Library for Python"
|
||||
category = "main"
|
||||
optional = false
|
||||
python-versions = ">=3.8,<3.11"
|
||||
|
||||
[package.dependencies]
|
||||
numpy = ">=1.17.3,<1.25.0"
|
||||
|
||||
[[package]]
|
||||
name = "six"
|
||||
version = "1.16.0"
|
||||
|
@ -672,7 +710,7 @@ transformers = ["spacy-transformers (>=1.1.2,<1.2.0)"]
|
|||
|
||||
[[package]]
|
||||
name = "spacy-legacy"
|
||||
version = "3.0.8"
|
||||
version = "3.0.9"
|
||||
description = "Legacy registered functions for spaCy backwards compatibility"
|
||||
category = "main"
|
||||
optional = true
|
||||
|
@ -716,7 +754,7 @@ full = ["itsdangerous", "jinja2", "python-multipart", "pyyaml", "requests", "gra
|
|||
|
||||
[[package]]
|
||||
name = "thinc"
|
||||
version = "8.0.13"
|
||||
version = "8.0.15"
|
||||
description = "A refreshing functional take on deep learning, compatible with your favorite libraries"
|
||||
category = "main"
|
||||
optional = true
|
||||
|
@ -726,7 +764,7 @@ python-versions = ">=3.6"
|
|||
blis = ">=0.4.0,<0.8.0"
|
||||
catalogue = ">=2.0.4,<2.1.0"
|
||||
cymem = ">=2.0.2,<2.1.0"
|
||||
murmurhash = ">=0.28.0,<1.1.0"
|
||||
murmurhash = ">=1.0.2,<1.1.0"
|
||||
numpy = ">=1.15.0"
|
||||
preshed = ">=3.0.2,<3.1.0"
|
||||
pydantic = ">=1.7.4,<1.8 || >1.8,<1.8.1 || >1.8.1,<1.9.0"
|
||||
|
@ -743,6 +781,7 @@ cuda111 = ["cupy-cuda111 (>=5.0.0b4)"]
|
|||
cuda112 = ["cupy-cuda112 (>=5.0.0b4)"]
|
||||
cuda113 = ["cupy-cuda113 (>=5.0.0b4)"]
|
||||
cuda114 = ["cupy-cuda114 (>=5.0.0b4)"]
|
||||
cuda115 = ["cupy-cuda115 (>=5.0.0b4)"]
|
||||
cuda80 = ["cupy-cuda80 (>=5.0.0b4)"]
|
||||
cuda90 = ["cupy-cuda90 (>=5.0.0b4)"]
|
||||
cuda91 = ["cupy-cuda91 (>=5.0.0b4)"]
|
||||
|
@ -750,7 +789,15 @@ cuda92 = ["cupy-cuda92 (>=5.0.0b4)"]
|
|||
datasets = ["ml-datasets (>=0.2.0,<0.3.0)"]
|
||||
mxnet = ["mxnet (>=1.5.1,<1.6.0)"]
|
||||
tensorflow = ["tensorflow (>=2.0.0,<2.6.0)"]
|
||||
torch = ["torch (>=1.5.0)"]
|
||||
torch = ["torch (>=1.6.0)"]
|
||||
|
||||
[[package]]
|
||||
name = "threadpoolctl"
|
||||
version = "3.1.0"
|
||||
description = "threadpoolctl"
|
||||
category = "main"
|
||||
optional = false
|
||||
python-versions = ">=3.6"
|
||||
|
||||
[[package]]
|
||||
name = "tomli"
|
||||
|
@ -762,7 +809,7 @@ python-versions = ">=3.7"
|
|||
|
||||
[[package]]
|
||||
name = "tqdm"
|
||||
version = "4.62.3"
|
||||
version = "4.63.0"
|
||||
description = "Fast, Extensible Progress Meter"
|
||||
category = "main"
|
||||
optional = true
|
||||
|
@ -795,7 +842,7 @@ test = ["shellingham (>=1.3.0,<2.0.0)", "pytest (>=4.4.0,<5.4.0)", "pytest-cov (
|
|||
|
||||
[[package]]
|
||||
name = "typing-extensions"
|
||||
version = "4.0.1"
|
||||
version = "4.1.1"
|
||||
description = "Backported and Experimental Type Hints for Python 3.6+"
|
||||
category = "main"
|
||||
optional = false
|
||||
|
@ -811,14 +858,14 @@ python-versions = ">=3.6"
|
|||
|
||||
[[package]]
|
||||
name = "urllib3"
|
||||
version = "1.26.8"
|
||||
version = "1.26.9"
|
||||
description = "HTTP library with thread-safe connection pooling, file post, and more."
|
||||
category = "main"
|
||||
optional = true
|
||||
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, <4"
|
||||
|
||||
[package.extras]
|
||||
brotli = ["brotlipy (>=0.6.0)"]
|
||||
brotli = ["brotlicffi (>=0.8.0)", "brotli (>=1.0.9)", "brotlipy (>=0.6.0)"]
|
||||
secure = ["pyOpenSSL (>=0.14)", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "certifi", "ipaddress"]
|
||||
socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"]
|
||||
|
||||
|
@ -876,8 +923,8 @@ indexer = ["botocore", "boto3", "ujson", "warcio", "idna", "beautifulsoup4", "lx
|
|||
|
||||
[metadata]
|
||||
lock-version = "1.1"
|
||||
python-versions = "^3.10"
|
||||
content-hash = "edb2d4bc50cb09ac5f7ba311d5238eb2deeab1d12f479067cc7239e3232bf6c9"
|
||||
python-versions = ">=3.10,<3.11"
|
||||
content-hash = "be01dba545a5f118cb4d1cd2726c0b9fadbba44684fcd4af2bef2e812e28da93"
|
||||
|
||||
[metadata.files]
|
||||
anyio = [
|
||||
|
@ -901,22 +948,22 @@ beautifulsoup4 = [
|
|||
{file = "beautifulsoup4-4.10.0.tar.gz", hash = "sha256:c23ad23c521d818955a4151a67d81580319d4bf548d3d49f4223ae041ff98891"},
|
||||
]
|
||||
blis = [
|
||||
{file = "blis-0.7.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5812a7c04561ae7332cf730f57d9f82cbd12c5f86a5bfad66ee244e51d06266d"},
|
||||
{file = "blis-0.7.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9eecfce3d8fce61dede7b0ae0dffa461c22072437b6cde85587db0c1aa75b450"},
|
||||
{file = "blis-0.7.5-cp310-cp310-win_amd64.whl", hash = "sha256:0e476931f0d5703a21c77e7f69b8ebdeeea493fc7858a86f627ac2b376a12c8d"},
|
||||
{file = "blis-0.7.5-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:5966ddf3bce84aa7bb09ce4ca059309602fa63280a5d5e5365bb2a294bd5a138"},
|
||||
{file = "blis-0.7.5-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d9034dabce4e42e3a1a7b99cc6de430484c8c369e51556ee8d47a53c085de681"},
|
||||
{file = "blis-0.7.5-cp36-cp36m-win_amd64.whl", hash = "sha256:730952f74adb0fa7dde9f1bc11249d5a64f3a3a9cf7dfa23b189a4b767bdf2d0"},
|
||||
{file = "blis-0.7.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:2339cb19594134775bda8b86f23a893828fc7e8d63f09ba9a15f30b2b16c966c"},
|
||||
{file = "blis-0.7.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5023781272e0b2868be2f92017aa6836557990f1ca5ba2af5e9f5a0acf04fd8a"},
|
||||
{file = "blis-0.7.5-cp37-cp37m-win_amd64.whl", hash = "sha256:65ba723821cc57eb4227eb8dd05c57fff23d97f826d4325b316cd8a63aac8d6a"},
|
||||
{file = "blis-0.7.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ad4af690c37a5953d3aea660ad89b636bfbb80ca1470995554670ca2143f0cb2"},
|
||||
{file = "blis-0.7.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cf11c233ea5c2d30683e7c9641c5dc4cd76ed0f64755ba3321dfb8db39feb316"},
|
||||
{file = "blis-0.7.5-cp38-cp38-win_amd64.whl", hash = "sha256:31401da283ed42905f0fbf2f8b88ea424c6a911482426f84b5b88c54d382e4d1"},
|
||||
{file = "blis-0.7.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:0c185979f8f528d634f5548b8cd84ab0366d340c27c039ad3937fab186c1c252"},
|
||||
{file = "blis-0.7.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8345bd04777557ef385e2f2d1f14a19d53b2ea9ca5fe107a2cdc50d7bafb8eb2"},
|
||||
{file = "blis-0.7.5-cp39-cp39-win_amd64.whl", hash = "sha256:66204a19e38986645940c887498c7b5520efb5bbc6526bf1b8a58f7d3eb37da0"},
|
||||
{file = "blis-0.7.5.tar.gz", hash = "sha256:833e01e9eaff4c01aa6e049bbc1e6acb9eca6ee513d7b35b5bf135d49705ad33"},
|
||||
{file = "blis-0.7.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:904532b38e8f93c97ba9639a0462f5a827e9a8e9fb0aaee441cbbf6d847a5bc0"},
|
||||
{file = "blis-0.7.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e4bf5549f0b54359f2186e4f7d1e75136c4f313e17596f6ce2de601a033d9d44"},
|
||||
{file = "blis-0.7.6-cp310-cp310-win_amd64.whl", hash = "sha256:d0b4f2b76d81f28d1402bf69c775a9dd6ad37058ceb3ffed3da44cd951855cdf"},
|
||||
{file = "blis-0.7.6-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:58d051f900b0ff4cdfb0b2b3b28fede1d26f7af0cb920f48b89b8185f8d740e2"},
|
||||
{file = "blis-0.7.6-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fc7eb12219796d18f65797d1fa402b36a03de974848c05767bc03ba0d72c512d"},
|
||||
{file = "blis-0.7.6-cp36-cp36m-win_amd64.whl", hash = "sha256:f036561d1739787e9d02e4106c340a79a519d635200c13463031f39b6e234c0b"},
|
||||
{file = "blis-0.7.6-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:d1f44badd1d0b1aa1d68e0990e285b3dcf320a6120709318b1cdc9b0204b8ae8"},
|
||||
{file = "blis-0.7.6-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7027bcdc73a80d313c1423d9b6dc381c7253f08eca7b8453a7a6ba2c49c202f7"},
|
||||
{file = "blis-0.7.6-cp37-cp37m-win_amd64.whl", hash = "sha256:dd416a08f099644bd229667d2acdbfedc50d709f8c88b4d32363eb7ab7962e1b"},
|
||||
{file = "blis-0.7.6-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1dcf2bb2a3223683eee7ee348c647566daadc1642a775f36ab52a8b62e6ad6a3"},
|
||||
{file = "blis-0.7.6-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:66115b899052ded91fedfe3091295eb4da81dc2115e292ab4c5dd97d9e458e75"},
|
||||
{file = "blis-0.7.6-cp38-cp38-win_amd64.whl", hash = "sha256:1ef5b9fd08fe4efb5679d60d9e61f2ca2c36158c90ba01cbf9e46e8be473212f"},
|
||||
{file = "blis-0.7.6-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:762714b1d6901d628c53a11072db932aeeb01b6df2a394ec240f0a051e4e8e8e"},
|
||||
{file = "blis-0.7.6-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:43789dc60d81692d5db4978d3eba1c9fa02e4d1e9eadad11000244609d924521"},
|
||||
{file = "blis-0.7.6-cp39-cp39-win_amd64.whl", hash = "sha256:6eb5553a9905bbc63eebeba4bf555bbabb47d029aaed8b0a4f0490a199dccba7"},
|
||||
{file = "blis-0.7.6.tar.gz", hash = "sha256:fe97b10f68a1c7b54c0e54beada84e18f4efb68dd40c906fb8748f5114743ec6"},
|
||||
]
|
||||
boto3 = [
|
||||
{file = "boto3-1.20.20-py3-none-any.whl", hash = "sha256:6c173ffaf0604e34d6865edf7a9a71e1b3e79bd441b8b465ca4b2d44f840806d"},
|
||||
|
@ -987,12 +1034,12 @@ cffi = [
|
|||
{file = "cffi-1.15.0.tar.gz", hash = "sha256:920f0d66a896c2d99f0adbb391f990a84091179542c205fa53ce5787aff87954"},
|
||||
]
|
||||
charset-normalizer = [
|
||||
{file = "charset-normalizer-2.0.11.tar.gz", hash = "sha256:98398a9d69ee80548c762ba991a4728bfc3836768ed226b3945908d1a688371c"},
|
||||
{file = "charset_normalizer-2.0.11-py3-none-any.whl", hash = "sha256:2842d8f5e82a1f6aa437380934d5e1cd4fcf2003b06fed6940769c164a480a45"},
|
||||
{file = "charset-normalizer-2.0.12.tar.gz", hash = "sha256:2857e29ff0d34db842cd7ca3230549d1a697f96ee6d3fb071cfa6c7393832597"},
|
||||
{file = "charset_normalizer-2.0.12-py3-none-any.whl", hash = "sha256:6881edbebdb17b39b4eaaa821b438bf6eddffb4468cf344f09f89def34a8b1df"},
|
||||
]
|
||||
click = [
|
||||
{file = "click-8.0.3-py3-none-any.whl", hash = "sha256:353f466495adaeb40b6b5f592f9f91cb22372351c84caeb068132442a4518ef3"},
|
||||
{file = "click-8.0.3.tar.gz", hash = "sha256:410e932b050f5eed773c4cda94de75971c89cdb3155a72a0831139a79e5ecb5b"},
|
||||
{file = "click-8.0.4-py3-none-any.whl", hash = "sha256:6a7a62563bbfabfda3a38f3023a1db4a35978c0abd76f6c9605ecd6554d6d9b1"},
|
||||
{file = "click-8.0.4.tar.gz", hash = "sha256:8458d7b1287c5fb128c90e23381cf99dcde74beaf6c7ff6384ce84d6fe090adb"},
|
||||
]
|
||||
colorama = [
|
||||
{file = "colorama-0.4.4-py2.py3-none-any.whl", hash = "sha256:9f47eda37229f68eee03b24b9748937c7dc3868f906e8ba69fbcbdd3bc5dc3e2"},
|
||||
|
@ -1041,6 +1088,10 @@ jmespath = [
|
|||
{file = "jmespath-0.10.0-py2.py3-none-any.whl", hash = "sha256:cdf6525904cc597730141d61b36f2e4b8ecc257c420fa2f4549bac2c2d0cb72f"},
|
||||
{file = "jmespath-0.10.0.tar.gz", hash = "sha256:b85d0567b8666149a93172712e68920734333c0ce7e89b78b3e987f71e5ed4f9"},
|
||||
]
|
||||
joblib = [
|
||||
{file = "joblib-1.1.0-py2.py3-none-any.whl", hash = "sha256:f21f109b3c7ff9d95f8387f752d0d9c34a02aa2f7060c2135f465da0e5160ff6"},
|
||||
{file = "joblib-1.1.0.tar.gz", hash = "sha256:4158fcecd13733f8be669be0683b96ebdbbd38d23559f54dca7205aea1bf1e35"},
|
||||
]
|
||||
justext = [
|
||||
{file = "jusText-3.0.0-py2.py3-none-any.whl", hash = "sha256:86b48f5b1d99505acd072f5831def6cd3f1306043651c524a1c609e62e3544e4"},
|
||||
{file = "jusText-3.0.0.tar.gz", hash = "sha256:7640e248218795f6be65f6c35fe697325a3280fcb4675d1525bcdff2b86faadf"},
|
||||
|
@ -1170,75 +1221,46 @@ lxml = [
|
|||
{file = "lxml-4.6.4.tar.gz", hash = "sha256:daf9bd1fee31f1c7a5928b3e1059e09a8d683ea58fb3ffc773b6c88cb8d1399c"},
|
||||
]
|
||||
markupsafe = [
|
||||
{file = "MarkupSafe-2.0.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:d8446c54dc28c01e5a2dbac5a25f071f6653e6e40f3a8818e8b45d790fe6ef53"},
|
||||
{file = "MarkupSafe-2.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:36bc903cbb393720fad60fc28c10de6acf10dc6cc883f3e24ee4012371399a38"},
|
||||
{file = "MarkupSafe-2.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2d7d807855b419fc2ed3e631034685db6079889a1f01d5d9dac950f764da3dad"},
|
||||
{file = "MarkupSafe-2.0.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:add36cb2dbb8b736611303cd3bfcee00afd96471b09cda130da3581cbdc56a6d"},
|
||||
{file = "MarkupSafe-2.0.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:168cd0a3642de83558a5153c8bd34f175a9a6e7f6dc6384b9655d2697312a646"},
|
||||
{file = "MarkupSafe-2.0.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:4dc8f9fb58f7364b63fd9f85013b780ef83c11857ae79f2feda41e270468dd9b"},
|
||||
{file = "MarkupSafe-2.0.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:20dca64a3ef2d6e4d5d615a3fd418ad3bde77a47ec8a23d984a12b5b4c74491a"},
|
||||
{file = "MarkupSafe-2.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:cdfba22ea2f0029c9261a4bd07e830a8da012291fbe44dc794e488b6c9bb353a"},
|
||||
{file = "MarkupSafe-2.0.1-cp310-cp310-win32.whl", hash = "sha256:99df47edb6bda1249d3e80fdabb1dab8c08ef3975f69aed437cb69d0a5de1e28"},
|
||||
{file = "MarkupSafe-2.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:e0f138900af21926a02425cf736db95be9f4af72ba1bb21453432a07f6082134"},
|
||||
{file = "MarkupSafe-2.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:f9081981fe268bd86831e5c75f7de206ef275defcb82bc70740ae6dc507aee51"},
|
||||
{file = "MarkupSafe-2.0.1-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:0955295dd5eec6cb6cc2fe1698f4c6d84af2e92de33fbcac4111913cd100a6ff"},
|
||||
{file = "MarkupSafe-2.0.1-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:0446679737af14f45767963a1a9ef7620189912317d095f2d9ffa183a4d25d2b"},
|
||||
{file = "MarkupSafe-2.0.1-cp36-cp36m-manylinux2010_i686.whl", hash = "sha256:f826e31d18b516f653fe296d967d700fddad5901ae07c622bb3705955e1faa94"},
|
||||
{file = "MarkupSafe-2.0.1-cp36-cp36m-manylinux2010_x86_64.whl", hash = "sha256:fa130dd50c57d53368c9d59395cb5526eda596d3ffe36666cd81a44d56e48872"},
|
||||
{file = "MarkupSafe-2.0.1-cp36-cp36m-manylinux2014_aarch64.whl", hash = "sha256:905fec760bd2fa1388bb5b489ee8ee5f7291d692638ea5f67982d968366bef9f"},
|
||||
{file = "MarkupSafe-2.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bf5d821ffabf0ef3533c39c518f3357b171a1651c1ff6827325e4489b0e46c3c"},
|
||||
{file = "MarkupSafe-2.0.1-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:0d4b31cc67ab36e3392bbf3862cfbadac3db12bdd8b02a2731f509ed5b829724"},
|
||||
{file = "MarkupSafe-2.0.1-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:baa1a4e8f868845af802979fcdbf0bb11f94f1cb7ced4c4b8a351bb60d108145"},
|
||||
{file = "MarkupSafe-2.0.1-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:deb993cacb280823246a026e3b2d81c493c53de6acfd5e6bfe31ab3402bb37dd"},
|
||||
{file = "MarkupSafe-2.0.1-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:63f3268ba69ace99cab4e3e3b5840b03340efed0948ab8f78d2fd87ee5442a4f"},
|
||||
{file = "MarkupSafe-2.0.1-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:8d206346619592c6200148b01a2142798c989edcb9c896f9ac9722a99d4e77e6"},
|
||||
{file = "MarkupSafe-2.0.1-cp36-cp36m-win32.whl", hash = "sha256:6c4ca60fa24e85fe25b912b01e62cb969d69a23a5d5867682dd3e80b5b02581d"},
|
||||
{file = "MarkupSafe-2.0.1-cp36-cp36m-win_amd64.whl", hash = "sha256:b2f4bf27480f5e5e8ce285a8c8fd176c0b03e93dcc6646477d4630e83440c6a9"},
|
||||
{file = "MarkupSafe-2.0.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:0717a7390a68be14b8c793ba258e075c6f4ca819f15edfc2a3a027c823718567"},
|
||||
{file = "MarkupSafe-2.0.1-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:6557b31b5e2c9ddf0de32a691f2312a32f77cd7681d8af66c2692efdbef84c18"},
|
||||
{file = "MarkupSafe-2.0.1-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:49e3ceeabbfb9d66c3aef5af3a60cc43b85c33df25ce03d0031a608b0a8b2e3f"},
|
||||
{file = "MarkupSafe-2.0.1-cp37-cp37m-manylinux2010_i686.whl", hash = "sha256:d7f9850398e85aba693bb640262d3611788b1f29a79f0c93c565694658f4071f"},
|
||||
{file = "MarkupSafe-2.0.1-cp37-cp37m-manylinux2010_x86_64.whl", hash = "sha256:6a7fae0dd14cf60ad5ff42baa2e95727c3d81ded453457771d02b7d2b3f9c0c2"},
|
||||
{file = "MarkupSafe-2.0.1-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:b7f2d075102dc8c794cbde1947378051c4e5180d52d276987b8d28a3bd58c17d"},
|
||||
{file = "MarkupSafe-2.0.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e9936f0b261d4df76ad22f8fee3ae83b60d7c3e871292cd42f40b81b70afae85"},
|
||||
{file = "MarkupSafe-2.0.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:2a7d351cbd8cfeb19ca00de495e224dea7e7d919659c2841bbb7f420ad03e2d6"},
|
||||
{file = "MarkupSafe-2.0.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:60bf42e36abfaf9aff1f50f52644b336d4f0a3fd6d8a60ca0d054ac9f713a864"},
|
||||
{file = "MarkupSafe-2.0.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:d6c7ebd4e944c85e2c3421e612a7057a2f48d478d79e61800d81468a8d842207"},
|
||||
{file = "MarkupSafe-2.0.1-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:f0567c4dc99f264f49fe27da5f735f414c4e7e7dd850cfd8e69f0862d7c74ea9"},
|
||||
{file = "MarkupSafe-2.0.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:89c687013cb1cd489a0f0ac24febe8c7a666e6e221b783e53ac50ebf68e45d86"},
|
||||
{file = "MarkupSafe-2.0.1-cp37-cp37m-win32.whl", hash = "sha256:a30e67a65b53ea0a5e62fe23682cfe22712e01f453b95233b25502f7c61cb415"},
|
||||
{file = "MarkupSafe-2.0.1-cp37-cp37m-win_amd64.whl", hash = "sha256:611d1ad9a4288cf3e3c16014564df047fe08410e628f89805e475368bd304914"},
|
||||
{file = "MarkupSafe-2.0.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:5bb28c636d87e840583ee3adeb78172efc47c8b26127267f54a9c0ec251d41a9"},
|
||||
{file = "MarkupSafe-2.0.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:be98f628055368795d818ebf93da628541e10b75b41c559fdf36d104c5787066"},
|
||||
{file = "MarkupSafe-2.0.1-cp38-cp38-manylinux1_i686.whl", hash = "sha256:1d609f577dc6e1aa17d746f8bd3c31aa4d258f4070d61b2aa5c4166c1539de35"},
|
||||
{file = "MarkupSafe-2.0.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:7d91275b0245b1da4d4cfa07e0faedd5b0812efc15b702576d103293e252af1b"},
|
||||
{file = "MarkupSafe-2.0.1-cp38-cp38-manylinux2010_i686.whl", hash = "sha256:01a9b8ea66f1658938f65b93a85ebe8bc016e6769611be228d797c9d998dd298"},
|
||||
{file = "MarkupSafe-2.0.1-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:47ab1e7b91c098ab893b828deafa1203de86d0bc6ab587b160f78fe6c4011f75"},
|
||||
{file = "MarkupSafe-2.0.1-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:97383d78eb34da7e1fa37dd273c20ad4320929af65d156e35a5e2d89566d9dfb"},
|
||||
{file = "MarkupSafe-2.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6fcf051089389abe060c9cd7caa212c707e58153afa2c649f00346ce6d260f1b"},
|
||||
{file = "MarkupSafe-2.0.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:5855f8438a7d1d458206a2466bf82b0f104a3724bf96a1c781ab731e4201731a"},
|
||||
{file = "MarkupSafe-2.0.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:3dd007d54ee88b46be476e293f48c85048603f5f516008bee124ddd891398ed6"},
|
||||
{file = "MarkupSafe-2.0.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:aca6377c0cb8a8253e493c6b451565ac77e98c2951c45f913e0b52facdcff83f"},
|
||||
{file = "MarkupSafe-2.0.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:04635854b943835a6ea959e948d19dcd311762c5c0c6e1f0e16ee57022669194"},
|
||||
{file = "MarkupSafe-2.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:6300b8454aa6930a24b9618fbb54b5a68135092bc666f7b06901f897fa5c2fee"},
|
||||
{file = "MarkupSafe-2.0.1-cp38-cp38-win32.whl", hash = "sha256:023cb26ec21ece8dc3907c0e8320058b2e0cb3c55cf9564da612bc325bed5e64"},
|
||||
{file = "MarkupSafe-2.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:984d76483eb32f1bcb536dc27e4ad56bba4baa70be32fa87152832cdd9db0833"},
|
||||
{file = "MarkupSafe-2.0.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:2ef54abee730b502252bcdf31b10dacb0a416229b72c18b19e24a4509f273d26"},
|
||||
{file = "MarkupSafe-2.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3c112550557578c26af18a1ccc9e090bfe03832ae994343cfdacd287db6a6ae7"},
|
||||
{file = "MarkupSafe-2.0.1-cp39-cp39-manylinux1_i686.whl", hash = "sha256:53edb4da6925ad13c07b6d26c2a852bd81e364f95301c66e930ab2aef5b5ddd8"},
|
||||
{file = "MarkupSafe-2.0.1-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:f5653a225f31e113b152e56f154ccbe59eeb1c7487b39b9d9f9cdb58e6c79dc5"},
|
||||
{file = "MarkupSafe-2.0.1-cp39-cp39-manylinux2010_i686.whl", hash = "sha256:4efca8f86c54b22348a5467704e3fec767b2db12fc39c6d963168ab1d3fc9135"},
|
||||
{file = "MarkupSafe-2.0.1-cp39-cp39-manylinux2010_x86_64.whl", hash = "sha256:ab3ef638ace319fa26553db0624c4699e31a28bb2a835c5faca8f8acf6a5a902"},
|
||||
{file = "MarkupSafe-2.0.1-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:f8ba0e8349a38d3001fae7eadded3f6606f0da5d748ee53cc1dab1d6527b9509"},
|
||||
{file = "MarkupSafe-2.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c47adbc92fc1bb2b3274c4b3a43ae0e4573d9fbff4f54cd484555edbf030baf1"},
|
||||
{file = "MarkupSafe-2.0.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:37205cac2a79194e3750b0af2a5720d95f786a55ce7df90c3af697bfa100eaac"},
|
||||
{file = "MarkupSafe-2.0.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:1f2ade76b9903f39aa442b4aadd2177decb66525062db244b35d71d0ee8599b6"},
|
||||
{file = "MarkupSafe-2.0.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:4296f2b1ce8c86a6aea78613c34bb1a672ea0e3de9c6ba08a960efe0b0a09047"},
|
||||
{file = "MarkupSafe-2.0.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:9f02365d4e99430a12647f09b6cc8bab61a6564363f313126f775eb4f6ef798e"},
|
||||
{file = "MarkupSafe-2.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5b6d930f030f8ed98e3e6c98ffa0652bdb82601e7a016ec2ab5d7ff23baa78d1"},
|
||||
{file = "MarkupSafe-2.0.1-cp39-cp39-win32.whl", hash = "sha256:10f82115e21dc0dfec9ab5c0223652f7197feb168c940f3ef61563fc2d6beb74"},
|
||||
{file = "MarkupSafe-2.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:693ce3f9e70a6cf7d2fb9e6c9d8b204b6b39897a2c4a1aa65728d5ac97dcc1d8"},
|
||||
{file = "MarkupSafe-2.0.1.tar.gz", hash = "sha256:594c67807fb16238b30c44bdf74f36c02cdf22d1c8cda91ef8a0ed8dabf5620a"},
|
||||
{file = "MarkupSafe-2.1.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:86b1f75c4e7c2ac2ccdaec2b9022845dbb81880ca318bb7a0a01fbf7813e3812"},
|
||||
{file = "MarkupSafe-2.1.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f121a1420d4e173a5d96e47e9a0c0dcff965afdf1626d28de1460815f7c4ee7a"},
|
||||
{file = "MarkupSafe-2.1.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a49907dd8420c5685cfa064a1335b6754b74541bbb3706c259c02ed65b644b3e"},
|
||||
{file = "MarkupSafe-2.1.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:10c1bfff05d95783da83491be968e8fe789263689c02724e0c691933c52994f5"},
|
||||
{file = "MarkupSafe-2.1.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b7bd98b796e2b6553da7225aeb61f447f80a1ca64f41d83612e6139ca5213aa4"},
|
||||
{file = "MarkupSafe-2.1.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:b09bf97215625a311f669476f44b8b318b075847b49316d3e28c08e41a7a573f"},
|
||||
{file = "MarkupSafe-2.1.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:694deca8d702d5db21ec83983ce0bb4b26a578e71fbdbd4fdcd387daa90e4d5e"},
|
||||
{file = "MarkupSafe-2.1.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:efc1913fd2ca4f334418481c7e595c00aad186563bbc1ec76067848c7ca0a933"},
|
||||
{file = "MarkupSafe-2.1.1-cp310-cp310-win32.whl", hash = "sha256:4a33dea2b688b3190ee12bd7cfa29d39c9ed176bda40bfa11099a3ce5d3a7ac6"},
|
||||
{file = "MarkupSafe-2.1.1-cp310-cp310-win_amd64.whl", hash = "sha256:dda30ba7e87fbbb7eab1ec9f58678558fd9a6b8b853530e176eabd064da81417"},
|
||||
{file = "MarkupSafe-2.1.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:671cd1187ed5e62818414afe79ed29da836dde67166a9fac6d435873c44fdd02"},
|
||||
{file = "MarkupSafe-2.1.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3799351e2336dc91ea70b034983ee71cf2f9533cdff7c14c90ea126bfd95d65a"},
|
||||
{file = "MarkupSafe-2.1.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e72591e9ecd94d7feb70c1cbd7be7b3ebea3f548870aa91e2732960fa4d57a37"},
|
||||
{file = "MarkupSafe-2.1.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6fbf47b5d3728c6aea2abb0589b5d30459e369baa772e0f37a0320185e87c980"},
|
||||
{file = "MarkupSafe-2.1.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:d5ee4f386140395a2c818d149221149c54849dfcfcb9f1debfe07a8b8bd63f9a"},
|
||||
{file = "MarkupSafe-2.1.1-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:bcb3ed405ed3222f9904899563d6fc492ff75cce56cba05e32eff40e6acbeaa3"},
|
||||
{file = "MarkupSafe-2.1.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:e1c0b87e09fa55a220f058d1d49d3fb8df88fbfab58558f1198e08c1e1de842a"},
|
||||
{file = "MarkupSafe-2.1.1-cp37-cp37m-win32.whl", hash = "sha256:8dc1c72a69aa7e082593c4a203dcf94ddb74bb5c8a731e4e1eb68d031e8498ff"},
|
||||
{file = "MarkupSafe-2.1.1-cp37-cp37m-win_amd64.whl", hash = "sha256:97a68e6ada378df82bc9f16b800ab77cbf4b2fada0081794318520138c088e4a"},
|
||||
{file = "MarkupSafe-2.1.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:e8c843bbcda3a2f1e3c2ab25913c80a3c5376cd00c6e8c4a86a89a28c8dc5452"},
|
||||
{file = "MarkupSafe-2.1.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0212a68688482dc52b2d45013df70d169f542b7394fc744c02a57374a4207003"},
|
||||
{file = "MarkupSafe-2.1.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8e576a51ad59e4bfaac456023a78f6b5e6e7651dcd383bcc3e18d06f9b55d6d1"},
|
||||
{file = "MarkupSafe-2.1.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4b9fe39a2ccc108a4accc2676e77da025ce383c108593d65cc909add5c3bd601"},
|
||||
{file = "MarkupSafe-2.1.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:96e37a3dc86e80bf81758c152fe66dbf60ed5eca3d26305edf01892257049925"},
|
||||
{file = "MarkupSafe-2.1.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:6d0072fea50feec76a4c418096652f2c3238eaa014b2f94aeb1d56a66b41403f"},
|
||||
{file = "MarkupSafe-2.1.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:089cf3dbf0cd6c100f02945abeb18484bd1ee57a079aefd52cffd17fba910b88"},
|
||||
{file = "MarkupSafe-2.1.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:6a074d34ee7a5ce3effbc526b7083ec9731bb3cbf921bbe1d3005d4d2bdb3a63"},
|
||||
{file = "MarkupSafe-2.1.1-cp38-cp38-win32.whl", hash = "sha256:421be9fbf0ffe9ffd7a378aafebbf6f4602d564d34be190fc19a193232fd12b1"},
|
||||
{file = "MarkupSafe-2.1.1-cp38-cp38-win_amd64.whl", hash = "sha256:fc7b548b17d238737688817ab67deebb30e8073c95749d55538ed473130ec0c7"},
|
||||
{file = "MarkupSafe-2.1.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:e04e26803c9c3851c931eac40c695602c6295b8d432cbe78609649ad9bd2da8a"},
|
||||
{file = "MarkupSafe-2.1.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b87db4360013327109564f0e591bd2a3b318547bcef31b468a92ee504d07ae4f"},
|
||||
{file = "MarkupSafe-2.1.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:99a2a507ed3ac881b975a2976d59f38c19386d128e7a9a18b7df6fff1fd4c1d6"},
|
||||
{file = "MarkupSafe-2.1.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:56442863ed2b06d19c37f94d999035e15ee982988920e12a5b4ba29b62ad1f77"},
|
||||
{file = "MarkupSafe-2.1.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3ce11ee3f23f79dbd06fb3d63e2f6af7b12db1d46932fe7bd8afa259a5996603"},
|
||||
{file = "MarkupSafe-2.1.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:33b74d289bd2f5e527beadcaa3f401e0df0a89927c1559c8566c066fa4248ab7"},
|
||||
{file = "MarkupSafe-2.1.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:43093fb83d8343aac0b1baa75516da6092f58f41200907ef92448ecab8825135"},
|
||||
{file = "MarkupSafe-2.1.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8e3dcf21f367459434c18e71b2a9532d96547aef8a871872a5bd69a715c15f96"},
|
||||
{file = "MarkupSafe-2.1.1-cp39-cp39-win32.whl", hash = "sha256:d4306c36ca495956b6d568d276ac11fdd9c30a36f1b6eb928070dc5360b22e1c"},
|
||||
{file = "MarkupSafe-2.1.1-cp39-cp39-win_amd64.whl", hash = "sha256:46d00d6cfecdde84d40e572d63735ef81423ad31184100411e6e3388d405e247"},
|
||||
{file = "MarkupSafe-2.1.1.tar.gz", hash = "sha256:7f91197cc9e48f989d12e4e6fbc46495c446636dfc81b9ccf50bb0ec74b91d4b"},
|
||||
]
|
||||
mmh3 = [
|
||||
{file = "mmh3-3.0.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:23912dde2ad4f701926948dd8e79a0e42b000f73962806f153931f52985e1e07"},
|
||||
|
@ -1287,61 +1309,53 @@ murmurhash = [
|
|||
{file = "murmurhash-1.0.6.tar.gz", hash = "sha256:00a5252b569d3f914b5bd0bce72d2efe9c0fb91a9703556ea1b608b141c68f2d"},
|
||||
]
|
||||
numpy = [
|
||||
{file = "numpy-1.21.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:38e8648f9449a549a7dfe8d8755a5979b45b3538520d1e735637ef28e8c2dc50"},
|
||||
{file = "numpy-1.21.1-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:fd7d7409fa643a91d0a05c7554dd68aa9c9bb16e186f6ccfe40d6e003156e33a"},
|
||||
{file = "numpy-1.21.1-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:a75b4498b1e93d8b700282dc8e655b8bd559c0904b3910b144646dbbbc03e062"},
|
||||
{file = "numpy-1.21.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1412aa0aec3e00bc23fbb8664d76552b4efde98fb71f60737c83efbac24112f1"},
|
||||
{file = "numpy-1.21.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e46ceaff65609b5399163de5893d8f2a82d3c77d5e56d976c8b5fb01faa6b671"},
|
||||
{file = "numpy-1.21.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:c6a2324085dd52f96498419ba95b5777e40b6bcbc20088fddb9e8cbb58885e8e"},
|
||||
{file = "numpy-1.21.1-cp37-cp37m-win32.whl", hash = "sha256:73101b2a1fef16602696d133db402a7e7586654682244344b8329cdcbbb82172"},
|
||||
{file = "numpy-1.21.1-cp37-cp37m-win_amd64.whl", hash = "sha256:7a708a79c9a9d26904d1cca8d383bf869edf6f8e7650d85dbc77b041e8c5a0f8"},
|
||||
{file = "numpy-1.21.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:95b995d0c413f5d0428b3f880e8fe1660ff9396dcd1f9eedbc311f37b5652e16"},
|
||||
{file = "numpy-1.21.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:635e6bd31c9fb3d475c8f44a089569070d10a9ef18ed13738b03049280281267"},
|
||||
{file = "numpy-1.21.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:4a3d5fb89bfe21be2ef47c0614b9c9c707b7362386c9a3ff1feae63e0267ccb6"},
|
||||
{file = "numpy-1.21.1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:8a326af80e86d0e9ce92bcc1e65c8ff88297de4fa14ee936cb2293d414c9ec63"},
|
||||
{file = "numpy-1.21.1-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:791492091744b0fe390a6ce85cc1bf5149968ac7d5f0477288f78c89b385d9af"},
|
||||
{file = "numpy-1.21.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0318c465786c1f63ac05d7c4dbcecd4d2d7e13f0959b01b534ea1e92202235c5"},
|
||||
{file = "numpy-1.21.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9a513bd9c1551894ee3d31369f9b07460ef223694098cf27d399513415855b68"},
|
||||
{file = "numpy-1.21.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:91c6f5fc58df1e0a3cc0c3a717bb3308ff850abdaa6d2d802573ee2b11f674a8"},
|
||||
{file = "numpy-1.21.1-cp38-cp38-win32.whl", hash = "sha256:978010b68e17150db8765355d1ccdd450f9fc916824e8c4e35ee620590e234cd"},
|
||||
{file = "numpy-1.21.1-cp38-cp38-win_amd64.whl", hash = "sha256:9749a40a5b22333467f02fe11edc98f022133ee1bfa8ab99bda5e5437b831214"},
|
||||
{file = "numpy-1.21.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:d7a4aeac3b94af92a9373d6e77b37691b86411f9745190d2c351f410ab3a791f"},
|
||||
{file = "numpy-1.21.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:d9e7912a56108aba9b31df688a4c4f5cb0d9d3787386b87d504762b6754fbb1b"},
|
||||
{file = "numpy-1.21.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:25b40b98ebdd272bc3020935427a4530b7d60dfbe1ab9381a39147834e985eac"},
|
||||
{file = "numpy-1.21.1-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:8a92c5aea763d14ba9d6475803fc7904bda7decc2a0a68153f587ad82941fec1"},
|
||||
{file = "numpy-1.21.1-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:05a0f648eb28bae4bcb204e6fd14603de2908de982e761a2fc78efe0f19e96e1"},
|
||||
{file = "numpy-1.21.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f01f28075a92eede918b965e86e8f0ba7b7797a95aa8d35e1cc8821f5fc3ad6a"},
|
||||
{file = "numpy-1.21.1-cp39-cp39-win32.whl", hash = "sha256:88c0b89ad1cc24a5efbb99ff9ab5db0f9a86e9cc50240177a571fbe9c2860ac2"},
|
||||
{file = "numpy-1.21.1-cp39-cp39-win_amd64.whl", hash = "sha256:01721eefe70544d548425a07c80be8377096a54118070b8a62476866d5208e33"},
|
||||
{file = "numpy-1.21.1-pp37-pypy37_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:2d4d1de6e6fb3d28781c73fbde702ac97f03d79e4ffd6598b880b2d95d62ead4"},
|
||||
{file = "numpy-1.21.1.zip", hash = "sha256:dff4af63638afcc57a3dfb9e4b26d434a7a602d225b42d746ea7fe2edf1342fd"},
|
||||
{file = "numpy-1.22.3-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:92bfa69cfbdf7dfc3040978ad09a48091143cffb778ec3b03fa170c494118d75"},
|
||||
{file = "numpy-1.22.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8251ed96f38b47b4295b1ae51631de7ffa8260b5b087808ef09a39a9d66c97ab"},
|
||||
{file = "numpy-1.22.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:48a3aecd3b997bf452a2dedb11f4e79bc5bfd21a1d4cc760e703c31d57c84b3e"},
|
||||
{file = "numpy-1.22.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a3bae1a2ed00e90b3ba5f7bd0a7c7999b55d609e0c54ceb2b076a25e345fa9f4"},
|
||||
{file = "numpy-1.22.3-cp310-cp310-win32.whl", hash = "sha256:f950f8845b480cffe522913d35567e29dd381b0dc7e4ce6a4a9f9156417d2430"},
|
||||
{file = "numpy-1.22.3-cp310-cp310-win_amd64.whl", hash = "sha256:08d9b008d0156c70dc392bb3ab3abb6e7a711383c3247b410b39962263576cd4"},
|
||||
{file = "numpy-1.22.3-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:201b4d0552831f7250a08d3b38de0d989d6f6e4658b709a02a73c524ccc6ffce"},
|
||||
{file = "numpy-1.22.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:f8c1f39caad2c896bc0018f699882b345b2a63708008be29b1f355ebf6f933fe"},
|
||||
{file = "numpy-1.22.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:568dfd16224abddafb1cbcce2ff14f522abe037268514dd7e42c6776a1c3f8e5"},
|
||||
{file = "numpy-1.22.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ca688e1b9b95d80250bca34b11a05e389b1420d00e87a0d12dc45f131f704a1"},
|
||||
{file = "numpy-1.22.3-cp38-cp38-win32.whl", hash = "sha256:e7927a589df200c5e23c57970bafbd0cd322459aa7b1ff73b7c2e84d6e3eae62"},
|
||||
{file = "numpy-1.22.3-cp38-cp38-win_amd64.whl", hash = "sha256:07a8c89a04997625236c5ecb7afe35a02af3896c8aa01890a849913a2309c676"},
|
||||
{file = "numpy-1.22.3-cp39-cp39-macosx_10_14_x86_64.whl", hash = "sha256:2c10a93606e0b4b95c9b04b77dc349b398fdfbda382d2a39ba5a822f669a0123"},
|
||||
{file = "numpy-1.22.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:fade0d4f4d292b6f39951b6836d7a3c7ef5b2347f3c420cd9820a1d90d794802"},
|
||||
{file = "numpy-1.22.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5bfb1bb598e8229c2d5d48db1860bcf4311337864ea3efdbe1171fb0c5da515d"},
|
||||
{file = "numpy-1.22.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:97098b95aa4e418529099c26558eeb8486e66bd1e53a6b606d684d0c3616b168"},
|
||||
{file = "numpy-1.22.3-cp39-cp39-win32.whl", hash = "sha256:fdf3c08bce27132395d3c3ba1503cac12e17282358cb4bddc25cc46b0aca07aa"},
|
||||
{file = "numpy-1.22.3-cp39-cp39-win_amd64.whl", hash = "sha256:639b54cdf6aa4f82fe37ebf70401bbb74b8508fddcf4797f9fe59615b8c5813a"},
|
||||
{file = "numpy-1.22.3-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c34ea7e9d13a70bf2ab64a2532fe149a9aced424cd05a2c4ba662fd989e3e45f"},
|
||||
{file = "numpy-1.22.3.zip", hash = "sha256:dbc7601a3b7472d559dc7b933b18b4b66f9aa7452c120e87dfb33d02008c8a18"},
|
||||
]
|
||||
packaging = [
|
||||
{file = "packaging-21.3-py3-none-any.whl", hash = "sha256:ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522"},
|
||||
{file = "packaging-21.3.tar.gz", hash = "sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb"},
|
||||
]
|
||||
pandas = [
|
||||
{file = "pandas-1.4.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:de62cf699122dcef175988f0714678e59c453dc234c5b47b7136bfd7641e3c8c"},
|
||||
{file = "pandas-1.4.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:46a18572f3e1cb75db59d9461940e9ba7ee38967fa48dd58f4139197f6e32280"},
|
||||
{file = "pandas-1.4.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:73f7da2ccc38cc988b74e5400b430b7905db5f2c413ff215506bea034eaf832d"},
|
||||
{file = "pandas-1.4.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5229c95db3a907451dacebc551492db6f7d01743e49bbc862f4a6010c227d187"},
|
||||
{file = "pandas-1.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fe454180ad31bbbe1e5d111b44443258730467f035e26b4e354655ab59405871"},
|
||||
{file = "pandas-1.4.0-cp310-cp310-win_amd64.whl", hash = "sha256:784cca3f69cfd7f6bd7c7fdb44f2bbab17e6de55725e9ff36d6f382510dfefb5"},
|
||||
{file = "pandas-1.4.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:de8f8999864399529e8514a2e6bfe00fd161f0a667903655552ed12e583ae3cb"},
|
||||
{file = "pandas-1.4.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0f19504f2783526fb5b4de675ea69d68974e21c1624f4b92295d057a31d5ec5f"},
|
||||
{file = "pandas-1.4.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:f045bb5c6bfaba536089573bf97d6b8ccc7159d951fe63904c395a5e486fbe14"},
|
||||
{file = "pandas-1.4.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5280d057ddae06fe4a3cd6aa79040b8c205cd6dd21743004cf8635f39ed01712"},
|
||||
{file = "pandas-1.4.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1f3b74335390dda49f5d5089fab71958812bf56f42aa27663ee4c16d19f4f1c5"},
|
||||
{file = "pandas-1.4.0-cp38-cp38-win32.whl", hash = "sha256:51e5da3802aaee1aa4254108ffaf1129a15fb3810b7ce8da1ec217c655b418f5"},
|
||||
{file = "pandas-1.4.0-cp38-cp38-win_amd64.whl", hash = "sha256:f103a5cdcd66cb18882ccdc18a130c31c3cfe3529732e7f10a8ab3559164819c"},
|
||||
{file = "pandas-1.4.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:4a8d5a200f8685e7ea562b2f022c77ab7cb82c1ca5b240e6965faa6f84e5c1e9"},
|
||||
{file = "pandas-1.4.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b5af258c7b090cca7b742cf2bd67ad1919aa9e4e681007366c9edad2d6a3d42b"},
|
||||
{file = "pandas-1.4.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:156aac90dd7b303bf0b91bae96c0503212777f86c731e41929c571125d26c8e9"},
|
||||
{file = "pandas-1.4.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2dad075089e17a72391de33021ad93720aff258c3c4b68c78e1cafce7e447045"},
|
||||
{file = "pandas-1.4.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1d59c958d6b8f96fdf850c7821571782168d5acfe75ccf78cd8d1ac15fb921df"},
|
||||
{file = "pandas-1.4.0-cp39-cp39-win32.whl", hash = "sha256:55ec0e192eefa26d823fc25a1f213d6c304a3592915f368e360652994cdb8d9a"},
|
||||
{file = "pandas-1.4.0-cp39-cp39-win_amd64.whl", hash = "sha256:23c04dab11f3c6359cfa7afa83d3d054a8f8c283d773451184d98119ef54da97"},
|
||||
{file = "pandas-1.4.0.tar.gz", hash = "sha256:cdd76254c7f0a1583bd4e4781fb450d0ebf392e10d3f12e92c95575942e37df5"},
|
||||
{file = "pandas-1.4.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:3dfb32ed50122fe8c5e7f2b8d97387edd742cc78f9ec36f007ee126cd3720907"},
|
||||
{file = "pandas-1.4.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0259cd11e7e6125aaea3af823b80444f3adad6149ff4c97fef760093598b3e34"},
|
||||
{file = "pandas-1.4.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:96e9ece5759f9b47ae43794b6359bbc54805d76e573b161ae770c1ea59393106"},
|
||||
{file = "pandas-1.4.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:508c99debccd15790d526ce6b1624b97a5e1e4ca5b871319fb0ebfd46b8f4dad"},
|
||||
{file = "pandas-1.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e6a7bbbb7950063bfc942f8794bc3e31697c020a14f1cd8905fc1d28ec674a01"},
|
||||
{file = "pandas-1.4.1-cp310-cp310-win_amd64.whl", hash = "sha256:c614001129b2a5add5e3677c3a213a9e6fd376204cb8d17c04e84ff7dfc02a73"},
|
||||
{file = "pandas-1.4.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:4e1176f45981c8ccc8161bc036916c004ca51037a7ed73f2d2a9857e6dbe654f"},
|
||||
{file = "pandas-1.4.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:bbb15ad79050e8b8d39ec40dd96a30cd09b886a2ae8848d0df1abba4d5502a67"},
|
||||
{file = "pandas-1.4.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:6d6ad1da00c7cc7d8dd1559a6ba59ba3973be6b15722d49738b2be0977eb8a0c"},
|
||||
{file = "pandas-1.4.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:358b0bc98a5ff067132d23bf7a2242ee95db9ea5b7bbc401cf79205f11502fd3"},
|
||||
{file = "pandas-1.4.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6105af6533f8b63a43ea9f08a2ede04e8f43e49daef0209ab0d30352bcf08bee"},
|
||||
{file = "pandas-1.4.1-cp38-cp38-win32.whl", hash = "sha256:04dd15d9db538470900c851498e532ef28d4e56bfe72c9523acb32042de43dfb"},
|
||||
{file = "pandas-1.4.1-cp38-cp38-win_amd64.whl", hash = "sha256:1b384516dbb4e6aae30e3464c2e77c563da5980440fbdfbd0968e3942f8f9d70"},
|
||||
{file = "pandas-1.4.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:f02e85e6d832be37d7f16cf6ac8bb26b519ace3e5f3235564a91c7f658ab2a43"},
|
||||
{file = "pandas-1.4.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:0b1a13f647e4209ed7dbb5da3497891d0045da9785327530ab696417ef478f84"},
|
||||
{file = "pandas-1.4.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:19f7c632436b1b4f84615c3b127bbd7bc603db95e3d4332ed259dc815c9aaa26"},
|
||||
{file = "pandas-1.4.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7ea47ba1d6f359680130bd29af497333be6110de8f4c35b9211eec5a5a9630fa"},
|
||||
{file = "pandas-1.4.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2e5a7a1e0ecaac652326af627a3eca84886da9e667d68286866d4e33f6547caf"},
|
||||
{file = "pandas-1.4.1-cp39-cp39-win32.whl", hash = "sha256:1d85d5f6be66dfd6d1d8d13b9535e342a2214260f1852654b19fa4d7b8d1218b"},
|
||||
{file = "pandas-1.4.1-cp39-cp39-win_amd64.whl", hash = "sha256:3129a35d9dad1d80c234dd78f8f03141b914395d23f97cf92a366dcd19f8f8bf"},
|
||||
{file = "pandas-1.4.1.tar.gz", hash = "sha256:8db93ec98ac7cb5f8ac1420c10f5e3c43533153f253fe7fb6d891cf5aa2b80d2"},
|
||||
]
|
||||
pathy = [
|
||||
{file = "pathy-0.6.1-py3-none-any.whl", hash = "sha256:25fd04cec6393661113086730ce69c789d121bea83ab1aa18452e8fd42faf29a"},
|
||||
|
@ -1451,16 +1465,16 @@ pyspark = [
|
|||
{file = "pyspark-3.2.0.tar.gz", hash = "sha256:bfea06179edbfb4bc76a0f470bd3c38e12f00e1023e3ad0373558d07cff102ab"},
|
||||
]
|
||||
pytest = [
|
||||
{file = "pytest-7.0.1-py3-none-any.whl", hash = "sha256:9ce3ff477af913ecf6321fe337b93a2c0dcf2a0a1439c43f5452112c1e4280db"},
|
||||
{file = "pytest-7.0.1.tar.gz", hash = "sha256:e30905a0c131d3d94b89624a1cc5afec3e0ba2fbdb151867d8e0ebd49850f171"},
|
||||
{file = "pytest-7.1.1-py3-none-any.whl", hash = "sha256:92f723789a8fdd7180b6b06483874feca4c48a5c76968e03bb3e7f806a1869ea"},
|
||||
{file = "pytest-7.1.1.tar.gz", hash = "sha256:841132caef6b1ad17a9afde46dc4f6cfa59a05f9555aae5151f73bdf2820ca63"},
|
||||
]
|
||||
python-dateutil = [
|
||||
{file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"},
|
||||
{file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"},
|
||||
]
|
||||
pytz = [
|
||||
{file = "pytz-2021.3-py2.py3-none-any.whl", hash = "sha256:3672058bc3453457b622aab7a1c3bfd5ab0bdae451512f6cf25f64ed37f5b87c"},
|
||||
{file = "pytz-2021.3.tar.gz", hash = "sha256:acad2d8b20a1af07d4e4c9d2e9285c5ed9104354062f275f3fcd88dcef4f1326"},
|
||||
{file = "pytz-2022.1-py2.py3-none-any.whl", hash = "sha256:e68985985296d9a66a881eb3193b0906246245294a881e7c8afe623866ac6a5c"},
|
||||
{file = "pytz-2022.1.tar.gz", hash = "sha256:1e760e2fe6a8163bc0b3d9a19c4f84342afa0a2affebfaa84b01b978a02ecaa7"},
|
||||
]
|
||||
pyyaml = [
|
||||
{file = "PyYAML-6.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d4db7c7aef085872ef65a8fd7d6d09a14ae91f691dec3e87ee5ee0539d516f53"},
|
||||
|
@ -1558,8 +1572,67 @@ requests = [
|
|||
{file = "requests-2.27.1.tar.gz", hash = "sha256:68d7c56fd5a8999887728ef304a6d12edc7be74f1cfa47714fc8b414525c9a61"},
|
||||
]
|
||||
s3transfer = [
|
||||
{file = "s3transfer-0.5.0-py3-none-any.whl", hash = "sha256:9c1dc369814391a6bda20ebbf4b70a0f34630592c9aa520856bf384916af2803"},
|
||||
{file = "s3transfer-0.5.0.tar.gz", hash = "sha256:50ed823e1dc5868ad40c8dc92072f757aa0e653a192845c94a3b676f4a62da4c"},
|
||||
{file = "s3transfer-0.5.2-py3-none-any.whl", hash = "sha256:7a6f4c4d1fdb9a2b640244008e142cbc2cd3ae34b386584ef044dd0f27101971"},
|
||||
{file = "s3transfer-0.5.2.tar.gz", hash = "sha256:95c58c194ce657a5f4fb0b9e60a84968c808888aed628cd98ab8771fe1db98ed"},
|
||||
]
|
||||
scikit-learn = [
|
||||
{file = "scikit-learn-1.0.2.tar.gz", hash = "sha256:b5870959a5484b614f26d31ca4c17524b1b0317522199dc985c3b4256e030767"},
|
||||
{file = "scikit_learn-1.0.2-cp310-cp310-macosx_10_13_x86_64.whl", hash = "sha256:da3c84694ff693b5b3194d8752ccf935a665b8b5edc33a283122f4273ca3e687"},
|
||||
{file = "scikit_learn-1.0.2-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:75307d9ea39236cad7eea87143155eea24d48f93f3a2f9389c817f7019f00705"},
|
||||
{file = "scikit_learn-1.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f14517e174bd7332f1cca2c959e704696a5e0ba246eb8763e6c24876d8710049"},
|
||||
{file = "scikit_learn-1.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d9aac97e57c196206179f674f09bc6bffcd0284e2ba95b7fe0b402ac3f986023"},
|
||||
{file = "scikit_learn-1.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:d93d4c28370aea8a7cbf6015e8a669cd5d69f856cc2aa44e7a590fb805bb5583"},
|
||||
{file = "scikit_learn-1.0.2-cp37-cp37m-macosx_10_13_x86_64.whl", hash = "sha256:85260fb430b795d806251dd3bb05e6f48cdc777ac31f2bcf2bc8bbed3270a8f5"},
|
||||
{file = "scikit_learn-1.0.2-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:a053a6a527c87c5c4fa7bf1ab2556fa16d8345cf99b6c5a19030a4a7cd8fd2c0"},
|
||||
{file = "scikit_learn-1.0.2-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:245c9b5a67445f6f044411e16a93a554edc1efdcce94d3fc0bc6a4b9ac30b752"},
|
||||
{file = "scikit_learn-1.0.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:158faf30684c92a78e12da19c73feff9641a928a8024b4fa5ec11d583f3d8a87"},
|
||||
{file = "scikit_learn-1.0.2-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:08ef968f6b72033c16c479c966bf37ccd49b06ea91b765e1cc27afefe723920b"},
|
||||
{file = "scikit_learn-1.0.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:16455ace947d8d9e5391435c2977178d0ff03a261571e67f627c8fee0f9d431a"},
|
||||
{file = "scikit_learn-1.0.2-cp37-cp37m-win32.whl", hash = "sha256:2f3b453e0b149898577e301d27e098dfe1a36943f7bb0ad704d1e548efc3b448"},
|
||||
{file = "scikit_learn-1.0.2-cp37-cp37m-win_amd64.whl", hash = "sha256:46f431ec59dead665e1370314dbebc99ead05e1c0a9df42f22d6a0e00044820f"},
|
||||
{file = "scikit_learn-1.0.2-cp38-cp38-macosx_10_13_x86_64.whl", hash = "sha256:ff3fa8ea0e09e38677762afc6e14cad77b5e125b0ea70c9bba1992f02c93b028"},
|
||||
{file = "scikit_learn-1.0.2-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:9369b030e155f8188743eb4893ac17a27f81d28a884af460870c7c072f114243"},
|
||||
{file = "scikit_learn-1.0.2-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:7d6b2475f1c23a698b48515217eb26b45a6598c7b1840ba23b3c5acece658dbb"},
|
||||
{file = "scikit_learn-1.0.2-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:285db0352e635b9e3392b0b426bc48c3b485512d3b4ac3c7a44ec2a2ba061e66"},
|
||||
{file = "scikit_learn-1.0.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5cb33fe1dc6f73dc19e67b264dbb5dde2a0539b986435fdd78ed978c14654830"},
|
||||
{file = "scikit_learn-1.0.2-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b1391d1a6e2268485a63c3073111fe3ba6ec5145fc957481cfd0652be571226d"},
|
||||
{file = "scikit_learn-1.0.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc3744dabc56b50bec73624aeca02e0def06b03cb287de26836e730659c5d29c"},
|
||||
{file = "scikit_learn-1.0.2-cp38-cp38-win32.whl", hash = "sha256:a999c9f02ff9570c783069f1074f06fe7386ec65b84c983db5aeb8144356a355"},
|
||||
{file = "scikit_learn-1.0.2-cp38-cp38-win_amd64.whl", hash = "sha256:7626a34eabbf370a638f32d1a3ad50526844ba58d63e3ab81ba91e2a7c6d037e"},
|
||||
{file = "scikit_learn-1.0.2-cp39-cp39-macosx_10_13_x86_64.whl", hash = "sha256:a90b60048f9ffdd962d2ad2fb16367a87ac34d76e02550968719eb7b5716fd10"},
|
||||
{file = "scikit_learn-1.0.2-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:7a93c1292799620df90348800d5ac06f3794c1316ca247525fa31169f6d25855"},
|
||||
{file = "scikit_learn-1.0.2-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:eabceab574f471de0b0eb3f2ecf2eee9f10b3106570481d007ed1c84ebf6d6a1"},
|
||||
{file = "scikit_learn-1.0.2-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:55f2f3a8414e14fbee03782f9fe16cca0f141d639d2b1c1a36779fa069e1db57"},
|
||||
{file = "scikit_learn-1.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:80095a1e4b93bd33261ef03b9bc86d6db649f988ea4dbcf7110d0cded8d7213d"},
|
||||
{file = "scikit_learn-1.0.2-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fa38a1b9b38ae1fad2863eff5e0d69608567453fdfc850c992e6e47eb764e846"},
|
||||
{file = "scikit_learn-1.0.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ff746a69ff2ef25f62b36338c615dd15954ddc3ab8e73530237dd73235e76d62"},
|
||||
{file = "scikit_learn-1.0.2-cp39-cp39-win32.whl", hash = "sha256:e174242caecb11e4abf169342641778f68e1bfaba80cd18acd6bc84286b9a534"},
|
||||
{file = "scikit_learn-1.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:b54a62c6e318ddbfa7d22c383466d38d2ee770ebdb5ddb668d56a099f6eaf75f"},
|
||||
]
|
||||
scipy = [
|
||||
{file = "scipy-1.8.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:87b01c7d5761e8a266a0fbdb9d88dcba0910d63c1c671bdb4d99d29f469e9e03"},
|
||||
{file = "scipy-1.8.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:ae3e327da323d82e918e593460e23babdce40d7ab21490ddf9fc06dec6b91a18"},
|
||||
{file = "scipy-1.8.0-cp310-cp310-macosx_12_0_universal2.macosx_10_9_x86_64.whl", hash = "sha256:16e09ef68b352d73befa8bcaf3ebe25d3941fe1a58c82909d5589856e6bc8174"},
|
||||
{file = "scipy-1.8.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c17a1878d00a5dd2797ccd73623ceca9d02375328f6218ee6d921e1325e61aff"},
|
||||
{file = "scipy-1.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:937d28722f13302febde29847bbe554b89073fbb924a30475e5ed7b028898b5f"},
|
||||
{file = "scipy-1.8.0-cp310-cp310-win_amd64.whl", hash = "sha256:8f4d059a97b29c91afad46b1737274cb282357a305a80bdd9e8adf3b0ca6a3f0"},
|
||||
{file = "scipy-1.8.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:38aa39b6724cb65271e469013aeb6f2ce66fd44f093e241c28a9c6bc64fd79ed"},
|
||||
{file = "scipy-1.8.0-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:559a8a4c03a5ba9fe3232f39ed24f86457e4f3f6c0abbeae1fb945029f092720"},
|
||||
{file = "scipy-1.8.0-cp38-cp38-macosx_12_0_universal2.macosx_10_9_x86_64.whl", hash = "sha256:f4a6d3b9f9797eb2d43938ac2c5d96d02aed17ef170c8b38f11798717523ddba"},
|
||||
{file = "scipy-1.8.0-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:92b2c2af4183ed09afb595709a8ef5783b2baf7f41e26ece24e1329c109691a7"},
|
||||
{file = "scipy-1.8.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a279e27c7f4566ef18bab1b1e2c37d168e365080974758d107e7d237d3f0f484"},
|
||||
{file = "scipy-1.8.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ad5be4039147c808e64f99c0e8a9641eb5d2fa079ff5894dcd8240e94e347af4"},
|
||||
{file = "scipy-1.8.0-cp38-cp38-win32.whl", hash = "sha256:3d9dd6c8b93a22bf9a3a52d1327aca7e092b1299fb3afc4f89e8eba381be7b59"},
|
||||
{file = "scipy-1.8.0-cp38-cp38-win_amd64.whl", hash = "sha256:5e73343c5e0d413c1f937302b2e04fb07872f5843041bcfd50699aef6e95e399"},
|
||||
{file = "scipy-1.8.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:de2e80ee1d925984c2504812a310841c241791c5279352be4707cdcd7c255039"},
|
||||
{file = "scipy-1.8.0-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:c2bae431d127bf0b1da81fc24e4bba0a84d058e3a96b9dd6475dfcb3c5e8761e"},
|
||||
{file = "scipy-1.8.0-cp39-cp39-macosx_12_0_universal2.macosx_10_9_x86_64.whl", hash = "sha256:723b9f878095ed994756fa4ee3060c450e2db0139c5ba248ee3f9628bd64e735"},
|
||||
{file = "scipy-1.8.0-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:011d4386b53b933142f58a652aa0f149c9b9242abd4f900b9f4ea5fbafc86b89"},
|
||||
{file = "scipy-1.8.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e6f0cd9c0bd374ef834ee1e0f0999678d49dcc400ea6209113d81528958f97c7"},
|
||||
{file = "scipy-1.8.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3720d0124aced49f6f2198a6900304411dbbeed12f56951d7c66ebef05e3df6"},
|
||||
{file = "scipy-1.8.0-cp39-cp39-win32.whl", hash = "sha256:3d573228c10a3a8c32b9037be982e6440e411b443a6267b067cac72f690b8d56"},
|
||||
{file = "scipy-1.8.0-cp39-cp39-win_amd64.whl", hash = "sha256:bb7088e89cd751acf66195d2f00cf009a1ea113f3019664032d9075b1e727b6c"},
|
||||
{file = "scipy-1.8.0.tar.gz", hash = "sha256:31d4f2d6b724bc9a98e527b5849b8a7e589bf1ea630c33aa563eda912c9ff0bd"},
|
||||
]
|
||||
six = [
|
||||
{file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"},
|
||||
|
@ -1596,8 +1669,8 @@ spacy = [
|
|||
{file = "spacy-3.2.1.tar.gz", hash = "sha256:f6ebac511627740a8ca2b117b91ef5515c8f0b2fb117a69ebe01d010dd4fc53c"},
|
||||
]
|
||||
spacy-legacy = [
|
||||
{file = "spacy-legacy-3.0.8.tar.gz", hash = "sha256:b4725c5c161f0685ab4fce3fc912bc68aefdb7e102ba9848e852bb5842256c2f"},
|
||||
{file = "spacy_legacy-3.0.8-py2.py3-none-any.whl", hash = "sha256:eb37a3540bb461b5fe9348d4976784f18a0e345982e41e2c5c7cd8229889e825"},
|
||||
{file = "spacy-legacy-3.0.9.tar.gz", hash = "sha256:4f7dcbc4e6c8e8cb4eadbb009f9c0a1a2a67442e0032c8d6776c9470c3759903"},
|
||||
{file = "spacy_legacy-3.0.9-py2.py3-none-any.whl", hash = "sha256:dfd58b0cc65b3596cb06f7b95e7bf4fff34668297c59eb179eb050db07b199df"},
|
||||
]
|
||||
spacy-loggers = [
|
||||
{file = "spacy-loggers-1.0.1.tar.gz", hash = "sha256:17d0e249b2e6c6546c49fc6561a0a685f91a8edbf24a5b2b7759ead443c74654"},
|
||||
|
@ -1626,38 +1699,42 @@ starlette = [
|
|||
{file = "starlette-0.16.0.tar.gz", hash = "sha256:e1904b5d0007aee24bdd3c43994be9b3b729f4f58e740200de1d623f8c3a8870"},
|
||||
]
|
||||
thinc = [
|
||||
{file = "thinc-8.0.13-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f818b9f012169a11beb3561c43dc52080588e50cf495733e492efab8b9b4135e"},
|
||||
{file = "thinc-8.0.13-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f520daf45b7f42a04363852df43be1b423ae42d9327709d74f6c3279b3f73778"},
|
||||
{file = "thinc-8.0.13-cp310-cp310-win_amd64.whl", hash = "sha256:2b217059c9e126220b77e7d6c9da56912c4e1eb4e8a11af14f17752e198e88cc"},
|
||||
{file = "thinc-8.0.13-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:0f956c693d180209075703072fd226a24408cbe80eb67bd3b6eea407f61cb283"},
|
||||
{file = "thinc-8.0.13-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a17d87469082b82c27b7d40dd86c793fc34c60f734209ee056cb02d7609f255b"},
|
||||
{file = "thinc-8.0.13-cp36-cp36m-win_amd64.whl", hash = "sha256:27ea64843d6af0f3de8c788ec2a00598a1e5b4d57aadb52845fa42e95e4038c2"},
|
||||
{file = "thinc-8.0.13-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:1f274bcaa781aaf1dba5eac7da7d88d9b0cb8c2fd7477647f0ca9d3221dfb958"},
|
||||
{file = "thinc-8.0.13-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d52a5621e1784af5c64af4cfa9b2924358ca07aafd99014c57a736cf032e42f7"},
|
||||
{file = "thinc-8.0.13-cp37-cp37m-win_amd64.whl", hash = "sha256:753f65e07860553551ed8806b934a74f26a4a50985d556ecd5c4ab50c29b3222"},
|
||||
{file = "thinc-8.0.13-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ffe0a4d74f2ba2819193a5d9179156256f44c69255d7ae286ce1861efcefbc64"},
|
||||
{file = "thinc-8.0.13-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b61f78f6f116d23438b034c3552804c9767c4165960b1d7e48f07b2e9a95afb0"},
|
||||
{file = "thinc-8.0.13-cp38-cp38-win_amd64.whl", hash = "sha256:ba576af211ad2b00af78ab3e24e689289b29af8a9e51619ad55fab86871d8652"},
|
||||
{file = "thinc-8.0.13-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:def8e96eddb5a098d07dcf8752266095e14a6cf5d056ff766e2cdc542eb63f02"},
|
||||
{file = "thinc-8.0.13-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ce322b66053819654d0444877154a08ed01cf5b45c6b3c9763e59b78af4f6039"},
|
||||
{file = "thinc-8.0.13-cp39-cp39-win_amd64.whl", hash = "sha256:b3ae088f60d3dfe6a88c6be37548aae40023e46a718cffe3e43953b4f0ffc340"},
|
||||
{file = "thinc-8.0.13.tar.gz", hash = "sha256:47662a3ae33d445a77b6ea7b772444805c7bba8991f122e350daf72dedc8171a"},
|
||||
{file = "thinc-8.0.15-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0368c0b279492c0ed0b5b1bc79614e8a335ae1ccc3b1617de46f04eb74dc9a43"},
|
||||
{file = "thinc-8.0.15-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a4276b64a8cd91197f30382c0874f59fa6c94ef533150d845b2f30998aae87cc"},
|
||||
{file = "thinc-8.0.15-cp310-cp310-win_amd64.whl", hash = "sha256:72cec290eb1b54ba6144b05d96f3247ea34eb41c66842961b05b408b93f2ba9b"},
|
||||
{file = "thinc-8.0.15-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:a4ee24a6505d63b6f0161f25d0f73f87ab569e0e1a9799a6baca97352788a91f"},
|
||||
{file = "thinc-8.0.15-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:376b196da6c69c8efaaf26fb99f6997543d80ea4bc5f4ab8600e9d1d521a7dc9"},
|
||||
{file = "thinc-8.0.15-cp36-cp36m-win_amd64.whl", hash = "sha256:bed92be72516b1511fecaf616ea31ff1c2e972a7ec4ad991c212f9b2f5c94183"},
|
||||
{file = "thinc-8.0.15-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:42641f021f4fdc47eaec4b9ff66246b153b9783ef24e2c266bf0f51eccd40db5"},
|
||||
{file = "thinc-8.0.15-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0557791e73865fa81f09623dd1f9b98b6d4ab80c63fca5f141530536516aac98"},
|
||||
{file = "thinc-8.0.15-cp37-cp37m-win_amd64.whl", hash = "sha256:f9ba4e4dac98e166950e004c87a0f57b8f8796ecd0e3b6973beb6febc20257ff"},
|
||||
{file = "thinc-8.0.15-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:489521ca3cca469d67432fc30f14c7c13c17320b179bf8e362319313feaafbb7"},
|
||||
{file = "thinc-8.0.15-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8ddda1aa1432eef8bab5c83e4cf2020f1ed891771a6dd86729f1aa6078f25f2c"},
|
||||
{file = "thinc-8.0.15-cp38-cp38-win_amd64.whl", hash = "sha256:70781a0802fbb62a27217ccb80e744e80a5b43f9107ac596c5cd2dc9878ae258"},
|
||||
{file = "thinc-8.0.15-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a1f19dd9a7121d332d16446db39b4999abb4f040ce7c71bc86ea05664c86d361"},
|
||||
{file = "thinc-8.0.15-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ecd8eab82598b079e901f16567818dc955481326c01d84b819c3c05801b97e07"},
|
||||
{file = "thinc-8.0.15-cp39-cp39-win_amd64.whl", hash = "sha256:5d98e6b3bf220c1068442d09d7c34dd8e52bbdfa43ea32f773747c5909a1c011"},
|
||||
{file = "thinc-8.0.15.tar.gz", hash = "sha256:2e315020da85c3791e191fbf37c4a2433f57cf322e27380da0cd4de99d96053b"},
|
||||
]
|
||||
threadpoolctl = [
|
||||
{file = "threadpoolctl-3.1.0-py3-none-any.whl", hash = "sha256:8b99adda265feb6773280df41eece7b2e6561b772d21ffd52e372f999024907b"},
|
||||
{file = "threadpoolctl-3.1.0.tar.gz", hash = "sha256:a335baacfaa4400ae1f0d8e3a58d6674d2f8828e3716bb2802c44955ad391380"},
|
||||
]
|
||||
tomli = [
|
||||
{file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"},
|
||||
{file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"},
|
||||
]
|
||||
tqdm = [
|
||||
{file = "tqdm-4.62.3-py2.py3-none-any.whl", hash = "sha256:8dd278a422499cd6b727e6ae4061c40b48fce8b76d1ccbf5d34fca9b7f925b0c"},
|
||||
{file = "tqdm-4.62.3.tar.gz", hash = "sha256:d359de7217506c9851b7869f3708d8ee53ed70a1b8edbba4dbcb47442592920d"},
|
||||
{file = "tqdm-4.63.0-py2.py3-none-any.whl", hash = "sha256:e643e071046f17139dea55b880dc9b33822ce21613b4a4f5ea57f202833dbc29"},
|
||||
{file = "tqdm-4.63.0.tar.gz", hash = "sha256:1d9835ede8e394bb8c9dcbffbca02d717217113adc679236873eeaac5bc0b3cd"},
|
||||
]
|
||||
typer = [
|
||||
{file = "typer-0.4.0-py3-none-any.whl", hash = "sha256:d81169725140423d072df464cad1ff25ee154ef381aaf5b8225352ea187ca338"},
|
||||
{file = "typer-0.4.0.tar.gz", hash = "sha256:63c3aeab0549750ffe40da79a1b524f60e08a2cbc3126c520ebf2eeaf507f5dd"},
|
||||
]
|
||||
typing-extensions = [
|
||||
{file = "typing_extensions-4.0.1-py3-none-any.whl", hash = "sha256:7f001e5ac290a0c0401508864c7ec868be4e701886d5b573a9528ed3973d9d3b"},
|
||||
{file = "typing_extensions-4.0.1.tar.gz", hash = "sha256:4ca091dea149f945ec56afb48dae714f21e8692ef22a395223bcd328961b6a0e"},
|
||||
{file = "typing_extensions-4.1.1-py3-none-any.whl", hash = "sha256:21c85e0fe4b9a155d0799430b0ad741cdce7e359660ccbd8b530613e8df88ce2"},
|
||||
{file = "typing_extensions-4.1.1.tar.gz", hash = "sha256:1a9462dcc3347a79b1f1c0271fbe79e844580bb598bafa1ed208b94da3cdcd42"},
|
||||
]
|
||||
ujson = [
|
||||
{file = "ujson-4.3.0-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:3609e0514f6f721c6c9818b9374ec91b994e59fb193af2f924ca3f2f32009f1c"},
|
||||
|
@ -1706,8 +1783,8 @@ ujson = [
|
|||
{file = "ujson-4.3.0.tar.gz", hash = "sha256:baee56eca35cb5fbe02c28bd9c0936be41a96fa5c0812d9d4b7edeb5c3d568a0"},
|
||||
]
|
||||
urllib3 = [
|
||||
{file = "urllib3-1.26.8-py2.py3-none-any.whl", hash = "sha256:000ca7f471a233c2251c6c7023ee85305721bfdf18621ebff4fd17a8653427ed"},
|
||||
{file = "urllib3-1.26.8.tar.gz", hash = "sha256:0e7c33d9a63e7ddfcb86780aac87befc2fbddf46c58dbb487e0855f7ceec283c"},
|
||||
{file = "urllib3-1.26.9-py2.py3-none-any.whl", hash = "sha256:44ece4d53fb1706f667c9bd1c648f5469a2ec925fcf3a776667042d645472c14"},
|
||||
{file = "urllib3-1.26.9.tar.gz", hash = "sha256:aabaf16477806a5e1dd19aa41f8c2b7950dd3c746362d7e3223dbe6de6ac448e"},
|
||||
]
|
||||
uvicorn = [
|
||||
{file = "uvicorn-0.16.0-py3-none-any.whl", hash = "sha256:d8c839231f270adaa6d338d525e2652a0b4a5f4c2430b5c4ef6ae4d11776b0d2"},
|
||||
|
|
|
@ -5,14 +5,16 @@ description = ""
|
|||
authors = ["Daoud Clarke <daoud.clarke@gmail.com>"]
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
python = "^3.10"
|
||||
pandas = "^1.3.4"
|
||||
python = ">=3.10,<3.11"
|
||||
pandas = "^1.3.5"
|
||||
scipy = "^1.8.0"
|
||||
scikit-learn = "^1.0.2"
|
||||
zstandard = "^0.16.0"
|
||||
mmh3 = "^3.0.0"
|
||||
fastapi = "^0.70.1"
|
||||
uvicorn = "^0.16.0"
|
||||
numpy = "==1.21.1"
|
||||
pyyaml = "==6.0"
|
||||
|
||||
# Optional dependencies do not get installed by default. Look under tool.poetry.extras section
|
||||
# to see which extras to use.
|
||||
botocore = {version= "==1.23.20", optional = true}
|
||||
|
|
Loading…
Reference in a new issue