Rework installation of spaCy models for clarity

- Install the wheel package for compatibility with future pip
- Use `spacy download` for installing model(s)
- Use `spacy validate` for checking model compatibility explicitly

Signed-off-by: Masanori Ogino <167209+omasanori@users.noreply.github.com>
This commit is contained in:
Masanori Ogino 2022-12-27 11:30:35 +09:00
parent 80282cfc7a
commit 71187a3938
2 changed files with 5 additions and 6 deletions

View file

@ -25,8 +25,11 @@ COPY mwmbl /app/mwmbl
# Use pip to install the mwmbl Python package.
# PEP 517, PEP 518 and related PEPs define a standardized Python packaging API, which
# lets pip install Poetry-managed packages directly.
RUN /venv/bin/pip install pip --upgrade && \
/venv/bin/pip install .
# Install the mwmbl package into the virtualenv, then install the spaCy English model.
# - wheel is installed alongside the pip upgrade for compatibility with future pip releases.
# - en-core-web-sm requires a compatible version of spacy, so the model is requested by its
#   full name-and-version string (the form `spacy download --direct` expects).
# - `spacy validate` explicitly checks the installed model against the installed spaCy version.
RUN /venv/bin/pip install pip wheel --upgrade && \
/venv/bin/pip install . && \
/venv/bin/python -m spacy download en_core_web_sm-3.2.0 --direct && \
/venv/bin/python -m spacy validate
FROM base as final

View file

@ -31,9 +31,6 @@ langdetect = {version= "==1.0.9", optional = true}
pyarrow = {version= "==6.0.0", optional = true}
pyspark = {version= "==3.2.0", optional = true}
Levenshtein = {version= "==0.16.0", optional = true}
# en-core-web-sm requires a compatible version of spacy
en-core-web-sm = {url = "https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.2.0/en_core_web_sm-3.2.0.tar.gz"}
[tool.poetry.extras]
indexer = [
@ -47,7 +44,6 @@ indexer = [
"pyarrow",
"pyspark",
"Levenshtein",
# en-core-web-sm requires a compatible version of spacy
]
[tool.poetry.dev-dependencies]