ソースを参照

feat(server): Machine-learning image optimisations (#1908)

* Use multi stage build to slim down ML image size

* Use gunicorn as WSGI server in ML image

* Configure gunicorn server for ML use case

* Use a requirements.txt file to install Python dependencies in ML image

* Make the ML listen IP configurable
Olly Welch 2 年 前
コミット
977740045a

+ 17 - 10
machine-learning/Dockerfile

@@ -1,19 +1,26 @@
-FROM python:3.10
+FROM python:3.10 as builder
 
-ENV TRANSFORMERS_CACHE=/cache \
-    PYTHONDONTWRITEBYTECODE=1 \
+ENV PYTHONDONTWRITEBYTECODE=1 \
     PYTHONUNBUFFERED=1 \
     PIP_NO_CACHE_DIR=true
 
-WORKDIR /usr/src/app
+COPY requirements.txt ./
+
+RUN python -m venv /opt/venv && \
+    /opt/venv/bin/pip install --upgrade pip setuptools wheel && \
+    /opt/venv/bin/pip install --no-deps -r requirements.txt
+
+FROM python:3.10-slim
 
-RUN python -m venv /opt/venv
-ENV PATH="/opt/venv/bin:$PATH"
+COPY --from=builder /opt/venv /opt/venv
 
-RUN pip install --pre torch  -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html
-RUN pip install transformers tqdm numpy scikit-learn scipy nltk sentencepiece flask Pillow
-RUN pip install --no-deps sentence-transformers
+ENV TRANSFORMERS_CACHE=/cache \
+    PYTHONDONTWRITEBYTECODE=1 \
+    PYTHONUNBUFFERED=1 \
+    PATH="/opt/venv/bin:$PATH"
+
+WORKDIR /usr/src/app
 
 COPY . .
 
-CMD ["python", "src/main.py"]
+CMD ["gunicorn", "src.main:server"]

+ 29 - 0
machine-learning/gunicorn.conf.py

@@ -0,0 +1,29 @@
+"""
+Gunicorn configuration options.
+https://docs.gunicorn.org/en/stable/settings.html
+"""
+import os
+
+
+# Set the bind address based on the env
+port = os.getenv("MACHINE_LEARNING_PORT") or "3003"
+listen_ip = os.getenv("MACHINE_LEARNING_IP") or "0.0.0.0"
+bind = [f"{listen_ip}:{port}"]
+
+# Preload the Flask app / models etc. before starting the server
+preload_app = True
+
+# Logging settings - log to stdout and set log level
+accesslog = "-"
+loglevel = os.getenv("MACHINE_LEARNING_LOG_LEVEL") or "info"
+
+# Worker settings
+# ----------------------
+# It is important these are chosen carefully as per
+# https://pythonspeed.com/articles/gunicorn-in-docker/
+# Otherwise we get workers failing to respond to heartbeat checks,
+# especially as requests take a long time to complete.
+workers = 2
+threads = 4
+worker_tmp_dir = "/dev/shm"
+timeout = 60

+ 33 - 0
machine-learning/requirements.txt

@@ -0,0 +1,33 @@
+certifi==2022.12.7
+charset-normalizer==3.0.1
+click==8.1.3
+filelock==3.9.0
+Flask==2.2.3
+gunicorn==20.1.0
+huggingface-hub==0.12.1
+idna==3.4
+importlib-metadata==6.0.0
+itsdangerous==2.1.2
+Jinja2==3.1.2
+joblib==1.2.0
+MarkupSafe==2.1.2
+nltk==3.8.1
+numpy==1.24.2
+packaging==23.0
+Pillow==9.4.0
+PyYAML==6.0
+regex==2022.10.31
+requests==2.28.2
+scikit-learn==1.2.1
+scipy==1.10.1
+sentence-transformers==2.2.2
+sentencepiece==0.1.97
+threadpoolctl==3.1.0
+tokenizers==0.13.2
+torch==1.13.1 -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html
+tqdm==4.64.1
+transformers==4.26.1
+typing-extensions==4.5.0
+urllib3==1.26.14
+Werkzeug==2.2.3
+zipp==3.15.0