diff --git a/machine-learning/export/env.yaml b/machine-learning/export/env.yaml index f53f5b001..c1f80b4b0 100644 --- a/machine-learning/export/env.yaml +++ b/machine-learning/export/env.yaml @@ -22,5 +22,5 @@ dependencies: - pip: - multilingual-clip - onnx-simplifier - - tensorflow + - tensorflow==2.14.* category: main diff --git a/machine-learning/export/models/tfclip.py b/machine-learning/export/models/tfclip.py index 4dbe00d10..8dee62f4d 100644 --- a/machine-learning/export/models/tfclip.py +++ b/machine-learning/export/models/tfclip.py @@ -10,7 +10,7 @@ from .util import ModelType, get_model_path class _CLIPWrapper(tf.Module): def __init__(self, model_name: str): super(_CLIPWrapper) - self.model = TFCLIPModel.from_pretrained(model_name) + self.model = TFCLIPModel.from_pretrained(model_name).half() @tf.function() def encode_image(self, input): @@ -22,11 +22,13 @@ class _CLIPWrapper(tf.Module): # exported model signatures use batch size 2 because of the following reasons: -# 1. ARM-NN cannot use dynamic batch sizes +# 1. ARM-NN cannot use dynamic batch sizes for complex models like CLIP ViT # 2. batch size 1 creates a larger TF-Lite model that uses a lot (50%) more RAM -# 3. batch size 2 is ~50% faster on GPU than 1 while 4 (or larger) are not faster +# 3. batch size 2 is ~50% faster on GPU than 1 while 4 (or larger) are not really faster # 4. 
batch size >2 wastes more computation if only a single image is processed -BATCH_SIZE = 2 +BATCH_SIZE_IMAGE = 2 +# On most small-scale systems there will only be one query at a time, no sense in batching +BATCH_SIZE_TEXT = 1 SIGNATURE_TEXT = "encode_text" SIGNATURE_IMAGE = "encode_image" @@ -52,12 +54,12 @@ def _export_temporary_tf_model(model_name, tmp_path: str, context_length: int): wrapper = _CLIPWrapper(model_name) conf = wrapper.model.config.vision_config spec_visual = tf.TensorSpec( - shape=(BATCH_SIZE, conf.num_channels, conf.image_size, conf.image_size), dtype=tf.float32 + shape=(BATCH_SIZE_IMAGE, conf.num_channels, conf.image_size, conf.image_size), dtype=tf.float32 ) encode_image = wrapper.encode_image.get_concrete_function(spec_visual) - spec_text = tf.TensorSpec(shape=(BATCH_SIZE, context_length), dtype=tf.int32) + spec_text = tf.TensorSpec(shape=(BATCH_SIZE_TEXT, context_length), dtype=tf.int32) encode_text = wrapper.encode_text.get_concrete_function(spec_text) - signatures = {"encode_text": encode_text, "encode_image": encode_image} + signatures = {SIGNATURE_IMAGE: encode_image, SIGNATURE_TEXT: encode_text} tf.saved_model.save(wrapper, tmp_path, signatures) diff --git a/machine-learning/export/run.py b/machine-learning/export/run.py index 49dfef5a1..6fa262ad0 100644 --- a/machine-learning/export/run.py +++ b/machine-learning/export/run.py @@ -4,9 +4,10 @@ from pathlib import Path from tempfile import TemporaryDirectory from huggingface_hub import create_repo, login, upload_folder -from models import mclip, openclip, tfclip from rich.progress import Progress +from models import mclip, openclip, tfclip + models = [ "RN50::openai", "RN50::yfcc15m", diff --git a/machine-learning/poetry.lock b/machine-learning/poetry.lock index 2b5a11e8b..d0c995952 100644 --- a/machine-learning/poetry.lock +++ b/machine-learning/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand. 
+# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. [[package]] name = "aiocache" @@ -3882,6 +3882,30 @@ files = [ [package.dependencies] mpmath = ">=0.19" +[[package]] +name = "tflite-runtime" +version = "2.14.0" +description = "TensorFlow Lite is for mobile and embedded devices." +optional = false +python-versions = "*" +files = [ + {file = "tflite_runtime-2.14.0-cp310-cp310-manylinux2014_x86_64.whl", hash = "sha256:bb11df4283e281cd609c621ac9470ad0cb5674408593272d7593a2c6bde8a808"}, + {file = "tflite_runtime-2.14.0-cp310-cp310-manylinux_2_34_aarch64.whl", hash = "sha256:d38c6885f5e9673c11a61ccec5cad7c032ab97340718d26b17794137f398b780"}, + {file = "tflite_runtime-2.14.0-cp310-cp310-manylinux_2_34_armv7l.whl", hash = "sha256:7fe33f763263d1ff2733a09945a7547ab063d8bc311fd2a1be8144d850016ad3"}, + {file = "tflite_runtime-2.14.0-cp311-cp311-manylinux2014_x86_64.whl", hash = "sha256:195ab752e7e57329a68e54dd3dd5439fad888b9bff1be0f0dc042a3237a90e4d"}, + {file = "tflite_runtime-2.14.0-cp311-cp311-manylinux_2_34_aarch64.whl", hash = "sha256:ce9fa5d770a9725c746dcbf6f59f3178233b3759f09982e8b2db8d2234c333b0"}, + {file = "tflite_runtime-2.14.0-cp311-cp311-manylinux_2_34_armv7l.whl", hash = "sha256:c4e66a74165b18089c86788400af19fa551768ac782d231a9beae2f6434f7949"}, + {file = "tflite_runtime-2.14.0-cp38-cp38-manylinux2014_x86_64.whl", hash = "sha256:9f965054467f7890e678943858c6ac76a5197b17f61b48dcbaaba0af41d541a7"}, + {file = "tflite_runtime-2.14.0-cp38-cp38-manylinux_2_34_aarch64.whl", hash = "sha256:437167fe3d8b12f50f5d694da8f45d268ab84a495e24c3dd810e02e1012125de"}, + {file = "tflite_runtime-2.14.0-cp38-cp38-manylinux_2_34_armv7l.whl", hash = "sha256:79d8e17f68cc940df7e68a177b22dda60fcffba195fb9dd908d03724d65fd118"}, + {file = "tflite_runtime-2.14.0-cp39-cp39-manylinux2014_x86_64.whl", hash = "sha256:4aa740210a0fd9e4db4a46e9778914846b136e161525681b41575ca4896158fb"}, + {file = "tflite_runtime-2.14.0-cp39-cp39-manylinux_2_34_aarch64.whl", 
hash = "sha256:be198b7dc4401204be54a15884d9e336389790eb707439524540f5a9329fdd02"}, + {file = "tflite_runtime-2.14.0-cp39-cp39-manylinux_2_34_armv7l.whl", hash = "sha256:eca7672adca32727bbf5c0f1caf398fc17bbe222f2a684c7a2caea6fc6767203"}, +] + +[package.dependencies] +numpy = ">=1.23.2" + [[package]] name = "threadpoolctl" version = "3.2.0" @@ -4025,6 +4049,14 @@ dev = ["tokenizers[testing]"] docs = ["setuptools_rust", "sphinx", "sphinx_rtd_theme"] testing = ["black (==22.3)", "datasets", "numpy", "pytest", "requests"] +[[package]] +name = "torch" +version = "2.0.1" +description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration" +optional = false +python-versions = "*" +files = [] + [[package]] name = "torch" version = "2.1.0" @@ -4772,4 +4804,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = "bba5f87aa67bc1d2283a9f4b471ef78e572337f22413870d324e908014410d53" +content-hash = "56614afdeeeec3b7f0b786771a8fcc126761c882b1033664056042833767e521" diff --git a/machine-learning/pyproject.toml b/machine-learning/pyproject.toml index cd4acf9be..32f84997f 100644 --- a/machine-learning/pyproject.toml +++ b/machine-learning/pyproject.toml @@ -29,6 +29,7 @@ python-multipart = "^0.0.6" orjson = "^3.9.5" safetensors = "0.3.2" gunicorn = "^21.1.0" +tflite-runtime = "^2.14.0" [tool.poetry.group.dev.dependencies] mypy = "^1.3.0"