# clip.py
from typing import Any

from PIL.Image import Image
from sentence_transformers import SentenceTransformer
from sentence_transformers.util import snapshot_download

from ..schemas import ModelType
from .base import InferenceModel
  7. class CLIPSTEncoder(InferenceModel):
  8. _model_type = ModelType.CLIP
  9. def _download(self, **model_kwargs: Any) -> None:
  10. repo_id = self.model_name if "/" in self.model_name else f"sentence-transformers/{self.model_name}"
  11. snapshot_download(
  12. cache_dir=self.cache_dir,
  13. repo_id=repo_id,
  14. library_name="sentence-transformers",
  15. ignore_files=["flax_model.msgpack", "rust_model.ot", "tf_model.h5"],
  16. )
  17. def _load(self, **model_kwargs: Any) -> None:
  18. self.model = SentenceTransformer(
  19. self.model_name,
  20. cache_folder=self.cache_dir.as_posix(),
  21. **model_kwargs,
  22. )
  23. def _predict(self, image_or_text: Image | str) -> list[float]:
  24. return self.model.encode(image_or_text).tolist()