immich/machine-learning/app/models/clip.py

23 lines
604 B
Python
Raw Normal View History

from pathlib import Path
from typing import Any
from PIL.Image import Image
from sentence_transformers import SentenceTransformer
from ..schemas import ModelType
from .base import InferenceModel
class CLIPSTEncoder(InferenceModel):
    """CLIP inference model backed by a ``SentenceTransformer`` instance.

    The model is created in :meth:`load` and queried in :meth:`predict`.
    ``self.model_name`` and ``self.cache_dir`` are read but not defined
    here -- presumably they are provided by ``InferenceModel``; confirm
    against the base class.
    """

    _model_type = ModelType.CLIP

    def load(self, **model_kwargs: Any) -> None:
        """Build the ``SentenceTransformer`` and keep it on ``self.model``.

        Args:
            **model_kwargs: Extra keyword arguments forwarded unchanged to
                the ``SentenceTransformer`` constructor.
        """
        name = self.model_name
        # The cache location is handed over as a POSIX-style string.
        cache_folder = self.cache_dir.as_posix()
        self.model = SentenceTransformer(name, cache_folder=cache_folder, **model_kwargs)

    def predict(self, image_or_text: Image | str) -> list[float]:
        """Encode one image or one piece of text.

        Args:
            image_or_text: A PIL image or a string, passed directly to the
                model's ``encode`` method.

        Returns:
            The result of ``encode(...)`` converted with ``.tolist()``.
        """
        embedding = self.model.encode(image_or_text)
        return embedding.tolist()