models.py 3.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119
  1. import torch
  2. from insightface.app import FaceAnalysis
  3. from pathlib import Path
  4. from transformers import pipeline, Pipeline
  5. from sentence_transformers import SentenceTransformer
  6. from typing import Any, BinaryIO
  7. import cv2 as cv
  8. import numpy as np
  9. from PIL import Image
  10. from config import settings
  # Run on the GPU whenever torch reports CUDA as usable, otherwise on the CPU.
  if torch.cuda.is_available():
      device = "cuda"
  else:
      device = "cpu"
  def get_model(model_name: str, model_type: str, **model_kwargs):
      """
      Build and return the model that serves the requested task.

      Args:
          model_name: Identifier of the model to load.
          model_type: Task the model is used for; it selects the loader.
              `facial-recognition` is loaded through Insightface's
              `FaceAnalysis`, `clip` through `SentenceTransformer`, and any
              other value is handed to the `transformers` `pipeline` factory
              as its task name.
          **model_kwargs: Extra keyword arguments forwarded to the loader.

      Returns:
          model: The loaded model object.
      """
      cache_dir = _get_cache_dir(model_name, model_type)

      if model_type == "facial-recognition":
          return _load_facial_recognition(
              model_name, cache_dir=cache_dir, **model_kwargs
          )

      if model_type == "clip":
          return SentenceTransformer(
              model_name, cache_folder=cache_dir, **model_kwargs
          )

      # Every remaining model type is treated as a pipeline task name.
      hub_kwargs = {"cache_dir": cache_dir, **model_kwargs}
      return pipeline(model_type, model_name, model_kwargs=hub_kwargs)
  def run_classification(
      model: Pipeline, image: Image.Image, min_score: float | None = None
  ) -> list[str]:
      """
      Tag an image using a classification pipeline.

      Args:
          model: Callable pipeline. It is invoked as `model(image)` and must
              return a list of dicts that each carry a `label` and a `score`.
          image: Image handed unchanged to the model.
          min_score: Predictions scoring below this value are dropped.
              `None` keeps every prediction.

      Returns:
          The unique tags, in the order the model first produced them. A
          label such as `"tabby, cat"` contributes one tag per `", "`
          separated part.
      """
      predictions: list[dict[str, Any]] = model(image)  # type: ignore
      tags = (
          tag
          for pred in predictions
          if min_score is None or pred["score"] >= min_score
          for tag in pred["label"].split(", ")
      )
      # dict.fromkeys de-duplicates while keeping first-seen order, so the
      # result is stable across runs; a set's iteration order for strings
      # changes with hash randomization.
      return list(dict.fromkeys(tags))
  def run_facial_recognition(
      model: FaceAnalysis, image: bytes
  ) -> list[dict[str, Any]]:
      """
      Detect the faces in an encoded image and describe each of them.

      Args:
          model: Face analysis model; `model.get` is called on the decoded
              image.
          image: Raw bytes of an encoded image file.

      Returns:
          One dict per detected face holding the image width and height, the
          bounding box corners rounded to whole numbers, the detection score
          and the normalized embedding as a plain list.

      Raises:
          ValueError: If `image` is empty or cannot be decoded.
      """
      file_bytes = np.frombuffer(image, dtype=np.uint8)
      # imdecode reports undecodable data by returning None rather than
      # raising, and an empty buffer is never a valid image; fail loudly
      # here instead of with an AttributeError on `img.shape` below.
      img = cv.imdecode(file_bytes, cv.IMREAD_COLOR) if file_bytes.size else None
      if img is None:
          raise ValueError("Image data is empty or could not be decoded")

      height, width, _ = img.shape
      results = []
      for face in model.get(img):
          x1, y1, x2, y2 = face.bbox
          results.append(
              {
                  "imageWidth": width,
                  "imageHeight": height,
                  "boundingBox": {
                      "x1": round(x1),
                      "y1": round(y1),
                      "x2": round(x2),
                      "y2": round(y2),
                  },
                  "score": face.det_score.item(),
                  "embedding": face.normed_embedding.tolist(),
              }
          )
      return results
  def _load_facial_recognition(
      model_name: str,
      min_face_score: float | None = None,
      cache_dir: Path | str | None = None,
      **model_kwargs,
  ):
      """
      Create an Insightface `FaceAnalysis` model and prepare it for inference.

      Args:
          model_name: Name passed to `FaceAnalysis`.
          min_face_score: Detection threshold; `None` falls back to
              `settings.min_face_score`.
          cache_dir: Root directory handed to `FaceAnalysis`; `None` falls
              back to the directory returned by `_get_cache_dir`.
          **model_kwargs: Extra keyword arguments forwarded to `FaceAnalysis`.

      Returns:
          The prepared `FaceAnalysis` instance, restricted to the detection
          and recognition modules.
      """
      root = cache_dir
      if root is None:
          root = _get_cache_dir(model_name, "facial-recognition")
      # FaceAnalysis is given the root as a string, so convert Path objects.
      if isinstance(root, Path):
          root = root.as_posix()

      det_thresh = (
          settings.min_face_score if min_face_score is None else min_face_score
      )

      face_model = FaceAnalysis(
          name=model_name,
          root=root,
          allowed_modules=["detection", "recognition"],
          **model_kwargs,
      )
      face_model.prepare(ctx_id=0, det_thresh=det_thresh, det_size=(640, 640))
      return face_model
  def _get_cache_dir(model_name: str, model_type: str) -> Path:
      """
      Return the cache directory for a model.

      The path is `<settings.cache_folder>/<device>/<model_type>/<model_name>`.
      """
      return Path(settings.cache_folder) / device / model_type / model_name