models.py

import os
from pathlib import Path
from typing import Any

import cv2 as cv
import torch
from insightface.app import FaceAnalysis
from sentence_transformers import SentenceTransformer
from transformers import Pipeline, pipeline

# Downloaded model files are cached under this folder, namespaced by device,
# model type, and model name (see _get_cache_dir).
cache_folder = os.getenv("MACHINE_LEARNING_CACHE_FOLDER", "/cache")
device = "cuda" if torch.cuda.is_available() else "cpu"

def get_model(model_name: str, model_type: str, **model_kwargs):
    """
    Instantiates the specified model.

    Args:
        model_name: Name of the model in the model hub used for the task.
        model_type: Model type or task, which determines which model zoo is used.
            `facial-recognition` uses InsightFace, while all other models use the HF Model Hub.
            Options: `image-classification`, `clip`, `facial-recognition`, `tokenizer`, `processor`

    Returns:
        model: The requested model.
    """
    cache_dir = _get_cache_dir(model_name, model_type)
    match model_type:
        case "facial-recognition":
            model = _load_facial_recognition(
                model_name, cache_dir=cache_dir, **model_kwargs
            )
        case "clip":
            model = SentenceTransformer(
                model_name, cache_folder=cache_dir, **model_kwargs
            )
        case _:
            model = pipeline(
                model_type,
                model_name,
                model_kwargs={"cache_dir": cache_dir, **model_kwargs},
            )

    return model
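
# Illustrative usage (a sketch, not part of this module's API surface; the model
# names below are examples of what each loader accepts, not values pinned here):
#
#   clip_model = get_model("clip-ViT-B-32", "clip")
#   classifier = get_model("microsoft/resnet-50", "image-classification")
#   face_model = get_model("buffalo_l", "facial-recognition", min_face_score=0.7)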

def run_classification(
    model: Pipeline, image_path: str, min_score: float | None = None
) -> list[str]:
    predictions: list[dict[str, Any]] = model(image_path)  # type: ignore
    result = {
        tag
        for pred in predictions
        for tag in pred["label"].split(", ")
        if min_score is None or pred["score"] >= min_score
    }

    return list(result)
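
# Example (illustrative values): given predictions such as
#   [{"label": "tabby, tabby cat", "score": 0.92}, {"label": "lynx", "score": 0.31}]
# run_classification(model, image_path, min_score=0.5) yields ["tabby", "tabby cat"]:
# comma-separated labels are split into individual tags, the low-scoring prediction
# is filtered out, and the set deduplicates tags (so list order is not guaranteed).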

def run_facial_recognition(
    model: FaceAnalysis, image_path: str
) -> list[dict[str, Any]]:
    img = cv.imread(image_path)
    height, width, _ = img.shape
    results = []
    faces = model.get(img)
    for face in faces:
        x1, y1, x2, y2 = face.bbox
        results.append(
            {
                "imageWidth": width,
                "imageHeight": height,
                "boundingBox": {
                    "x1": round(x1),
                    "y1": round(y1),
                    "x2": round(x2),
                    "y2": round(y2),
                },
                "score": face.det_score.item(),
                "embedding": face.normed_embedding.tolist(),
            }
        )
    return results
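
# Each returned entry is JSON-serializable; shape sketch with illustrative values:
#   {"imageWidth": 1920, "imageHeight": 1080,
#    "boundingBox": {"x1": 412, "y1": 230, "x2": 598, "y2": 460},
#    "score": 0.83, "embedding": [0.013, -0.072, ...]}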

def _load_facial_recognition(
    model_name: str,
    min_face_score: float | None = None,
    cache_dir: Path | str | None = None,
    **model_kwargs,
):
    if cache_dir is None:
        cache_dir = _get_cache_dir(model_name, "facial-recognition")
    if isinstance(cache_dir, Path):
        cache_dir = cache_dir.as_posix()
    if min_face_score is None:
        min_face_score = float(os.getenv("MACHINE_LEARNING_MIN_FACE_SCORE", 0.7))

    model = FaceAnalysis(
        name=model_name,
        root=cache_dir,
        allowed_modules=["detection", "recognition"],
        **model_kwargs,
    )
    model.prepare(ctx_id=0, det_thresh=min_face_score, det_size=(640, 640))
    return model
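
# Example (illustrative): setting MACHINE_LEARNING_MIN_FACE_SCORE=0.8 raises the
# detection threshold for every call, while passing min_face_score explicitly
# overrides the environment variable, since the env var is only read when the
# argument is None.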

def _get_cache_dir(model_name: str, model_type: str) -> Path:
    return Path(cache_folder, device, model_type, model_name)
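
# For example, with the defaults above a "clip" model named "clip-ViT-B-32" on a
# CUDA machine resolves to /cache/cuda/clip/clip-ViT-B-32 (path illustrative:
# cache_folder / device / model_type / model_name).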