Compare commits
2 commits
main
...
feat/ml-ex
Author | SHA1 | Date | |
---|---|---|---|
|
683bb88f8b | ||
|
ae80def7f2 |
6 changed files with 137 additions and 74 deletions
|
@ -14,8 +14,7 @@ RUN micromamba install -y -n base -f /tmp/conda-lock.yml && \
|
||||||
|
|
||||||
WORKDIR /usr/src/app
|
WORKDIR /usr/src/app
|
||||||
|
|
||||||
COPY --chown=$MAMBA_USER:$MAMBA_USER start.sh .
|
COPY --chown=$MAMBA_USER:$MAMBA_USER export .
|
||||||
COPY --chown=$MAMBA_USER:$MAMBA_USER app .
|
|
||||||
|
|
||||||
ENTRYPOINT ["/usr/local/bin/_entrypoint.sh"]
|
ENTRYPOINT ["/usr/local/bin/_entrypoint.sh"]
|
||||||
CMD ["./start.sh"]
|
# Exec-form CMD needs one array element per argument; a single "python -m run"
# string would be looked up as the name of one executable.
CMD ["python", "-m", "run"]
|
||||||
|
|
0
machine-learning/export/__init__.py
Normal file
0
machine-learning/export/__init__.py
Normal file
9
machine-learning/export/models/constants.py
Normal file
9
machine-learning/export/models/constants.py
Normal file
|
@ -0,0 +1,9 @@
|
||||||
|
from export.models.openclip import OpenCLIPModelConfig
|
||||||
|
|
||||||
|
|
||||||
|
MCLIP_TO_OPENCLIP = {
|
||||||
|
"XLM-Roberta-Large-Vit-B-32": OpenCLIPModelConfig("ViT-B-32", "openai"),
|
||||||
|
"XLM-Roberta-Large-Vit-B-16Plus": OpenCLIPModelConfig("ViT-B-16-plus-240", "laion400m_e32"),
|
||||||
|
"LABSE-Vit-L-14": OpenCLIPModelConfig("ViT-L-14", "openai"),
|
||||||
|
"XLM-Roberta-Large-Vit-L-14": OpenCLIPModelConfig("ViT-L-14", "openai"),
|
||||||
|
}
|
|
@ -1,22 +1,15 @@
|
||||||
import tempfile
|
import tempfile
|
||||||
import warnings
|
import warnings
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from export.models.constants import MCLIP_TO_OPENCLIP
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
from multilingual_clip.pt_multilingual_clip import MultilingualCLIP
|
from multilingual_clip.pt_multilingual_clip import MultilingualCLIP
|
||||||
from transformers import AutoTokenizer
|
from transformers import AutoTokenizer
|
||||||
|
|
||||||
from .openclip import OpenCLIPModelConfig
|
|
||||||
from .openclip import to_onnx as openclip_to_onnx
|
from .openclip import to_onnx as openclip_to_onnx
|
||||||
from .optimize import optimize
|
from .optimize import optimize
|
||||||
from .util import get_model_path
|
from .util import get_model_path, clean_name
|
||||||
|
|
||||||
_MCLIP_TO_OPENCLIP = {
|
|
||||||
"M-CLIP/XLM-Roberta-Large-Vit-B-32": OpenCLIPModelConfig("ViT-B-32", "openai"),
|
|
||||||
"M-CLIP/XLM-Roberta-Large-Vit-B-16Plus": OpenCLIPModelConfig("ViT-B-16-plus-240", "laion400m_e32"),
|
|
||||||
"M-CLIP/LABSE-Vit-L-14": OpenCLIPModelConfig("ViT-L-14", "openai"),
|
|
||||||
"M-CLIP/XLM-Roberta-Large-Vit-L-14": OpenCLIPModelConfig("ViT-L-14", "openai"),
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def to_onnx(
|
def to_onnx(
|
||||||
|
@ -33,7 +26,7 @@ def to_onnx(
|
||||||
param.requires_grad_(False)
|
param.requires_grad_(False)
|
||||||
|
|
||||||
export_text_encoder(model, textual_path)
|
export_text_encoder(model, textual_path)
|
||||||
openclip_to_onnx(_MCLIP_TO_OPENCLIP[model_name], output_dir_visual)
|
openclip_to_onnx(MCLIP_TO_OPENCLIP[clean_name(model_name)], output_dir_visual)
|
||||||
optimize(textual_path)
|
optimize(textual_path)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -3,6 +3,9 @@ from pathlib import Path
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
|
|
||||||
|
_clean_name = str.maketrans(":\\/", "___", ".")
|
||||||
|
|
||||||
|
|
||||||
def get_model_path(output_dir: Path | str) -> Path:
|
def get_model_path(output_dir: Path | str) -> Path:
|
||||||
output_dir = Path(output_dir)
|
output_dir = Path(output_dir)
|
||||||
output_dir.mkdir(parents=True, exist_ok=True)
|
output_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
@ -13,3 +16,7 @@ def save_config(config: Any, output_path: Path | str) -> None:
|
||||||
output_path = Path(output_path)
|
output_path = Path(output_path)
|
||||||
output_path.parent.mkdir(parents=True, exist_ok=True)
|
output_path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
json.dump(config, output_path.open("w"))
|
json.dump(config, output_path.open("w"))
|
||||||
|
|
||||||
|
|
||||||
|
def clean_name(model_name: str) -> str:
    """Return a sanitized short name for ``model_name``.

    Only the text after the last ``/`` is kept (the whole string when there is
    no ``/``); that remainder is then passed through the module-level
    ``_clean_name`` translation table.
    """
    _, _, short_name = model_name.rpartition("/")
    return short_name.translate(_clean_name)
|
||||||
|
|
|
@ -1,76 +1,131 @@
|
||||||
|
from enum import StrEnum
|
||||||
import gc
|
import gc
|
||||||
import os
|
import os
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from tempfile import TemporaryDirectory
|
from tempfile import TemporaryDirectory
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
from huggingface_hub import create_repo, login, upload_folder
|
from huggingface_hub import create_repo, upload_folder
|
||||||
from models import mclip, openclip
|
from export.models import mclip, openclip, insightface
|
||||||
|
from export.models.util import clean_name
|
||||||
from rich.progress import Progress
|
from rich.progress import Progress
|
||||||
|
import typer
|
||||||
|
|
||||||
models = [
|
|
||||||
"RN50::openai",
|
|
||||||
"RN50::yfcc15m",
|
|
||||||
"RN50::cc12m",
|
|
||||||
"RN101::openai",
|
|
||||||
"RN101::yfcc15m",
|
|
||||||
"RN50x4::openai",
|
|
||||||
"RN50x16::openai",
|
|
||||||
"RN50x64::openai",
|
|
||||||
"ViT-B-32::openai",
|
|
||||||
"ViT-B-32::laion2b_e16",
|
|
||||||
"ViT-B-32::laion400m_e31",
|
|
||||||
"ViT-B-32::laion400m_e32",
|
|
||||||
"ViT-B-32::laion2b-s34b-b79k",
|
|
||||||
"ViT-B-16::openai",
|
|
||||||
"ViT-B-16::laion400m_e31",
|
|
||||||
"ViT-B-16::laion400m_e32",
|
|
||||||
"ViT-B-16-plus-240::laion400m_e31",
|
|
||||||
"ViT-B-16-plus-240::laion400m_e32",
|
|
||||||
"ViT-L-14::openai",
|
|
||||||
"ViT-L-14::laion400m_e31",
|
|
||||||
"ViT-L-14::laion400m_e32",
|
|
||||||
"ViT-L-14::laion2b-s32b-b82k",
|
|
||||||
"ViT-L-14-336::openai",
|
|
||||||
"ViT-H-14::laion2b-s32b-b79k",
|
|
||||||
"ViT-g-14::laion2b-s12b-b42k",
|
|
||||||
"M-CLIP/LABSE-Vit-L-14",
|
|
||||||
"M-CLIP/XLM-Roberta-Large-Vit-B-32",
|
|
||||||
"M-CLIP/XLM-Roberta-Large-Vit-B-16Plus",
|
|
||||||
"M-CLIP/XLM-Roberta-Large-Vit-L-14",
|
|
||||||
]
|
|
||||||
|
|
||||||
login(token=os.environ["HF_AUTH_TOKEN"])
|
app = typer.Typer()
|
||||||
|
|
||||||
with Progress() as progress:
|
|
||||||
task1 = progress.add_task("[green]Exporting models...", total=len(models))
|
|
||||||
task2 = progress.add_task("[yellow]Uploading models...", total=len(models))
|
|
||||||
|
|
||||||
with TemporaryDirectory() as tmp:
|
class ModelLibrary(StrEnum):
|
||||||
tmpdir = Path(tmp)
|
MCLIP = "mclip"
|
||||||
for model in models:
|
OPENCLIP = "openclip"
|
||||||
model_name = model.split("/")[-1].replace("::", "__")
|
INSIGHTFACE = "insightface"
|
||||||
config_path = tmpdir / model_name / "config.json"
|
|
||||||
|
|
||||||
def upload() -> None:
|
|
||||||
progress.update(task2, description=f"[yellow]Uploading {model_name}")
|
|
||||||
repo_id = f"immich-app/{model_name}"
|
|
||||||
|
|
||||||
create_repo(repo_id, exist_ok=True)
|
def _export(model_name: str, library: ModelLibrary, export_dir: Path) -> None:
|
||||||
upload_folder(repo_id=repo_id, folder_path=tmpdir / model_name)
|
visual_dir = export_dir / "visual"
|
||||||
progress.update(task2, advance=1)
|
textual_dir = export_dir / "textual"
|
||||||
|
match library:
|
||||||
|
case ModelLibrary.MCLIP:
|
||||||
|
insightface.to_onnx(model_name, visual_dir, textual_dir)
|
||||||
|
case ModelLibrary.OPENCLIP:
|
||||||
|
mclip.to_onnx(model_name, visual_dir, textual_dir)
|
||||||
|
case ModelLibrary.INSIGHTFACE:
|
||||||
|
name, _, pretrained = model_name.partition("__")
|
||||||
|
openclip.to_onnx(openclip.OpenCLIPModelConfig(name, pretrained), visual_dir, textual_dir)
|
||||||
|
|
||||||
def export() -> None:
|
gc.collect()
|
||||||
progress.update(task1, description=f"[green]Exporting {model_name}")
|
|
||||||
visual_dir = tmpdir / model_name / "visual"
|
|
||||||
textual_dir = tmpdir / model_name / "textual"
|
|
||||||
if model.startswith("M-CLIP"):
|
|
||||||
mclip.to_onnx(model, visual_dir, textual_dir)
|
|
||||||
else:
|
|
||||||
name, _, pretrained = model_name.partition("__")
|
|
||||||
openclip.to_onnx(openclip.OpenCLIPModelConfig(name, pretrained), visual_dir, textual_dir)
|
|
||||||
|
|
||||||
progress.update(task1, advance=1)
|
|
||||||
gc.collect()
|
|
||||||
|
|
||||||
export()
|
def _upload(repo_id: str, upload_dir: Path, auth_token: str | None = os.environ.get("HF_AUTH_TOKEN", None)) -> None:
    """Ensure the repository ``repo_id`` exists, then upload ``upload_dir`` to it.

    Args:
        repo_id: Target repository identifier.
        upload_dir: Local folder whose contents are uploaded.
        auth_token: Token passed to both hub calls. The default is read from
            the ``HF_AUTH_TOKEN`` environment variable once, when this
            function is defined, not on each call.
    """
    # exist_ok=True makes repeated uploads to the same repository safe.
    create_repo(repo_id=repo_id, token=auth_token, exist_ok=True)
    upload_folder(repo_id=repo_id, token=auth_token, folder_path=upload_dir)
|
||||||
|
|
||||||
|
|
||||||
|
@app.command()
def export(
    models: list[str] = typer.Argument(
        ..., help="The model(s) to be exported. Model names should be the same as used in the associated library."
    ),
    library: ModelLibrary = typer.Option(
        ..., "--library", "-l", help="The library associated with the models to be exported."
    ),
    output_dir: Optional[Path] = typer.Option(
        None,
        "--output-dir",
        "-o",
        help="Directory where exported models will be stored. Defaults to a temporary directory.",
    ),
    should_upload: bool = typer.Option(False, "--upload", "-u", help="Whether to upload the exported models."),
    auth_token: Optional[str] = typer.Option(
        os.environ.get("HF_AUTH_TOKEN", None),
        "--auth_token",
        "-t",
        help="If uploading models to Hugging Face, the auth token of the user or organisation.",
    ),
    repo_prefix: str = typer.Option(
        "immich-app",
        "--repo_prefix",
        "-p",
        help="If uploading models to Hugging Face, the prefix to put before the model name. Can be a username or organisation.",
    ),
) -> None:
    """Export each model in ``models`` and optionally upload the results.

    Every model is written to ``<output_dir>/<cleaned name>``. When no output
    directory is given, a temporary directory is used and removed on return.

    Raises:
        ValueError: If ``models`` is empty.
    """
    if not models:
        raise ValueError("No models specified")

    # The temporary directory is the outermost context so that, when it is the
    # export target, the files still exist while the upload step runs.
    with TemporaryDirectory() as tmp:
        export_root = output_dir if output_dir else Path(tmp)

        with Progress() as progress:
            task1 = progress.add_task("[green]Exporting model(s)...", total=len(models))
            for model_name in models:
                cleaned_name = clean_name(model_name)
                model_dir = export_root / cleaned_name
                progress.update(task1, description=f"[green]Exporting {cleaned_name}")
                _export(model_name, library, model_dir)
                progress.update(task1, advance=1, description=f"[green]Exported {cleaned_name}")

        # upload() opens its own Progress display, so it is called only after
        # the export display above has been closed.
        if should_upload:
            upload(models, export_root, auth_token, repo_prefix)
|
||||||
|
|
||||||
|
|
||||||
|
@app.command()
def upload(
    models: list[str] = typer.Argument(
        ..., help="The model(s) to be uploaded. Model names should be the same as used in the associated library."
    ),
    output_dir: Optional[Path] = typer.Option(
        None,
        "--output-dir",
        "-o",
        help="Directory containing the exported models to upload.",
    ),
    auth_token: Optional[str] = typer.Option(
        os.environ.get("HF_AUTH_TOKEN", None),
        "--auth_token",
        "-t",
        help="The Hugging Face auth token of the user or organisation.",
    ),
    repo_prefix: str = typer.Option(
        "immich-app",
        "--repo_prefix",
        "-p",
        help="The name to put before the model name to form the Hugging Face repo name. Can be a username or organisation.",
    ),
) -> None:
    """Upload previously exported models, one repository per model.

    For each model the folder ``<output_dir>/<cleaned name>`` is uploaded to
    the repository ``<repo_prefix>/<cleaned name>``.

    Raises:
        ValueError: If ``models`` is empty or ``output_dir`` is not given.
    """
    if not models:
        raise ValueError("No models specified")
    # There is nothing to fall back to here: without a directory the path
    # join below would fail with an unhelpful TypeError on None.
    if output_dir is None:
        raise ValueError("No output directory specified")

    with Progress() as progress:
        task2 = progress.add_task("[yellow]Uploading models...", total=len(models))
        for model_name in models:
            cleaned_name = clean_name(model_name)
            repo_id = f"{repo_prefix}/{cleaned_name}"
            model_dir = output_dir / cleaned_name

            progress.update(task2, description=f"[yellow]Uploading {cleaned_name}")
            _upload(repo_id, model_dir, auth_token)
            progress.update(task2, advance=1, description=f"[yellow]Uploaded {cleaned_name}")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
app()
|
||||||
|
|
Loading…
Reference in a new issue