Compare commits

...

2 commits

Author      SHA1          Message              Date
mertalev    683bb88f8b    updated dockerfile   2023-11-12 18:03:08 -05:00
mertalev    ae80def7f2    export cli           2023-11-12 18:01:12 -05:00
6 changed files with 137 additions and 74 deletions


@@ -14,8 +14,7 @@ RUN micromamba install -y -n base -f /tmp/conda-lock.yml && \
 WORKDIR /usr/src/app
-COPY --chown=$MAMBA_USER:$MAMBA_USER start.sh .
-COPY --chown=$MAMBA_USER:$MAMBA_USER app .
+COPY --chown=$MAMBA_USER:$MAMBA_USER export .
 ENTRYPOINT ["/usr/local/bin/_entrypoint.sh"]
-CMD ["./start.sh"]
+CMD ["python -m run"]


@@ -0,0 +1,9 @@
+from export.models.openclip import OpenCLIPModelConfig
+
+
+MCLIP_TO_OPENCLIP = {
+    "XLM-Roberta-Large-Vit-B-32": OpenCLIPModelConfig("ViT-B-32", "openai"),
+    "XLM-Roberta-Large-Vit-B-16Plus": OpenCLIPModelConfig("ViT-B-16-plus-240", "laion400m_e32"),
+    "LABSE-Vit-L-14": OpenCLIPModelConfig("ViT-L-14", "openai"),
+    "XLM-Roberta-Large-Vit-L-14": OpenCLIPModelConfig("ViT-L-14", "openai"),
+}


@@ -1,22 +1,15 @@
 import tempfile
 import warnings
 from pathlib import Path
 
+from export.models.constants import MCLIP_TO_OPENCLIP
 import torch
 from multilingual_clip.pt_multilingual_clip import MultilingualCLIP
 from transformers import AutoTokenizer
 
-from .openclip import OpenCLIPModelConfig
 from .openclip import to_onnx as openclip_to_onnx
 from .optimize import optimize
-from .util import get_model_path
+from .util import get_model_path, clean_name
 
-_MCLIP_TO_OPENCLIP = {
-    "M-CLIP/XLM-Roberta-Large-Vit-B-32": OpenCLIPModelConfig("ViT-B-32", "openai"),
-    "M-CLIP/XLM-Roberta-Large-Vit-B-16Plus": OpenCLIPModelConfig("ViT-B-16-plus-240", "laion400m_e32"),
-    "M-CLIP/LABSE-Vit-L-14": OpenCLIPModelConfig("ViT-L-14", "openai"),
-    "M-CLIP/XLM-Roberta-Large-Vit-L-14": OpenCLIPModelConfig("ViT-L-14", "openai"),
-}
 
 
 def to_onnx(
@@ -33,7 +26,7 @@ def to_onnx(
         param.requires_grad_(False)
 
     export_text_encoder(model, textual_path)
-    openclip_to_onnx(_MCLIP_TO_OPENCLIP[model_name], output_dir_visual)
+    openclip_to_onnx(MCLIP_TO_OPENCLIP[clean_name(model_name)], output_dir_visual)
     optimize(textual_path)
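
Note (not part of the diff): the M-CLIP to OpenCLIP mapping now lives in the export.models.constants module and is keyed by the cleaned model name. A minimal sketch of the lookup, assuming the export.models package layout shown in this compare:

from export.models.constants import MCLIP_TO_OPENCLIP
from export.models.util import clean_name  # added further down in this compare

# "M-CLIP/XLM-Roberta-Large-Vit-B-32" is cleaned to "XLM-Roberta-Large-Vit-B-32"
config = MCLIP_TO_OPENCLIP[clean_name("M-CLIP/XLM-Roberta-Large-Vit-B-32")]
# config is OpenCLIPModelConfig("ViT-B-32", "openai"): the visual tower exported
# alongside the M-CLIP text encoder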


@@ -3,6 +3,9 @@ from pathlib import Path
 from typing import Any
 
+_clean_name = str.maketrans(":\\/", "___", ".")
+
+
 def get_model_path(output_dir: Path | str) -> Path:
     output_dir = Path(output_dir)
     output_dir.mkdir(parents=True, exist_ok=True)
@@ -13,3 +16,7 @@ def save_config(config: Any, output_path: Path | str) -> None:
     output_path = Path(output_path)
     output_path.parent.mkdir(parents=True, exist_ok=True)
     json.dump(config, output_path.open("w"))
+
+
+def clean_name(model_name: str) -> str:
+    return model_name.split("/")[-1].translate(_clean_name)
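
Note (not part of the diff): a quick illustration of what the new clean_name helper produces. It generalises the old run.py expression model.split("/")[-1].replace("::", "__") via the translation table above:

from export.models.util import clean_name

# Hugging Face style names keep only the part after the slash
assert clean_name("M-CLIP/XLM-Roberta-Large-Vit-B-32") == "XLM-Roberta-Large-Vit-B-32"
# OpenCLIP "name::pretrained" identifiers become "name__pretrained" (":" -> "_", "." is stripped)
assert clean_name("ViT-B-32::laion2b_e16") == "ViT-B-32__laion2b_e16"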


@@ -1,76 +1,131 @@
+from enum import StrEnum
 import gc
 import os
 from pathlib import Path
 from tempfile import TemporaryDirectory
+from typing import Optional
 
-from huggingface_hub import create_repo, login, upload_folder
-from models import mclip, openclip
+from huggingface_hub import create_repo, upload_folder
+from export.models import mclip, openclip, insightface
+from export.models.util import clean_name
 from rich.progress import Progress
+import typer
 
-models = [
-    "RN50::openai",
-    "RN50::yfcc15m",
-    "RN50::cc12m",
-    "RN101::openai",
-    "RN101::yfcc15m",
-    "RN50x4::openai",
-    "RN50x16::openai",
-    "RN50x64::openai",
-    "ViT-B-32::openai",
-    "ViT-B-32::laion2b_e16",
-    "ViT-B-32::laion400m_e31",
-    "ViT-B-32::laion400m_e32",
-    "ViT-B-32::laion2b-s34b-b79k",
-    "ViT-B-16::openai",
-    "ViT-B-16::laion400m_e31",
-    "ViT-B-16::laion400m_e32",
-    "ViT-B-16-plus-240::laion400m_e31",
-    "ViT-B-16-plus-240::laion400m_e32",
-    "ViT-L-14::openai",
-    "ViT-L-14::laion400m_e31",
-    "ViT-L-14::laion400m_e32",
-    "ViT-L-14::laion2b-s32b-b82k",
-    "ViT-L-14-336::openai",
-    "ViT-H-14::laion2b-s32b-b79k",
-    "ViT-g-14::laion2b-s12b-b42k",
-    "M-CLIP/LABSE-Vit-L-14",
-    "M-CLIP/XLM-Roberta-Large-Vit-B-32",
-    "M-CLIP/XLM-Roberta-Large-Vit-B-16Plus",
-    "M-CLIP/XLM-Roberta-Large-Vit-L-14",
-]
-
-login(token=os.environ["HF_AUTH_TOKEN"])
-
-with Progress() as progress:
-    task1 = progress.add_task("[green]Exporting models...", total=len(models))
-    task2 = progress.add_task("[yellow]Uploading models...", total=len(models))
-
-    with TemporaryDirectory() as tmp:
-        tmpdir = Path(tmp)
-        for model in models:
-            model_name = model.split("/")[-1].replace("::", "__")
-            config_path = tmpdir / model_name / "config.json"
-
-            def upload() -> None:
-                progress.update(task2, description=f"[yellow]Uploading {model_name}")
-                repo_id = f"immich-app/{model_name}"
-                create_repo(repo_id, exist_ok=True)
-                upload_folder(repo_id=repo_id, folder_path=tmpdir / model_name)
-                progress.update(task2, advance=1)
-
-            def export() -> None:
-                progress.update(task1, description=f"[green]Exporting {model_name}")
-                visual_dir = tmpdir / model_name / "visual"
-                textual_dir = tmpdir / model_name / "textual"
-                if model.startswith("M-CLIP"):
-                    mclip.to_onnx(model, visual_dir, textual_dir)
-                else:
-                    name, _, pretrained = model_name.partition("__")
-                    openclip.to_onnx(openclip.OpenCLIPModelConfig(name, pretrained), visual_dir, textual_dir)
-                progress.update(task1, advance=1)
-                gc.collect()
-
-            export()
-            upload()
+
+app = typer.Typer()
+
+
+class ModelLibrary(StrEnum):
+    MCLIP = "mclip"
+    OPENCLIP = "openclip"
+    INSIGHTFACE = "insightface"
+
+
+def _export(model_name: str, library: ModelLibrary, export_dir: Path) -> None:
+    visual_dir = export_dir / "visual"
+    textual_dir = export_dir / "textual"
+    match library:
+        case ModelLibrary.MCLIP:
+            mclip.to_onnx(model_name, visual_dir, textual_dir)
+        case ModelLibrary.OPENCLIP:
+            name, _, pretrained = model_name.partition("__")
+            openclip.to_onnx(openclip.OpenCLIPModelConfig(name, pretrained), visual_dir, textual_dir)
+        case ModelLibrary.INSIGHTFACE:
+            insightface.to_onnx(model_name, visual_dir, textual_dir)
+    gc.collect()
+
+
+def _upload(repo_id: str, upload_dir: Path, auth_token: str | None = os.environ.get("HF_AUTH_TOKEN", None)) -> None:
+    create_repo(repo_id, exist_ok=True, token=auth_token)
+    upload_folder(repo_id=repo_id, folder_path=upload_dir, token=auth_token)
+
+
+@app.command()
+def export(
+    models: list[str] = typer.Argument(
+        ..., help="The model(s) to be exported. Model names should be the same as used in the associated library."
+    ),
+    library: ModelLibrary = typer.Option(
+        ..., "--library", "-l", help="The library associated with the models to be exported."
+    ),
+    output_dir: Optional[Path] = typer.Option(
+        None,
+        "--output-dir",
+        "-o",
+        help="Directory where exported models will be stored. Defaults to a temporary directory.",
+    ),
+    should_upload: bool = typer.Option(False, "--upload", "-u", help="Whether to upload the exported models."),
+    auth_token: Optional[str] = typer.Option(
+        os.environ.get("HF_AUTH_TOKEN", None),
+        "--auth_token",
+        "-t",
+        help="If uploading models to Hugging Face, the auth token of the user or organisation.",
+    ),
+    repo_prefix: str = typer.Option(
+        "immich-app",
+        "--repo_prefix",
+        "-p",
+        help="If uploading models to Hugging Face, the prefix to put before the model name. Can be a username or organisation.",
+    ),
+) -> None:
+    if not models:
+        raise ValueError("No models specified")
+
+    with Progress() as progress:
+        task1 = progress.add_task("[green]Exporting model(s)...", total=len(models))
+        with TemporaryDirectory() as tmp:
+            output_dir = output_dir if output_dir else Path(tmp)
+            for model_name in models:
+                cleaned_name = clean_name(model_name)
+                model_dir = output_dir / cleaned_name
+                progress.update(task1, description=f"[green]Exporting {cleaned_name}")
+                _export(model_name, library, model_dir)
+                progress.update(task1, advance=1, description=f"[green]Exported {cleaned_name}")
+
+            if should_upload:
+                upload(models, output_dir, auth_token, repo_prefix)
+
+
+@app.command()
+def upload(
+    models: list[str] = typer.Argument(
+        ..., help="The model(s) to be uploaded. Model names should be the same as used in the associated library."
+    ),
+    output_dir: Optional[Path] = typer.Option(
+        None,
+        "--output-dir",
+        "-o",
+        help="Directory where exported models will be stored. Defaults to a temporary directory.",
+    ),
+    auth_token: Optional[str] = typer.Option(
+        os.environ.get("HF_AUTH_TOKEN", None),
+        "--auth_token",
+        "-t",
+        help="The Hugging Face auth token of the user or organisation.",
+    ),
+    repo_prefix: str = typer.Option(
+        "immich-app",
+        "--repo_prefix",
+        "-p",
+        help="The name to put before the model name to form the Hugging Face repo name. Can be a username or organisation.",
+    ),
+) -> None:
+    if not models:
+        raise ValueError("No models specified")
+
+    with Progress() as progress:
+        task2 = progress.add_task("[yellow]Uploading models...", total=len(models))
+        for model_name in models:
+            cleaned_name = clean_name(model_name)
+            repo_id = f"{repo_prefix}/{cleaned_name}"
+            model_dir = output_dir / cleaned_name
+            progress.update(task2, description=f"[yellow]Uploading {cleaned_name}")
+            _upload(repo_id, model_dir, auth_token)
+            progress.update(task2, advance=1, description=f"[yellow]Uploaded {cleaned_name}")
+
+
+if __name__ == "__main__":
+    app()
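
Note (not part of the diff): a hedged usage sketch of the new Typer CLI. The import path of app is an assumption, since the compare view does not show file names; uploading additionally requires HF_AUTH_TOKEN (or --auth_token) to be set.

from typer.testing import CliRunner

from run import app  # assumed import path for the CLI module shown above

runner = CliRunner()

# Export one OpenCLIP model into ./models (without -o, a temporary directory is used and discarded)
result = runner.invoke(app, ["export", "ViT-B-32::openai", "--library", "openclip", "-o", "models"])
print(result.output)

# Upload the exported model; with the default prefix this targets the repo "immich-app/ViT-B-32__openai"
result = runner.invoke(app, ["upload", "ViT-B-32::openai", "-o", "models"])
print(result.output)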