mirror of
https://github.com/invoke-ai/InvokeAI.git
synced 2026-02-19 08:34:40 -05:00
* add basic functionality for model metadata fetching from hf and civitai * add storage * start unit tests * add unit tests and documentation * add missing dependency for pytests * remove redundant fetch; add modified/published dates; updated docs * add code to select diffusers files based on the variant type * implement Civitai installs * make huggingface parallel downloading work * add unit tests for model installation manager - Fixed race condition on selection of download destination path - Add fixtures common to several model_manager_2 unit tests - Added dummy model files for testing diffusers and safetensors downloading/probing - Refactored code for selecting proper variant from list of huggingface repo files - Regrouped ordering of methods in model_install_default.py * improve Civitai model downloading - Provide a better error message when Civitai requires an access token (doesn't give a 403 forbidden, but redirects to the HTML of an authorization page -- arrgh) - Handle case of Civitai providing a primary download link plus additional links for VAEs, config files, etc * add routes for retrieving metadata and tags * code tidying and documentation * fix ruff errors * add file needed to maintain test root diretory in repo for unit tests * fix self->cls in classmethod * add pydantic plugin for mypy * use TestSession instead of requests.Session to prevent any internet activity improve logging fix error message formatting fix logging again fix forward vs reverse slash issue in Windows install tests * Several fixes of problems detected during PR review: - Implement cancel_model_install_job and get_model_install_job routes to allow for better control of model download and install. - Fix thread deadlock that occurred after cancelling an install. - Remove unneeded pytest_plugins section from tests/conftest.py - Remove unused _in_terminal_state() from model_install_default. - Remove outdated documentation from several spots. 
- Add workaround for Civitai API results which don't return correct URL for the default model. * fix docs and tests to match get_job_by_source() rather than get_job() * Update invokeai/backend/model_manager/metadata/fetch/huggingface.py Co-authored-by: Ryan Dick <ryanjdick3@gmail.com> * Call CivitaiMetadata.model_validate_json() directly Co-authored-by: Ryan Dick <ryanjdick3@gmail.com> * Second round of revisions suggested by @ryanjdick: - Fix type mismatch in `list_all_metadata()` route. - Do not have a default value for the model install job id - Remove static class variable declarations from non Pydantic classes - Change `id` field to `model_id` for the sqlite3 `model_tags` table. - Changed AFTER DELETE triggers to ON DELETE CASCADE for the metadata and tags tables. - Made the `id` field of the `model_metadata` table into a primary key to achieve uniqueness. * Code cleanup suggested in PR review: - Narrowed the declaration of the `parts` attribute of the download progress event - Removed auto-conversion of str to Url in Url-containing sources - Fixed handling of `InvalidModelConfigException` - Made unknown sources raise `NotImplementedError` rather than `Exception` - Improved status reporting on cached HuggingFace access tokens * Multiple fixes: - `job.total_size` returns a valid size for locally installed models - new route `list_models` returns a paged summary of model, name, description, tags and other essential info - fix a few type errors * consolidated all invokeai root pytest fixtures into a single location * Update invokeai/backend/model_manager/metadata/metadata_store.py Co-authored-by: psychedelicious <4822129+psychedelicious@users.noreply.github.com> * Small tweaks in response to review comments: - Remove flake8 configuration from pyproject.toml - Use `id` rather than `modelId` for huggingface `ModelInfo` object - Use `last_modified` rather than `LastModified` for huggingface `ModelInfo` object - Add `sha256` field to file metadata downloaded from 
huggingface - Add `Invoker` argument to the model installer `start()` and `stop()` routines (but made it optional in order to facilitate use of the service outside the API) - Removed redundant `PRAGMA foreign_keys` from metadata store initialization code. * Additional tweaks and minor bug fixes - Fix calculation of aggregate diffusers model size to only count the size of files, not files + directories (which gives different unit test results on different filesystems). - Refactor _get_metadata() and _get_download_urls() to have distinct code paths for Civitai, HuggingFace and URL sources. - Forward the `inplace` flag from the source to the job and added unit test for this. - Attach cached model metadata to the job rather than to the model install service. * fix unit test that was breaking on windows due to CR/LF changing size of test json files * fix ruff formatting * a few last minor fixes before merging: - Turn job `error` and `error_type` into properties derived from the exception. - Add TODO comment about the reason for handling temporary directory destruction manually rather than using tempfile.tmpdir(). * add unit tests for reporting HTTP download errors --------- Co-authored-by: Lincoln Stein <lstein@gmail.com> Co-authored-by: Ryan Dick <ryanjdick3@gmail.com> Co-authored-by: psychedelicious <4822129+psychedelicious@users.noreply.github.com>
133 lines
5.1 KiB
Python
133 lines
5.1 KiB
Python
# Copyright (c) 2023 Lincoln D. Stein and the InvokeAI Development Team
|
|
"""
|
|
Select the files from a HuggingFace repository needed for a particular model variant.
|
|
|
|
Usage:
|
|
```
|
|
from invokeai.backend.model_manager.util.select_hf_files import filter_files
|
|
from invokeai.backend.model_manager.metadata.fetch import HuggingFaceMetadataFetch
|
|
|
|
metadata = HuggingFaceMetadataFetch().from_url("https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0")
|
|
files_to_download = filter_files(metadata.files, variant='onnx')
|
|
```
|
|
"""
|
|
|
|
import re
|
|
from pathlib import Path
|
|
from typing import Dict, List, Optional, Set
|
|
|
|
from ..config import ModelRepoVariant
|
|
|
|
|
|
def filter_files(
    files: List[Path],
    variant: Optional[ModelRepoVariant] = None,
    subfolder: Optional[Path] = None,
) -> List[Path]:
    """
    Take a list of files in a HuggingFace repo root and return paths to files needed to load the model.

    :param files: List of files relative to the repo root.
    :param variant: Filter by files belonging to a particular variant, such as fp16.
        Falls back to `ModelRepoVariant.DEFAULT` when omitted.
    :param subfolder: Restrict the result to files located underneath the indicated
        folder, at any depth (so `<subfolder>/unet/config.json` is kept as well as
        `<subfolder>/config.json`).
    :return: Sorted list of the selected paths, with duplicates removed.

    The file list can be obtained from the `files` field of HuggingFaceMetadata,
    as defined in `invokeai.backend.model_manager.metadata.metadata_base`.
    """
    variant = variant or ModelRepoVariant.DEFAULT

    # Start by filtering on model file extensions, discarding images, docs, etc.
    # `.json`/`.txt` files and a few well-known single-file weight names are always candidates.
    always_keep = (".json", ".txt", "learned_embeds.bin", "ip_adapter.bin", "lora_weights.safetensors")

    # BRITTLENESS WARNING!!
    # Diffusers models always seem to have "model" in their name, and the regex filter below is applied to avoid
    # downloading random checkpoints that might also be in the repo. However there is no guarantee
    # that a checkpoint doesn't contain "model" in its name, and no guarantee that future diffusers models
    # will adhere to this naming convention, so this is an area of brittleness.
    weights_pattern = re.compile(r"model(\.[^.]+)?\.(safetensors|bin|onnx|xml|pth|pt|ckpt|msgpack)$")

    paths: List[Path] = [
        candidate
        for candidate in files
        if candidate.name.endswith(always_keep) or weights_pattern.search(candidate.name)
    ]

    # Limit search to subfolder if requested. Match on any ancestor rather than only the
    # direct parent, otherwise multi-folder models stored under a subfolder lose every
    # file that lives in a nested directory.
    if subfolder:
        wanted_root = Path(subfolder)
        paths = [x for x in paths if wanted_root in x.parents]

    # _filter_by_variant uniquifies the paths and returns a set
    return sorted(_filter_by_variant(paths, variant))
|
|
|
|
|
|
def _filter_by_variant(files: List[Path], variant: ModelRepoVariant) -> Set[Path]:
    """Select the proper variant files from a list of HuggingFace repo_id paths.

    :param files: Candidate paths (already narrowed to config/text/weights files).
    :param variant: The variant whose files should be kept.
    :return: Set of selected paths. `.json` and `.txt` files are always candidates;
        `.onnx`, `openvino_model` and `flax_model` files are kept only for their own
        variant; for FP16/FP32/DEFAULT a single weights file is kept per
        `<folder>/<base name>`. The set is empty when ONNX, OPENVINO or FLAX was
        requested and no selected file name contains the variant's value. A
        `config.json` that would be the only selected file in its folder is dropped.
    """
    # Compare against the member's value explicitly (as the architecture check below
    # already does) rather than relying on how the enum renders inside an f-string.
    # `getattr` keeps plain-string variants working as they did before.
    variant_value = getattr(variant, "value", variant)
    wanted_label = f".{variant_value}"

    wants_precision_variant = variant in [
        ModelRepoVariant.FP16,
        ModelRepoVariant.FP32,
        ModelRepoVariant.DEFAULT,
    ]
    # Files without a variant label are acceptable for FP32 and DEFAULT requests.
    accepts_unlabeled = variant in [ModelRepoVariant.FP32, ModelRepoVariant.DEFAULT]

    result: Set[Path] = set()
    # Chosen weights file for each "<folder>/<file name minus variant label and extension>".
    basenames: Dict[Path, Path] = {}

    for path in files:
        if path.suffix == ".onnx":
            if variant == ModelRepoVariant.ONNX:
                result.add(path)

        elif "openvino_model" in path.name:
            if variant == ModelRepoVariant.OPENVINO:
                result.add(path)

        elif "flax_model" in path.name:
            if variant == ModelRepoVariant.FLAX:
                result.add(path)

        elif path.suffix in [".json", ".txt"]:
            result.add(path)

        elif path.suffix in [".bin", ".safetensors", ".pt", ".ckpt"] and wants_precision_variant:
            suffixes = path.suffixes
            if len(suffixes) == 2:
                # e.g. "diffusion_pytorch_model.fp16.safetensors" -> label ".fp16"
                variant_label = suffixes[0]
                basename = path.parent / Path(path.stem).stem
            else:
                variant_label = ""
                basename = path.parent / path.stem

            previous = basenames.get(basename)
            if previous is None:
                basenames[basename] = path
                continue

            # NOTE(review): the winner among several candidates for the same basename
            # depends on the order of `files` (a later matching `.bin` can displace an
            # earlier `.safetensors`) -- confirm the intended precedence before changing.
            # The real extension is always `path.suffix`; the first entry of `suffixes`
            # is something else whenever the name has more than two dotted parts.
            upgrades_to_safetensors = previous.suffix != ".safetensors" and path.suffix == ".safetensors"
            matches_requested_label = variant_label == wanted_label
            unlabeled_and_acceptable = not variant_label and accepts_unlabeled
            if upgrades_to_safetensors or matches_requested_label or unlabeled_and_acceptable:
                basenames[basename] = path

    result.update(basenames.values())

    # If one of the architecture-related variants was specified and no files matched other than
    # config and text files then we return an empty set
    if variant in [ModelRepoVariant.ONNX, ModelRepoVariant.OPENVINO, ModelRepoVariant.FLAX] and not any(
        variant_value in x.name for x in result
    ):
        return set()

    # Prune folders that contain just a `config.json`. This happens when
    # the requested variant (e.g. "onnx") is missing
    directories: Dict[Path, int] = {}
    for x in result:
        directories[x.parent] = directories.get(x.parent, 0) + 1

    return {x for x in result if directories[x.parent] > 1 or x.name != "config.json"}
|