mirror of
https://github.com/invoke-ai/InvokeAI.git
synced 2026-04-23 03:00:31 -04:00
first draft of normalized model manager p-o-p
This commit is contained in:
142
invokeai/backend/normalized_mm/README.md
Normal file
142
invokeai/backend/normalized_mm/README.md
Normal file
@@ -0,0 +1,142 @@
|
||||
# Normalized Model Manager
|
||||
|
||||
This is proof-of-principle code that refactors model storage to be
|
||||
more space efficient. The driving observation is that there is a
|
||||
significant amount of redundancy in Stable Diffusion models. For
|
||||
example, the VAE and text encoders are frequently the same across
|
||||
multiple models derived from the same base models.
|
||||
|
||||
Components:
|
||||
|
||||
1. Blob folder located in invokeai/models/blob
|
||||
|
||||
This folder contains a series of subfolders with randomly-assigned
|
||||
UUIDs. Each subfolder contains the contents of a diffusers folder, e.g.
|
||||
the .json and .safetensors files.
|
||||
|
||||
2. Database located in invokeai/databases/models.db
|
||||
|
||||
This is a database that describes what each model is, and maps
|
||||
diffusion pipelines (main models) to the models that compose it.
|
||||
|
||||
## Database tables
|
||||
|
||||
This illustrates the database schema.
|
||||
|
||||
### MODEL_PATH
|
||||
|
||||
This provides the type and path of each fundamental model. The type
|
||||
can be any of the ModelType enums, including clip_vision, etc.
|
||||
|
||||
| **ID** | **TYPE** | **REFCOUNT** | **PATH** |
|
||||
|-------------|---------------|--------------|----------|
|
||||
| 1 | `vae` | 3 | /opt/invokeai/models/blob/abc-def-012 |
|
||||
| 2 | `text_encoder`| 2 | /opt/invokeai/models/blob/482-abc-321 |
|
||||
| 3 | `unet` | 1 | /opt/invokeai/models/blob/839-dea-444 |
|
||||
| 4 | `safety_checker`| 3 | /opt/invokeai/models/blob/982-472-a9e|
|
||||
| 5 | `lora` | 1 | /opt/invokeai/models/blob/111-222-333|
|
||||
|
||||
Refcount indicates how many pipelines share the fundamental model.
|
||||
|
||||
### MODEL_NAME
|
||||
|
||||
Holds name and description of the model. Note that an anonymous model
|
||||
that is a component of a pipeline does not need to have any metadata.
|
||||
|
||||
| **ID** | **NAME** | **SOURCE** | **DESCRIPTION** |
|
||||
|-------------|------------|-------------|-----------------|
|
||||
| 5 | LoWRA | stabilityai/lowra | LoWRA adapted for low light renderings |
|
||||
|
||||
|
||||
### MODEL_BASE
|
||||
|
||||
This maps the model UUID to the base model types supported. Some
|
||||
fundamental models, such as unets, only support a single base. Others,
|
||||
such as sd-1 VAEs, support more than one, and others, such as the
|
||||
beloved safety checker, support all model bases (type "any"). So
|
||||
this table supports one-to-many relationships.
|
||||
|
||||
| **ID** | **BASE** |
|
||||
|-----------|-------------|
|
||||
| 1 | sd-1 |
|
||||
| 1 | sd-2 |
|
||||
| 2 | sd-1 |
|
||||
| 3 | sd-1 |
|
||||
| 4 | any |
|
||||
| 5 | sd-1 |
|
||||
|
||||
### PIPELINE
|
||||
|
||||
This is a table of pipeline models. The `toc` field holds the
|
||||
`index.json` file contained at the top level of a diffusers folder. It
|
||||
is there only for the purpose of exporting a working diffusers
|
||||
pipeline folder.
|
||||
|
||||
|
||||
| **ID** | **NAME** | **BASE** | **TOC**|
|
||||
|---------|----------------------|----------|----------|
|
||||
| 1 | stable-diffusion-1-5 | sd-1 |/opt/invokeai/models/blob/439-aaf-232.json |
|
||||
| 2 | stable-diffusion-2-1 | sd-2 |/opt/invokeai/models/blob/868-212-11f.json |
|
||||
|
||||
### PIPELINE_PARTS
|
||||
|
||||
This table describes how to put the fundamental models together in
|
||||
order to reconstruct the original pipeline.
|
||||
|
||||
| **PIPELINE_ID** | **PART_ID** | **PART_FOLDER** |
|
||||
|------------------|-------------|-------------------|
|
||||
| 1 | 1 | `vae` |
|
||||
| 1 | 2 | `text_encoder` |
|
||||
| 1 | 3 | `unet` |
|
||||
|
||||
## Initializing the normalized model manager
|
||||
|
||||
Initialization will look something like this:
|
||||
|
||||
```
|
||||
from invokeai.backend.normalized_mm import normalized_model_manager
|
||||
from invokeai.app.services.config import InvokeAIAppConfig
|
||||
|
||||
config = InvokeAIAppConfig.get_config()
|
||||
config.parse_args()
|
||||
nmm = normalized_model_manager(config)
|
||||
```
|
||||
|
||||
## Saving a model to the database
|
||||
|
||||
Pass the path to a diffusers model or safetensors file. "main"
|
||||
safetensors will be converted to diffusers behind the scenes.
|
||||
|
||||
```
|
||||
id = nmm.ingest_simple_model('/path/to/folder')
id = nmm.ingest_simple_model('/path/to/file.safetensors')
|
||||
```
|
||||
|
||||
## Fetching a model
|
||||
|
||||
To fetch a fundamental model, use its name and type:
|
||||
|
||||
```
|
||||
model_info = nmm.get_model_by_name(name='LoWRA', type='lora')
|
||||
print(model_info.path)
|
||||
print(model_info.description)
|
||||
```
|
||||
|
||||
To fetch part of a pipeline, use its name, base and the submodel
|
||||
desired:
|
||||
|
||||
```
|
||||
model_info = nmm.get_pipeline_by_name(name='stable-diffusion-1-5', base='sd-1', submodel='vae')
|
||||
print(model_info.path)
|
||||
```
|
||||
|
||||
## Exporting a model
|
||||
|
||||
To export a model back into its native format (diffusers for main, safetensors for other types), use `export`:
|
||||
|
||||
```
|
||||
nmm.export(name='stable-diffusion-1-5', base='sd-1', destination='/path/to/export/folder')
|
||||
```
|
||||
|
||||
The model will be exported to the indicated folder with the name `stable-diffusion-1-5`.
|
||||
|
||||
0
invokeai/backend/normalized_mm/__init__.py
Normal file
0
invokeai/backend/normalized_mm/__init__.py
Normal file
64
invokeai/backend/normalized_mm/hash.py
Normal file
64
invokeai/backend/normalized_mm/hash.py
Normal file
@@ -0,0 +1,64 @@
|
||||
# Copyright (c) 2023 Lincoln D. Stein and the InvokeAI Development Team
|
||||
"""
|
||||
Fast hashing of diffusers and checkpoint-style models.
|
||||
|
||||
Usage:
|
||||
from invokeai.backend.normalized_mm.hash import FastModelHash
|
||||
>>> FastModelHash.hash('/home/models/stable-diffusion-v1.5')
|
||||
'a8e693a126ea5b831c96064dc569956f'
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Dict, Union
|
||||
|
||||
from imohash import hashfile
|
||||
|
||||
from invokeai.backend.model_management.models import InvalidModelException
|
||||
|
||||
|
||||
class FastModelHash(object):
    """FastModelHash object provides one public class method, hash()."""

    @classmethod
    def hash(cls, model_location: Union[str, Path]) -> str:
        """
        Return hexdigest string for model located at model_location.

        :param model_location: Path to the model file or diffusers directory
        :raises InvalidModelException: if model_location is neither a file nor a directory
        """
        model_location = Path(model_location)
        if model_location.is_file():
            return cls._hash_file(model_location)
        elif model_location.is_dir():
            return cls._hash_dir(model_location)
        else:
            raise InvalidModelException(f"Not a valid file or directory: {model_location}")

    @classmethod
    def _hash_file(cls, model_location: Union[str, Path]) -> str:
        """
        Fasthash a single file and return its hexdigest.

        :param model_location: Path to the model file
        """
        # we return md5 hash of the filehash to make it shorter
        # cryptographic security not needed here
        return hashlib.md5(hashfile(model_location)).hexdigest()

    @classmethod
    def _hash_dir(cls, model_location: Union[str, Path]) -> str:
        """
        Fasthash every file under a directory tree and fold them into one hexdigest.

        :param model_location: Path to the model directory
        """
        components: Dict[str, str] = {}

        # `dirs` is unused; walk visits every file in the tree
        for root, _dirs, files in os.walk(model_location):
            for file in files:
                path = (Path(root) / file).as_posix()
                components[path] = cls._hash_file(path)

        # hash all the model hashes together, using alphabetic file order
        # so the result is independent of os.walk traversal order
        md5 = hashlib.md5()
        for _path, fast_hash in sorted(components.items()):
            md5.update(fast_hash.encode("utf-8"))
        return md5.hexdigest()
|
||||
274
invokeai/backend/normalized_mm/normalized_model_manager.py
Normal file
274
invokeai/backend/normalized_mm/normalized_model_manager.py
Normal file
@@ -0,0 +1,274 @@
|
||||
import sqlite3

from dataclasses import dataclass
from pathlib import Path
from shutil import copy  # BUGFIX: the stdlib module is `shutil`, not `shutils`
from typing import Optional, Dict, Set
from uuid import uuid4

from invokeai.app.services.config import InvokeAIAppConfig

from .hash import FastModelHash
from ..model_management.model_probe import ModelProbe
from ..model_management import BaseModelType, ModelType

# TODO: this should be derived from the ModelType enum rather than hardcoded
MODEL_TYPES = {'vae', 'lora', 'controlnet', 'embedding',
               'ip_adapter', 'clip_vision', 't2i_adapter',
               'text_encoder', 'scheduler', 'tokenizer',
               'unet',
               }
# double-quoted, comma-separated list for interpolation into SQL CHECK(...) clauses
MODEL_SQL_ENUM = ','.join([f'"{x}"' for x in MODEL_TYPES])

BASE_TYPES = {'sd-1', 'sd-2', 'sdxl', 'sdxl-refiner'}
BASE_SQL_ENUM = ','.join([f'"{x}"' for x in BASE_TYPES])
|
||||
|
||||
@dataclass
class ModelPart:
    """Type and blob-storage path of a single fundamental model component."""

    # ModelType enum member (e.g. vae, unet, lora) identifying this part
    type: ModelType
    # filesystem path of the deduplicated blob holding this part
    path: Path
|
||||
|
||||
@dataclass
class ModelConfig:
    """Assembled view of a named model: its metadata plus all of its parts."""

    # human-readable model name (unique in the model_name table)
    name: str
    # free-text description of the model
    description: str
    # base architectures this model is compatible with (one-to-many in model_base)
    base_models: Set[BaseModelType]
    # maps part name (e.g. 'vae', 'unet', default 'root') to that part's type/path
    parts: Dict[str, ModelPart]
|
||||
|
||||
class NormalizedModelManager:
    """
    Proof-of-principle manager that stores models in normalized form.

    Fundamental parts (vae, unet, text_encoder, ...) are copied once into a
    blob directory and shared between models; a SQLite database maps model
    names to their parts and tracks per-part refcounts and base compatibility.
    """

    _conn: sqlite3.Connection
    _cursor: sqlite3.Cursor
    _blob_directory: Path

    def __init__(self, config: "InvokeAIAppConfig"):
        """
        Create/open the normalized model database and blob directory.

        :param config: InvokeAI application config; supplies db_path and root_path.
            (Original code used `config=InvokeAIAppConfig` — the *class* as a
            default value — which could never work; the argument is now required.)
        """
        database_file = config.db_path.parent / 'normalized_models.db'
        Path(database_file).parent.mkdir(parents=True, exist_ok=True)
        self._conn = sqlite3.connect(database_file, check_same_thread=True)
        self._conn.isolation_level = 'DEFERRED'
        # get_model() reads result columns by name (rows[0]['description']),
        # which requires Row objects; default tuple rows would raise TypeError.
        # Must be set before the cursor is created.
        self._conn.row_factory = sqlite3.Row
        self._cursor = self._conn.cursor()
        self._blob_directory = config.root_path / 'model_blobs'
        self._blob_directory.mkdir(parents=True, exist_ok=True)

        self._conn.execute("PRAGMA foreign_keys = ON;")
        self._create_tables()
        self._conn.commit()

    def ingest_simple_model(self, model_path: Path) -> int:
        """
        Insert a simple one-part model, returning its model ID.

        The part is deduplicated by content hash: if an identical part was
        installed before, it is reused and only its refcount grows (via trigger).

        :param model_path: path to a single-part model file or folder
        :raises sqlite3.Error: on database failure (transaction is rolled back)
        """
        model_name = model_path.stem
        model_hash = FastModelHash.hash(model_path)

        try:
            # retrieve or create the single part that goes into this model
            part_id = self._lookup_part_by_hash(model_hash) or self._install_part(model_hash, model_path)

            # create the model name/source entry; is_pipeline is hardwired false
            self._cursor.execute(
                """--sql
                INSERT INTO model_name (
                   name, source, description, is_pipeline
                )
                VALUES (?, ?, ?, 0);
                """,
                # str(): sqlite3 cannot bind Path objects directly
                (model_name, str(model_path), f"Imported model {model_name}"),
            )

            # associate the part with the model
            model_id = self._cursor.lastrowid
            self._cursor.execute(
                """--sql
                INSERT INTO model_parts (
                   model_id, part_id
                )
                VALUES (?, ?);
                """,
                (model_id, part_id),
            )
            self._conn.commit()
        except sqlite3.Error:
            self._conn.rollback()
            raise

        return model_id

    def ingest_pipeline_model(self, model_path: Path) -> int:
        """Insert a multi-part diffusers pipeline. Not yet implemented."""
        # TODO(p-o-p): decompose the diffusers folder into parts, register each
        pass

    # in this p-o-p implementation, we assume that the model name is unique
    def get_model(self, name: str) -> Optional["ModelConfig"]:
        """
        Look up a model by name and assemble its parts and base compatibility.

        :param name: model name as stored in the model_name table
        :return: a ModelConfig, or None when the name is unknown
        """
        # Fixes vs. original: `path` is selected from model_part (alias c) —
        # model_parts has no such column; the WHERE clause filters on a.name
        # (a *name* is bound, not an id); and the stray trailing comma after
        # the last FROM alias (a SQL syntax error) is removed.
        self._cursor.execute(
            """--sql
            SELECT a.source, a.description, c.type, b.part_name, c.path, d.base
            FROM model_name as a,
                 model_parts as b,
                 model_part as c,
                 model_base as d
            WHERE a.name=?
              AND a.model_id=b.model_id
              AND b.part_id=c.part_id
              AND b.part_id=d.part_id;
            """,
            (name,),
        )
        rows = self._cursor.fetchall()
        if not rows:
            return None

        bases: Set[BaseModelType] = {BaseModelType(x['base']) for x in rows}
        parts: Dict[str, ModelPart] = {
            x['part_name']: ModelPart(
                type=ModelType(x['type']),
                path=Path(x['path']),
            )
            for x in rows
        }

        return ModelConfig(
            name=name,
            description=rows[0]['description'],
            base_models=bases,
            parts=parts,
        )

    def _lookup_part_by_hash(self, hash: str) -> Optional[int]:
        """Return the part_id whose content hash matches, or None if absent."""
        self._cursor.execute(
            """--sql
            SELECT part_id from model_part
            WHERE hash=?;
            """,
            (hash,),
        )
        row = self._cursor.fetchone()
        if not row:
            return None
        return row[0]

    # may raise an exception
    def _install_part(self, model_hash: str, model_path: Path) -> int:
        """
        Probe, copy and register a new fundamental part; return its part_id.

        :param model_hash: content hash of the part (stored for deduplication)
        :param model_path: source file or diffusers subfolder to copy in
        """
        # local import: the module-level `from shutils import copy` is misspelled,
        # and we also need copytree for directory-shaped parts
        import shutil

        model_info = ModelProbe.probe(model_path)
        model_type = model_info.model_type
        model_base = model_info.base_type

        # hardcoded logic to test multiple base type compatibility
        if model_type == ModelType('vae') and model_base == BaseModelType('sd-1'):
            model_bases = {'sd-1', 'sd-2'}
        elif model_base == BaseModelType('any'):
            model_bases = set(BASE_TYPES)
        else:
            # normalize to the plain string value so sqlite can bind it
            model_bases = {model_base.value}

        # make the storage name slightly easier to interpret;
        # uuid4() must be stringified before concatenation (was a TypeError)
        blob_name = f"{model_type.value}-{uuid4()}"
        if model_path.is_file() and model_path.suffix:
            blob_name += model_path.suffix

        destination = self._blob_directory / blob_name
        if destination.exists():
            # original asserted on the *string* blob_name (no .exists()); an
            # assert would also be stripped under -O, so raise a real error
            raise FileExistsError(f"a path named {destination} already exists")
        if model_path.is_dir():
            # diffusers parts are folders; plain copy() only handles files
            shutil.copytree(model_path, destination)
        else:
            shutil.copy(model_path, destination)

        # create entry in the model_part table
        self._cursor.execute(
            """--sql
            INSERT INTO model_part (
               type, hash, path
            )
            VALUES (?, ?, ?);
            """,
            # str(): sqlite3 cannot bind Path objects directly
            (model_type.value, model_hash, str(destination)),
        )

        # id of the inserted row
        part_id = self._cursor.lastrowid

        # create base compatibility info; column is part_id (there is no `id`)
        for base in model_bases:
            self._cursor.execute(
                """--sql
                INSERT INTO model_base (part_id, base)
                VALUES (?, ?);
                """,
                (part_id, base),
            )

        return part_id

    def _create_tables(self):
        """Create tables and refcount triggers if they do not already exist."""
        self._cursor.execute(
            f"""--sql
            CREATE TABLE IF NOT EXISTS model_part (
                part_id INTEGER PRIMARY KEY,
                type TEXT CHECK( type IN ({MODEL_SQL_ENUM}) ) NOT NULL,
                hash TEXT UNIQUE,
                refcount INTEGER NOT NULL DEFAULT '0',
                path TEXT NOT NULL
            );
            """
        )
        self._cursor.execute(
            """--sql
            CREATE TABLE IF NOT EXISTS model_name (
                model_id INTEGER PRIMARY KEY,
                name TEXT NOT NULL,
                source TEXT,
                description TEXT,
                is_pipeline BOOLEAN NOT NULL DEFAULT '0',
                table_of_contents TEXT, -- this is the contents of model_index.json
                UNIQUE(name)
            );
            """
        )
        self._cursor.execute(
            f"""--sql
            CREATE TABLE IF NOT EXISTS model_base (
                part_id INTEGER NOT NULL, -- INTEGER to match model_part.part_id
                base TEXT CHECK( base in ({BASE_SQL_ENUM}) ) NOT NULL,
                FOREIGN KEY(part_id) REFERENCES model_part(part_id),
                UNIQUE(part_id, base) -- was unique(id,base): no `id` column exists
            );
            """
        )
        self._cursor.execute(
            """--sql
            CREATE TABLE IF NOT EXISTS model_parts (
                model_id INTEGER NOT NULL,
                part_id INTEGER NOT NULL,
                part_name TEXT DEFAULT 'root',
                FOREIGN KEY(model_id) REFERENCES model_name(model_id),
                FOREIGN KEY(part_id) REFERENCES model_part(part_id),
                UNIQUE(model_id, part_id)
            );
            """
        )
        # the triggers below keep model_part.refcount in sync with the number
        # of model_parts rows that reference each part
        self._cursor.execute(
            """--sql
            CREATE TRIGGER IF NOT EXISTS insert_model_refcount
            AFTER INSERT
            ON model_parts FOR EACH ROW
            BEGIN
               UPDATE model_part SET refcount=refcount+1 WHERE model_part.part_id=new.part_id;
            END;
            """
        )
        self._cursor.execute(
            """--sql
            CREATE TRIGGER IF NOT EXISTS delete_model_refcount
            AFTER DELETE
            ON model_parts FOR EACH ROW
            BEGIN
               UPDATE model_part SET refcount=refcount-1 WHERE model_part.part_id=old.part_id;
            END;
            """
        )
        self._cursor.execute(
            """--sql
            CREATE TRIGGER IF NOT EXISTS update_model_refcount
            AFTER UPDATE
            ON model_parts FOR EACH ROW
            BEGIN
               UPDATE model_part SET refcount=refcount-1 WHERE model_part.part_id=old.part_id;
               UPDATE model_part SET refcount=refcount+1 WHERE model_part.part_id=new.part_id;
            END;
            """
        )
|
||||
Reference in New Issue
Block a user