feat(nodes): add design doc

This commit is contained in:
psychedelicious
2023-05-20 02:39:46 +10:00
parent f92afaac7c
commit c7392e7948

View File

@@ -0,0 +1,380 @@
from abc import ABC, abstractmethod
from typing import Optional, Union
from PIL.Image import Image as PILImage
from pydantic import BaseModel, Field
from invokeai.app.models.image import ImageType
from invokeai.app.services.item_storage import PaginatedResults
"""
Substantial proposed changes to the management of images and latents.
tl;dr:
With the upcoming move to latents-only nodes, we need to handle metadata differently. After struggling with this unsuccessfully - trying to smoosh it in to the existing setup - I believe we need to expand the scope of the refactor to include the management of images and latents.
full story:
The consensus for latents-only nodes' metadata was to traverse the execution graph and grab the core parameters to write to the image. This was straightforward, and I've written functions to find the nearest t2l/l2l, noise, and compel nodes and build the metadata from those.
But struggling to integrate this, along with the associated edge cases, brought up a number of issues deeper in the system (some of which I had previously implemented). The ImageStorageService is doing way too much, and we need to be able to retrieve the session given an image/latents id, which is not currently feasible due to SQLite's JSON parsing performance.
I made a new ResultsService and `results` table in the db to facilitate this. This first attempt failed because it doesn't handle uploads and leaves the codebase messy.
So I've spent the day trying to figure out how to handle this in a sane way and think I've got something decent. I've described some changes to service bases and the database below.
The gist of it is to store the core parameters for an image in its metadata when the image is saved, but never to read from it. Instead, the same metadata is stored in the database, which will be set up for efficient access. So when a page of images is requested, the metadata comes from the db instead of a filesystem operation.
The URL generation responsibilities have been split off from the image storage service into a URL service. New database services/tables for images and latents are added. These services will provide paginated images/latents for the API to serve. This also paves the way for handling latents as first-class outputs.
"""
# TODO: Make a new model for this.
# Until then, `LatentsType` is a plain alias of `ImageType`, so latents are
# categorized with exactly the same type values as images.
LatentsType = ImageType
"""
Core Generation Metadata Pydantic Model
I've already implemented the code to traverse a session to build this object.
"""
class CoreGenerationMetadata(BaseModel):
    """Core generation metadata for an image/latents generated in InvokeAI.

    Generated by traversing the execution graph, collecting the parameters of the
    nearest ancestors of a given node.

    Full metadata may be accessed by querying for the session in the
    `graph_executions` table.

    Every field is optional. Each one declares `default=None` explicitly so that
    it stays optional regardless of how the installed pydantic version treats an
    `Optional[...]` annotation whose `Field(...)` has no default.
    """

    positive_conditioning: Optional[str] = Field(
        default=None, description="The positive conditioning."
    )
    negative_conditioning: Optional[str] = Field(
        default=None, description="The negative conditioning."
    )
    width: Optional[int] = Field(
        default=None, description="Width of the image/latents in pixels."
    )
    height: Optional[int] = Field(
        default=None, description="Height of the image/latents in pixels."
    )
    seed: Optional[int] = Field(
        default=None, description="The seed used for noise generation."
    )
    cfg_scale: Optional[float] = Field(
        default=None, description="The classifier-free guidance scale."
    )
    steps: Optional[int] = Field(
        default=None, description="The number of steps used for inference."
    )
    scheduler: Optional[str] = Field(
        default=None, description="The scheduler used for inference."
    )
    model: Optional[str] = Field(
        default=None, description="The model used for inference."
    )
    strength: Optional[float] = Field(
        default=None,
        description="The strength used for image-to-image/latents-to-latents.",
    )
    image: Optional[str] = Field(
        default=None, description="The ID of the initial image."
    )
    latents: Optional[str] = Field(
        default=None, description="The ID of the initial latents."
    )
    # Pending model refactor:
    # vae: Optional[str] = Field(default=None, description="The VAE used for decoding.")
    # unet: Optional[str] = Field(default=None, description="The UNet used for inference.")
    # clip: Optional[str] = Field(default=None, description="The CLIP Encoder used for conditioning.")
"""
Minimal Uploads Metadata Model
"""
class UploadsMetadata(BaseModel):
    """Limited metadata for an uploaded image/latents.

    Every field is optional and declares `default=None` explicitly, so that it
    stays optional regardless of how the installed pydantic version treats an
    `Optional[...]` annotation whose `Field(...)` has no default.
    """

    width: Optional[int] = Field(
        default=None, description="Width of the image/latents in pixels."
    )
    height: Optional[int] = Field(
        default=None, description="Height of the image/latents in pixels."
    )
    # The extra field will be the contents of the PNG file's tEXt chunk. It may have come
    # from another SD application or InvokeAI, so we need to make it very flexible. I think it's
    # best to just store it as a string and let the frontend parse it.
    # If the upload is a latents type, this will be omitted.
    extra: Optional[str] = Field(
        default=None,
        description="Extra metadata, extracted from the PNG tEXt chunk.",
    )
"""
Slimmed-down Image Storage Service Base
- No longer lists images or generates URLs - only stores and retrieves images.
- OSS implementation for disk storage
"""
class ImageStorageBase(ABC):
    """Abstract interface for persisting images and reading them back.

    Listing images and building URLs are intentionally not part of this
    interface; it only stores, retrieves and deletes.
    """

    @abstractmethod
    def save(
        self,
        image: PILImage,
        image_type: ImageType,
        context_id: str,
        node_id: str,
        metadata: CoreGenerationMetadata,
    ) -> str:
        """Persist `image` together with its thumbnail.

        Args:
            image: The PIL image to store.
            image_type: The category the image is stored under.
            context_id: Identifier of the context that produced the image
                (presumably the session ID - confirm against callers).
            node_id: Identifier of the node that produced the image.
            metadata: Core generation metadata to associate with the image.

        Returns:
            The unique identifier of the stored image.
        """

    @abstractmethod
    def get(self, id: str, thumbnail: bool = False) -> Optional[PILImage]:
        """Load a stored image as a PIL Image.

        Args:
            id: The unique identifier of the image.
            thumbnail: Presumably selects the thumbnail instead of the
                full-size image when True - confirm in implementations.

        Returns:
            The image, or None (presumably when no such image exists).
        """

    @abstractmethod
    def delete(self, id: str) -> None:
        """Remove the image identified by `id` from storage."""
"""
New Url Service Base
- Abstracts the logic for generating URLs out of the storage service
- OSS implementation for locally-hosted URLs
"""
class URLServiceBase(ABC):
    """Responsible for locating `image` and `latents` resources (and their thumbnails)."""

    @abstractmethod
    def get_url(self, id: str, thumbnail: bool = False) -> str:
        """Gets the URL for a resource.

        Args:
            id: The unique identifier of the resource.
            thumbnail: When True, request the URL of the resource's thumbnail
                rather than the resource itself. Defaults to False, so plain
                `get_url(id)` calls keep their meaning. Mirrors the `thumbnail`
                flag on `ImageStorageBase.get`.

        Returns:
            The URL at which the resource (or its thumbnail) can be fetched.
        """
        pass
"""
New Images Database Service Base
This is a new service that will be responsible for the new `images` table(s):
- Storing images in the table
- Retrieving individual images and pages of images
- Deleting individual images
Operations will typically use joins with the various `images` tables.
"""
class ImagesDbServiceBase(ABC):
    """Responsible for interfacing with `images` table."""

    class GeneratedImageEntity(BaseModel):
        """An image record carrying its session ID, node ID and core generation metadata."""

        id: str = Field(description="The unique identifier for the image.")
        session_id: str = Field(description="The session ID.")
        node_id: str = Field(description="The node ID.")
        metadata: CoreGenerationMetadata = Field(
            description="The metadata for the image."
        )

    class UploadedImageEntity(BaseModel):
        """An image record carrying only an ID and uploads metadata."""

        id: str = Field(description="The unique identifier for the image.")
        metadata: UploadsMetadata = Field(description="The metadata for the image.")

    @abstractmethod
    def get(self, id: str) -> Union[GeneratedImageEntity, UploadedImageEntity, None]:
        """Gets an image from the `images` table.

        Args:
            id: The unique identifier of the image.

        Returns:
            The matching entity, or None (presumably when no row matches).
        """
        pass

    @abstractmethod
    def get_many(
        self, image_type: ImageType, page: int = 0, per_page: int = 10
    ) -> PaginatedResults[Union[GeneratedImageEntity, UploadedImageEntity]]:
        """Gets a page of images from the `images` table.

        Args:
            image_type: The type of images to list.
            page: The page to fetch (presumably zero-based, given the default of 0).
            per_page: The number of images per page.

        Returns:
            A paginated collection of image entities.
        """
        pass

    @abstractmethod
    def delete(self, id: str) -> None:
        """Deletes an image from the `images` table."""
        pass

    @abstractmethod
    def set(
        self,
        id: str,
        image_type: ImageType,
        session_id: Optional[str],
        node_id: Optional[str],
        # `Union[...]` rather than `A | B`: annotations are evaluated when the
        # method is defined, and `|` between classes raises TypeError before
        # Python 3.10. This also matches the `Union` usage elsewhere in the class.
        metadata: Union[CoreGenerationMetadata, UploadsMetadata],
    ) -> None:
        """Sets an image in the `images` table.

        Args:
            id: The unique identifier of the image.
            image_type: The type of the image.
            session_id: The session ID; may be None (presumably for uploads,
                whose entity has no session ID).
            node_id: The node ID; may be None (presumably for uploads,
                whose entity has no node ID).
            metadata: Core generation metadata or uploads metadata for the image.
        """
        pass
"""
New Latents Database Service Base
This is a new service that will be responsible for the new `latents` table:
- Storing latents in the table
- Retrieving individual latents and pages of latents
- Deleting individual latents
Operations will always use joins with the `latents_metadata` table.
"""
class LatentsDbServiceBase(ABC):
    """Responsible for interfacing with `latents` table."""

    class GeneratedLatentsEntity(BaseModel):
        """A latents record carrying its session ID, node ID and core generation metadata."""

        id: str = Field(description="The unique identifier for the latents.")
        session_id: str = Field(description="The session ID.")
        node_id: str = Field(description="The node ID.")
        metadata: CoreGenerationMetadata = Field(
            description="The metadata for the latents."
        )

    class UploadedLatentsEntity(BaseModel):
        """A latents record carrying only an ID and uploads metadata."""

        id: str = Field(description="The unique identifier for the latents.")
        metadata: UploadsMetadata = Field(description="The metadata for the latents.")

    @abstractmethod
    def get(
        self, id: str
    ) -> Union[GeneratedLatentsEntity, UploadedLatentsEntity, None]:
        """Gets a latents from the `latents` table.

        Args:
            id: The unique identifier of the latents.

        Returns:
            The matching entity, or None (presumably when no row matches).
        """
        pass

    @abstractmethod
    def get_many(
        self, latents_type: LatentsType, page: int = 0, per_page: int = 10
    ) -> PaginatedResults[Union[GeneratedLatentsEntity, UploadedLatentsEntity]]:
        """Gets a page of latents from the `latents` table.

        Args:
            latents_type: The type of latents to list.
            page: The page to fetch (presumably zero-based, given the default of 0).
            per_page: The number of latents per page.

        Returns:
            A paginated collection of latents entities.
        """
        pass

    @abstractmethod
    def delete(self, id: str) -> None:
        """Deletes a latents from the `latents` table."""
        pass

    @abstractmethod
    def set(
        self,
        id: str,
        latents_type: LatentsType,
        session_id: Optional[str],
        node_id: Optional[str],
        # `Union[...]` rather than `A | B`: annotations are evaluated when the
        # method is defined, and `|` between classes raises TypeError before
        # Python 3.10. This also matches the `Union` usage elsewhere in the class.
        metadata: Union[CoreGenerationMetadata, UploadsMetadata],
    ) -> None:
        """Sets a latents in the `latents` table.

        Args:
            id: The unique identifier of the latents.
            latents_type: The type of the latents.
            session_id: The session ID; may be None (presumably for uploads,
                whose entity has no session ID).
            node_id: The node ID; may be None (presumably for uploads,
                whose entity has no node ID).
            metadata: Core generation metadata or uploads metadata for the latents.
        """
        pass
"""
Database Changes
The existing tables will remain as-is, new tables will be added.
Latents now also have the same types as images - `results`, `intermediates`, `uploads`. Storage, retrieval, and operations may diverge from images in the future, so they are managed separately.
A few `images` tables are created to store all images:
- `results` and `intermediates` images have additional data: `session_id` and `node_id`, and may be further differentiated in the future. For this reason, they each get their own table.
- `uploads` do not get their own table, as they are never going to have more than an `id`, `image_type` and `timestamp`.
- `images_metadata` holds the same image metadata that is written to the image. This table, along with the URL service, allows us to more efficiently serve images without having to read the image from storage.
The same tables are made for `latents` and for the moment, implementation is expected to be identical.
Schemas for each table below.
Insertions and updates of ancillary tables (e.g. `results_images`, `images_metadata`, etc) will need to be done manually in the services, but should be straightforward. Deletion via cascading will be handled by the database.
"""
"""
`images` stores all images, regardless of type
"""
"""--sql
CREATE TABLE IF NOT EXISTS images (
id TEXT PRIMARY KEY,
image_type TEXT CHECK(image_type IN ('results', 'intermediates', 'uploads')),
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
CREATE UNIQUE INDEX IF NOT EXISTS idx_images ON images(id);
CREATE INDEX IF NOT EXISTS idx_images_image_type ON images(image_type);
"""
"""
`results_images` stores additional data specific to `results` images.
"""
"""--sql
CREATE TABLE IF NOT EXISTS results_images (
images_id TEXT PRIMARY KEY,
session_id TEXT NOT NULL,
node_id TEXT NOT NULL,
FOREIGN KEY(images_id) REFERENCES images(id) ON DELETE CASCADE
);
CREATE UNIQUE INDEX IF NOT EXISTS idx_results_images_images_id ON results_images(images_id);
"""
"""
`intermediates_images` stores additional data specific to `intermediates` images
"""
"""--sql
CREATE TABLE IF NOT EXISTS intermediates_images (
images_id TEXT PRIMARY KEY,
session_id TEXT NOT NULL,
node_id TEXT NOT NULL,
FOREIGN KEY(images_id) REFERENCES images(id) ON DELETE CASCADE
);
CREATE UNIQUE INDEX IF NOT EXISTS idx_intermediates_images_images_id ON intermediates_images(images_id);
"""
"""
`images_metadata` stores basic metadata for any image type
"""
"""--sql
CREATE TABLE IF NOT EXISTS images_metadata (
images_id TEXT PRIMARY KEY,
metadata TEXT,
FOREIGN KEY(images_id) REFERENCES images(id) ON DELETE CASCADE
);
CREATE UNIQUE INDEX IF NOT EXISTS idx_images_metadata_images_id ON images_metadata(images_id);
"""
# `latents` table: stores references to latents
"""--sql
CREATE TABLE IF NOT EXISTS latents (
id TEXT PRIMARY KEY,
latents_type TEXT CHECK(latents_type IN ('results', 'intermediates', 'uploads')),
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
CREATE UNIQUE INDEX IF NOT EXISTS idx_latents_id ON latents(id);
CREATE INDEX IF NOT EXISTS idx_latents_latents_type ON latents(latents_type);
"""
# `results_latents` stores additional data specific to `result` latents
"""--sql
CREATE TABLE IF NOT EXISTS results_latents (
latents_id TEXT PRIMARY KEY,
session_id TEXT NOT NULL,
node_id TEXT NOT NULL,
FOREIGN KEY(latents_id) REFERENCES latents(id) ON DELETE CASCADE
);
CREATE UNIQUE INDEX IF NOT EXISTS idx_results_latents_latents_id ON results_latents(latents_id);
"""
# `intermediates_latents` stores additional data specific to `intermediate` latents
"""--sql
CREATE TABLE IF NOT EXISTS intermediates_latents (
latents_id TEXT PRIMARY KEY,
session_id TEXT NOT NULL,
node_id TEXT NOT NULL,
FOREIGN KEY(latents_id) REFERENCES latents(id) ON DELETE CASCADE
);
CREATE UNIQUE INDEX IF NOT EXISTS idx_intermediates_latents_latents_id ON intermediates_latents(latents_id);
"""
# `latents_metadata` table: stores generated/transformed metadata for latents
"""--sql
CREATE TABLE IF NOT EXISTS latents_metadata (
latents_id TEXT PRIMARY KEY,
metadata TEXT,
FOREIGN KEY(latents_id) REFERENCES latents(id) ON DELETE CASCADE
);
CREATE UNIQUE INDEX IF NOT EXISTS idx_latents_metadata_latents_id ON latents_metadata(latents_id);
"""