Enable LoRAPatcher.apply_smart_lora_patches(...) throughout the stack.

(minor) Rename num_layers -> num_loras in unit tests.
Add test_apply_smart_lora_patches_to_partially_loaded_model(...).
2026-01-21 07:28:06 -05:00 · 2024-12-12 22:41:50 +00:00 · 2024-12-12 22:41:50 +00:00 · 2024-12-12 22:41:50 +00:00 · 2024-12-12 22:41:50 +00:00 · 2024-12-12 22:41:46 +00:00
107 changed files with 3552 additions and 1688 deletions
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -2,29 +2,42 @@

 ## Builder stage

-FROM library/ubuntu:23.04 AS builder
+FROM library/ubuntu:24.04 AS builder

 ARG DEBIAN_FRONTEND=noninteractive
 RUN rm -f /etc/apt/apt.conf.d/docker-clean; echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache
 RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
    --mount=type=cache,target=/var/lib/apt,sharing=locked \
    apt update && apt-get install -y \
-        git \
-        python3-venv \
-        python3-pip \
-        build-essential
+        build-essential \
+        git

-ENV INVOKEAI_SRC=/opt/invokeai
-ENV VIRTUAL_ENV=/opt/venv/invokeai
+# Install `uv` for package management
+COPY --from=ghcr.io/astral-sh/uv:0.5.5 /uv /uvx /bin/

+ENV VIRTUAL_ENV=/opt/venv
 ENV PATH="$VIRTUAL_ENV/bin:$PATH"
+ENV INVOKEAI_SRC=/opt/invokeai
+ENV PYTHON_VERSION=3.11
+ENV UV_COMPILE_BYTECODE=1
+ENV UV_LINK_MODE=copy
+
 ARG GPU_DRIVER=cuda
 ARG TARGETPLATFORM="linux/amd64"
 # unused but available
 ARG BUILDPLATFORM

-WORKDIR ${INVOKEAI_SRC}
+# Switch to the `ubuntu` user to work around dependency issues with uv-installed python
+RUN mkdir -p ${VIRTUAL_ENV} && \
+    mkdir -p ${INVOKEAI_SRC} && \
+    chmod -R a+w /opt
+USER ubuntu

+# Install python and create the venv
+RUN uv python install ${PYTHON_VERSION} && \
+    uv venv --relocatable --prompt "invoke" --python ${PYTHON_VERSION} ${VIRTUAL_ENV}
+
+WORKDIR ${INVOKEAI_SRC}
 COPY invokeai ./invokeai
 COPY pyproject.toml ./

@@ -32,25 +45,18 @@ COPY pyproject.toml ./
 # the local working copy can be bind-mounted into the image
 # at path defined by ${INVOKEAI_SRC}
 # NOTE: there are no pytorch builds for arm64 + cuda, only cpu
-# x86_64/CUDA is default
-RUN --mount=type=cache,target=/root/.cache/pip \
-    python3 -m venv ${VIRTUAL_ENV} &&\
+# x86_64/CUDA is the default
+RUN --mount=type=cache,target=/home/ubuntu/.cache/uv,uid=1000,gid=1000 \
    if [ "$TARGETPLATFORM" = "linux/arm64" ] || [ "$GPU_DRIVER" = "cpu" ]; then \
        extra_index_url_arg="--extra-index-url https://download.pytorch.org/whl/cpu"; \
    elif [ "$GPU_DRIVER" = "rocm" ]; then \
        extra_index_url_arg="--extra-index-url https://download.pytorch.org/whl/rocm6.1"; \
    else \
        extra_index_url_arg="--extra-index-url https://download.pytorch.org/whl/cu124"; \
-    fi &&\
+    fi && \
+    uv pip install --python ${PYTHON_VERSION} $extra_index_url_arg -e "."

-    # xformers + triton fails to install on arm64
-    if [ "$GPU_DRIVER" = "cuda" ] && [ "$TARGETPLATFORM" = "linux/amd64" ]; then \
-        pip install $extra_index_url_arg -e ".[xformers]"; \
-    else \
-        pip install $extra_index_url_arg -e "."; \
-    fi
-
-# #### Build the Web UI ------------------------------------
+#### Build the Web UI ------------------------------------

 FROM node:20-slim AS web-builder
 ENV PNPM_HOME="/pnpm"
@@ -66,7 +72,7 @@ RUN npx vite build

 #### Runtime stage ---------------------------------------

-FROM library/ubuntu:23.04 AS runtime
+FROM library/ubuntu:24.04 AS runtime

 ARG DEBIAN_FRONTEND=noninteractive
 ENV PYTHONUNBUFFERED=1
@@ -83,17 +89,16 @@ RUN apt update && apt install -y --no-install-recommends \
        gosu \
        magic-wormhole \
        libglib2.0-0 \
-        libgl1-mesa-glx \
-        python3-venv \
-        python3-pip \
+        libgl1 \
+        libglx-mesa0 \
        build-essential \
        libopencv-dev \
        libstdc++-10-dev &&\
    apt-get clean && apt-get autoclean

-
 ENV INVOKEAI_SRC=/opt/invokeai
-ENV VIRTUAL_ENV=/opt/venv/invokeai
+ENV VIRTUAL_ENV=/opt/venv
+ENV PYTHON_VERSION=3.11
 ENV INVOKEAI_ROOT=/invokeai
 ENV INVOKEAI_HOST=0.0.0.0
 ENV INVOKEAI_PORT=9090
@@ -101,6 +106,14 @@ ENV PATH="$VIRTUAL_ENV/bin:$INVOKEAI_SRC:$PATH"
 ENV CONTAINER_UID=${CONTAINER_UID:-1000}
 ENV CONTAINER_GID=${CONTAINER_GID:-1000}

+# Install `uv` for package management
+# and install python for the ubuntu user (expected to exist on ubuntu >=24.x)
+# this is too tiny to optimize with multi-stage builds, but maybe we'll come back to it
+COPY --from=ghcr.io/astral-sh/uv:0.5.5 /uv /uvx /bin/
+USER ubuntu
+RUN uv python install ${PYTHON_VERSION}
+USER root
+
 # --link requires buldkit w/ dockerfile syntax 1.4
 COPY --link --from=builder ${INVOKEAI_SRC} ${INVOKEAI_SRC}
 COPY --link --from=builder ${VIRTUAL_ENV} ${VIRTUAL_ENV}
@@ -115,7 +128,7 @@ WORKDIR ${INVOKEAI_SRC}

 # build patchmatch
 RUN cd /usr/lib/$(uname -p)-linux-gnu/pkgconfig/ && ln -sf opencv4.pc opencv.pc
-RUN python3 -c "from patchmatch import patch_match"
+RUN python -c "from patchmatch import patch_match"

 RUN mkdir -p ${INVOKEAI_ROOT} && chown -R ${CONTAINER_UID}:${CONTAINER_GID} ${INVOKEAI_ROOT}

--- a/docker/docker-entrypoint.sh
+++ b/docker/docker-entrypoint.sh
@@ -16,6 +16,9 @@ set -e -o pipefail

 USER_ID=${CONTAINER_UID:-1000}
 USER=ubuntu
+# if the user does not exist, create it. It is expected to be present on ubuntu >=24.x
+_=$(id ${USER} 2>&1) || useradd -u ${USER_ID} ${USER}
+# ensure the UID is correct
 usermod -u ${USER_ID} ${USER} 1>/dev/null

 ### Set the $PUBLIC_KEY env var to enable SSH access.
@@ -36,6 +39,8 @@ fi
 mkdir -p "${INVOKEAI_ROOT}"
 chown --recursive ${USER} "${INVOKEAI_ROOT}" || true
 cd "${INVOKEAI_ROOT}"
+export HF_HOME=${HF_HOME:-$INVOKEAI_ROOT/.cache/huggingface}
+export MPLCONFIGDIR=${MPLCONFIGDIR:-$INVOKEAI_ROOT/.matplotlib}

 # Run the CMD as the Container User (not root).
 exec gosu ${USER} "$@"
--- a/docs/contributing/ARCHITECTURE.md
+++ b/docs/contributing/ARCHITECTURE.md
@@ -50,7 +50,7 @@ Applications are built on top of the invoke framework. They should construct `in

 ### Web UI

-The Web UI is built on top of an HTTP API built with [FastAPI](https://fastapi.tiangolo.com/) and [Socket.IO](https://socket.io/). The frontend code is found in `/frontend` and the backend code is found in `/ldm/invoke/app/api_app.py` and `/ldm/invoke/app/api/`. The code is further organized as such:
+The Web UI is built on top of an HTTP API built with [FastAPI](https://fastapi.tiangolo.com/) and [Socket.IO](https://socket.io/). The frontend code is found in `/invokeai/frontend` and the backend code is found in `/invokeai/app/api_app.py` and `/invokeai/app/api/`. The code is further organized as such:

 | Component | Description |
 | --- | --- |
@@ -62,7 +62,7 @@ The Web UI is built on top of an HTTP API built with [FastAPI](https://fastapi.t

 ### CLI

-The CLI is built automatically from invocation metadata, and also supports invocation piping and auto-linking. Code is available in `/ldm/invoke/app/cli_app.py`.
+The CLI is built automatically from invocation metadata, and also supports invocation piping and auto-linking. Code is available in `/invokeai/frontend/cli`.

 ## Invoke

@@ -70,7 +70,7 @@ The Invoke framework provides the interface to the underlying AI systems and is

 ### Invoker

-The invoker (`/ldm/invoke/app/services/invoker.py`) is the primary interface through which applications interact with the framework. Its primary purpose is to create, manage, and invoke sessions. It also maintains two sets of services:
+The invoker (`/invokeai/app/services/invoker.py`) is the primary interface through which applications interact with the framework. Its primary purpose is to create, manage, and invoke sessions. It also maintains two sets of services:
 - **invocation services**, which are used by invocations to interact with core functionality.
 - **invoker services**, which are used by the invoker to manage sessions and manage the invocation queue.

@@ -82,12 +82,12 @@ The session graph does not support looping. This is left as an application probl

 ### Invocations

-Invocations represent individual units of execution, with inputs and outputs. All invocations are located in `/ldm/invoke/app/invocations`, and are all automatically discovered and made available in the applications. These are the primary way to expose new functionality in Invoke.AI, and the [implementation guide](INVOCATIONS.md) explains how to add new invocations.
+Invocations represent individual units of execution, with inputs and outputs. All invocations are located in `/invokeai/app/invocations`, and are all automatically discovered and made available in the applications. These are the primary way to expose new functionality in Invoke.AI, and the [implementation guide](INVOCATIONS.md) explains how to add new invocations.

 ### Services

-Services provide invocations access AI Core functionality and other necessary functionality (e.g. image storage). These are available in `/ldm/invoke/app/services`. As a general rule, new services should provide an interface as an abstract base class, and may provide a lightweight local implementation by default in their module. The goal for all services should be to enable the usage of different implementations (e.g. using cloud storage for image storage), but should not load any module dependencies unless that implementation has been used (i.e. don't import anything that won't be used, especially if it's expensive to import).
+Services provide invocations access AI Core functionality and other necessary functionality (e.g. image storage). These are available in `/invokeai/app/services`. As a general rule, new services should provide an interface as an abstract base class, and may provide a lightweight local implementation by default in their module. The goal for all services should be to enable the usage of different implementations (e.g. using cloud storage for image storage), but should not load any module dependencies unless that implementation has been used (i.e. don't import anything that won't be used, especially if it's expensive to import).

 ## AI Core

-The AI Core is represented by the rest of the code base (i.e. the code outside of `/ldm/invoke/app/`).
+The AI Core is represented by the rest of the code base (i.e. the code outside of `/invokeai/app/`).
--- a/docs/contributing/INVOCATIONS.md
+++ b/docs/contributing/INVOCATIONS.md
@@ -287,8 +287,8 @@ new Invocation ready to be used.

 Once you've created a Node, the next step is to share it with the community! The
 best way to do this is to submit a Pull Request to add the Node to the
-[Community Nodes](nodes/communityNodes) list. If you're not sure how to do that,
-take a look a at our [contributing nodes overview](contributingNodes).
+[Community Nodes](../nodes/communityNodes.md) list. If you're not sure how to do that,
+take a look at our [contributing nodes overview](../nodes/contributingNodes.md).

 ## Advanced

--- a/docs/contributing/MODEL_MANAGER.md
+++ b/docs/contributing/MODEL_MANAGER.md
@@ -9,20 +9,20 @@ model. These are the:
  configuration information. Among other things, the record service
  tracks the type of the model, its provenance, and where it can be
  found on disk.
-  
+
 * _ModelInstallServiceBase_ A service for installing models to
  disk. It uses `DownloadQueueServiceBase` to download models and
  their metadata, and `ModelRecordServiceBase` to store that
  information. It is also responsible for managing the InvokeAI
  `models` directory and its contents.
-  
+
 * _DownloadQueueServiceBase_
  A multithreaded downloader responsible
  for downloading models from a remote source to disk. The download
  queue has special methods for downloading repo_id folders from
  Hugging Face, as well as discriminating among model versions in
  Civitai, but can be used for arbitrary content.
-  
+
  * _ModelLoadServiceBase_
  Responsible for loading a model from disk
  into RAM and VRAM and getting it ready for inference.
@@ -207,9 +207,9 @@ for use in the InvokeAI web server. Its signature is:

 ```
 def open(
-       cls, 
-    config: InvokeAIAppConfig, 
-    conn: Optional[sqlite3.Connection] = None, 
+       cls,
+    config: InvokeAIAppConfig,
+    conn: Optional[sqlite3.Connection] = None,
    lock: Optional[threading.Lock] = None
    ) -> Union[ModelRecordServiceSQL, ModelRecordServiceFile]:
 ```
@@ -363,7 +363,7 @@ functionality:

 * Registering a model config record for a model already located on the
  local filesystem, without moving it or changing its path.
-  
+
 * Installing a model alreadiy located on the local filesystem, by
  moving it into the InvokeAI root directory under the
  `models` folder (or wherever config parameter `models_dir`
@@ -371,21 +371,21 @@ functionality:

 * Probing of models to determine their type, base type and other key
  information.
-  
+
 * Interface with the InvokeAI event bus to provide status updates on
  the download, installation and registration process.
-  
+
 * Downloading a model from an arbitrary URL and installing it in
  `models_dir`.

 * Special handling for HuggingFace repo_ids to recursively download
  the contents of the repository, paying attention to alternative
  variants such as fp16.
-  
+
 * Saving tags and other metadata about the model into the invokeai database
  when fetching from a repo that provides that type of information,
  (currently only HuggingFace).
-  
+
 ### Initializing the installer

 A default installer is created at InvokeAI api startup time and stored
@@ -461,7 +461,7 @@ revision.
 `config` is an optional dict of values that will override the
 autoprobed values for model type, base, scheduler prediction type, and
 so forth. See [Model configuration and
-probing](#Model-configuration-and-probing) for details.
+probing](#model-configuration-and-probing) for details.

 `access_token` is an optional access token for accessing resources
 that need authentication.
@@ -494,7 +494,7 @@ source8 = URLModelSource(url='https://civitai.com/api/download/models/63006', ac

 for source in [source1, source2, source3, source4, source5, source6, source7]:
   install_job = installer.install_model(source)
-   
+
 source2job = installer.wait_for_installs(timeout=120)
 for source in sources:
    job = source2job[source]
@@ -504,7 +504,7 @@ for source in sources:
  print(f"{source} installed as {model_key}")
 elif job.errored:
     print(f"{source}: {job.error_type}.\nStack trace:\n{job.error}")
- 
+
 ```

 As shown here, the `import_model()` method accepts a variety of
@@ -1364,6 +1364,7 @@ the in-memory loaded model:
 |----------------|-----------------|------------------|
 | `config`       | AnyModelConfig         | A copy of the model's configuration record for retrieving base type, etc. |
 | `model`        | AnyModel               | The instantiated model (details below) |
+| `locker`       | ModelLockerBase        | A context manager that mediates the movement of the model into VRAM |

 ### get_model_by_key(key, [submodel]) -> LoadedModel

--- a/docs/contributing/TESTS.md
+++ b/docs/contributing/TESTS.md
@@ -1,6 +1,6 @@
 # InvokeAI Backend Tests

-We use `pytest` to run the backend python tests. (See [pyproject.toml](/pyproject.toml) for the default `pytest` options.)
+We use `pytest` to run the backend python tests. (See [pyproject.toml](https://github.com/invoke-ai/InvokeAI/blob/main/pyproject.toml) for the default `pytest` options.)

 ## Fast vs. Slow
 All tests are categorized as either 'fast' (no test annotation) or 'slow' (annotated with the `@pytest.mark.slow` decorator).
@@ -33,7 +33,7 @@ pytest tests -m ""

 ## Test Organization

-All backend tests are in the [`tests/`](/tests/) directory. This directory mirrors the organization of the `invokeai/` directory. For example, tests for `invokeai/model_management/model_manager.py` would be found in `tests/model_management/test_model_manager.py`.
+All backend tests are in the [`tests/`](https://github.com/invoke-ai/InvokeAI/tree/main/tests) directory. This directory mirrors the organization of the `invokeai/` directory. For example, tests for `invokeai/model_management/model_manager.py` would be found in `tests/model_management/test_model_manager.py`.

 TODO: The above statement is aspirational. A re-organization of legacy tests is required to make it true.

--- a/docs/contributing/contribution_guides/development.md
+++ b/docs/contributing/contribution_guides/development.md
@@ -2,7 +2,7 @@

 ## **What do I need to know to help?**

-If you are looking to help with a code contribution, InvokeAI uses several different technologies under the hood: Python (Pydantic, FastAPI, diffusers) and Typescript (React, Redux Toolkit, ChakraUI, Mantine, Konva). Familiarity with StableDiffusion and image generation concepts is helpful, but not essential. 
+If you are looking to help with a code contribution, InvokeAI uses several different technologies under the hood: Python (Pydantic, FastAPI, diffusers) and Typescript (React, Redux Toolkit, ChakraUI, Mantine, Konva). Familiarity with StableDiffusion and image generation concepts is helpful, but not essential.


 ## **Get Started**
@@ -12,7 +12,7 @@ To get started, take a look at our [new contributors checklist](newContributorCh
 Once you're setup, for more information, you can review the documentation specific to your area of interest:

 * #### [InvokeAI Architecure](../ARCHITECTURE.md)
-* #### [Frontend Documentation](https://github.com/invoke-ai/InvokeAI/tree/main/invokeai/frontend/web)
+* #### [Frontend Documentation](../frontend/index.md)
 * #### [Node Documentation](../INVOCATIONS.md)
 * #### [Local Development](../LOCAL_DEVELOPMENT.md)

@@ -20,15 +20,15 @@ Once you're setup, for more information, you can review the documentation specif

 If you don't feel ready to make a code contribution yet, no problem! You can also help out in other ways, such as [documentation](documentation.md), [translation](translation.md) or helping support other users and triage issues as they're reported in GitHub.

-There are two paths to making a development contribution: 
+There are two paths to making a development contribution:

 1. Choosing an open issue to address. Open issues can be found in the [Issues](https://github.com/invoke-ai/InvokeAI/issues?q=is%3Aissue+is%3Aopen) section of the InvokeAI repository. These are tagged by the issue type (bug, enhancement, etc.) along with the “good first issues” tag denoting if they are suitable for first time contributors.
-    1. Additional items can be found on our [roadmap](https://github.com/orgs/invoke-ai/projects/7). The roadmap is organized in terms of priority, and contains features of varying size and complexity. If there is an inflight item you’d like to help with, reach out to the contributor assigned to the item to see how you can help. 
+    1. Additional items can be found on our [roadmap](https://github.com/orgs/invoke-ai/projects/7). The roadmap is organized in terms of priority, and contains features of varying size and complexity. If there is an inflight item you’d like to help with, reach out to the contributor assigned to the item to see how you can help.
 2. Opening a new issue or feature to add. **Please make sure you have searched through existing issues before creating new ones.**

 *Regardless of what you choose, please post in the  [#dev-chat](https://discord.com/channels/1020123559063990373/1049495067846524939) channel of the Discord before you start development in order to confirm that the issue or feature is aligned with the current direction of the project. We value our contributors time and effort and want to ensure that no one’s time is being misspent.*

-## Best Practices: 
+## Best Practices:
 * Keep your pull requests small. Smaller pull requests are more likely to be accepted and merged
 * Comments! Commenting your code helps reviewers easily understand your contribution
 * Use Python and Typescript’s typing systems, and consider using an editor with [LSP](https://microsoft.github.io/language-server-protocol/) support to streamline development
@@ -38,7 +38,7 @@ There are two paths to making a development contribution:

 If you need help, you can ask questions in the [#dev-chat](https://discord.com/channels/1020123559063990373/1049495067846524939) channel of the Discord.

-For frontend related work, **@psychedelicious** is the best person to reach out to. 
+For frontend related work, **@psychedelicious** is the best person to reach out to.

 For backend related work, please reach out to **@blessedcoolant**, **@lstein**, **@StAlKeR7779** or **@psychedelicious**.

--- a/docs/contributing/contribution_guides/newContributorChecklist.md
+++ b/docs/contributing/contribution_guides/newContributorChecklist.md
@@ -22,15 +22,15 @@ Before starting these steps, ensure you have your local environment [configured
 2. Fork the [InvokeAI](https://github.com/invoke-ai/InvokeAI) repository to your GitHub profile. This means that you will have a copy of the repository under **your-GitHub-username/InvokeAI**.
 3. Clone the repository to your local machine using:

-   ```bash
-   git clone https://github.com/your-GitHub-username/InvokeAI.git
-   ```
+    ```bash
+    git clone https://github.com/your-GitHub-username/InvokeAI.git
+    ```

 If you're unfamiliar with using Git through the commandline, [GitHub Desktop](https://desktop.github.com) is a easy-to-use alternative with a UI. You can do all the same steps listed here, but through the interface. 4. Create a new branch for your fix using:

-    ```bash
-    git checkout -b branch-name-here
-    ```
+  ```bash
+  git checkout -b branch-name-here
+  ```

 5. Make the appropriate changes for the issue you are trying to address or the feature that you want to add.
 6. Add the file contents of the changed files to the "snapshot" git uses to manage the state of the project, also known as the index:
--- a/docs/contributing/dev-environment.md
+++ b/docs/contributing/dev-environment.md
@@ -27,9 +27,9 @@ If you just want to use Invoke, you should use the [installer][installer link].

 5. Activate the venv (you'll need to do this every time you want to run the app):

-        ```sh
-        source .venv/bin/activate
-        ```
+      ```sh
+      source .venv/bin/activate
+      ```

 6. Install the repo as an [editable install][editable install link]:

@@ -37,7 +37,7 @@ If you just want to use Invoke, you should use the [installer][installer link].
      pip install -e ".[dev,test,xformers]" --use-pep517 --extra-index-url https://download.pytorch.org/whl/cu121
      ```

-      Refer to the [manual installation][manual install link]] instructions for more determining the correct install options. `xformers` is optional, but `dev` and `test` are not.
+      Refer to the [manual installation][manual install link] instructions for more information on determining the correct install options. `xformers` is optional, but `dev` and `test` are not.

 7. Install the frontend dev toolchain:

--- a/docs/contributing/index.md
+++ b/docs/contributing/index.md
@@ -34,11 +34,11 @@ Please reach out to @hipsterusername on [Discord](https://discord.gg/ZmtBAhwWhy)

 ## Contributors

-This project is a combined effort of dedicated people from across the world. [Check out the list of all these amazing people](https://invoke-ai.github.io/InvokeAI/other/CONTRIBUTORS/). We thank them for their time, hard work and effort.
+This project is a combined effort of dedicated people from across the world. [Check out the list of all these amazing people](contributors.md). We thank them for their time, hard work and effort.

 ## Code of Conduct

-The InvokeAI community is a welcoming place, and we want your help in maintaining that. Please review our [Code of Conduct](https://github.com/invoke-ai/InvokeAI/blob/main/docs/CODE_OF_CONDUCT.md) to learn more - it's essential to maintaining a respectful and inclusive environment.
+The InvokeAI community is a welcoming place, and we want your help in maintaining that. Please review our [Code of Conduct](../CODE_OF_CONDUCT.md) to learn more - it's essential to maintaining a respectful and inclusive environment.

 By making a contribution to this project, you certify that:

--- a/invokeai/app/api/routers/model_manager.py
+++ b/invokeai/app/api/routers/model_manager.py
@@ -37,7 +37,7 @@ from invokeai.backend.model_manager.config import (
    ModelFormat,
    ModelType,
 )
-from invokeai.backend.model_manager.load.model_cache.cache_stats import CacheStats
+from invokeai.backend.model_manager.load.model_cache.model_cache_base import CacheStats
 from invokeai.backend.model_manager.metadata.fetch.huggingface import HuggingFaceMetadataFetch
 from invokeai.backend.model_manager.metadata.metadata_base import ModelMetadataWithFiles, UnknownMetadataException
 from invokeai.backend.model_manager.search import ModelSearch
--- a/invokeai/app/invocations/init.py
+++ b/invokeai/app/invocations/init.py
@@ -15,6 +15,11 @@ custom_nodes_readme_path = str(custom_nodes_path / "README.md")
 shutil.copy(Path(__file__).parent / "custom_nodes/init.py", custom_nodes_init_path)
 shutil.copy(Path(__file__).parent / "custom_nodes/README.md", custom_nodes_readme_path)

+# set the same permissions as the destination directory, in case our source is read-only,
+# so that the files are user-writable
+for p in custom_nodes_path.glob("**/*"):
+    p.chmod(custom_nodes_path.stat().st_mode)
+
 # Import custom nodes, see https://docs.python.org/3/library/importlib.html#importing-programmatically
 spec = spec_from_file_location("custom_nodes", custom_nodes_init_path)
 if spec is None or spec.loader is None:
--- a/invokeai/app/invocations/compel.py
+++ b/invokeai/app/invocations/compel.py
@@ -82,10 +82,11 @@ class CompelInvocation(BaseInvocation):
            # apply all patches while the model is on the target device
            text_encoder_info.model_on_device() as (cached_weights, text_encoder),
            tokenizer_info as tokenizer,
-            LoRAPatcher.apply_lora_patches(
+            LoRAPatcher.apply_smart_lora_patches(
                model=text_encoder,
                patches=_lora_loader(),
                prefix="lora_te_",
+                dtype=TorchDevice.choose_torch_dtype(),
                cached_weights=cached_weights,
            ),
            # Apply CLIP Skip after LoRA to prevent LoRA application from failing on skipped layers.
@@ -179,10 +180,11 @@ class SDXLPromptInvocationBase:
            # apply all patches while the model is on the target device
            text_encoder_info.model_on_device() as (cached_weights, text_encoder),
            tokenizer_info as tokenizer,
-            LoRAPatcher.apply_lora_patches(
+            LoRAPatcher.apply_smart_lora_patches(
                text_encoder,
                patches=_lora_loader(),
                prefix=lora_prefix,
+                dtype=TorchDevice.choose_torch_dtype(),
                cached_weights=cached_weights,
            ),
            # Apply CLIP Skip after LoRA to prevent LoRA application from failing on skipped layers.
--- a/invokeai/app/invocations/denoise_latents.py
+++ b/invokeai/app/invocations/denoise_latents.py
@@ -1003,10 +1003,11 @@ class DenoiseLatentsInvocation(BaseInvocation):
            ModelPatcher.apply_freeu(unet, self.unet.freeu_config),
            SeamlessExt.static_patch_model(unet, self.unet.seamless_axes),  # FIXME
            # Apply the LoRA after unet has been moved to its target device for faster patching.
-            LoRAPatcher.apply_lora_patches(
+            LoRAPatcher.apply_smart_lora_patches(
                model=unet,
                patches=_lora_loader(),
                prefix="lora_unet_",
+                dtype=unet.dtype,
                cached_weights=cached_weights,
            ),
        ):
--- a/invokeai/app/invocations/fields.py
+++ b/invokeai/app/invocations/fields.py
@@ -56,6 +56,7 @@ class UIType(str, Enum, metaclass=MetaEnum):
    CLIPLEmbedModel = "CLIPLEmbedModelField"
    CLIPGEmbedModel = "CLIPGEmbedModelField"
    SpandrelImageToImageModel = "SpandrelImageToImageModelField"
+    StructuralLoRAModel = "StructuralLoRAModelField"
    # endregion

    # region Misc Field Types
@@ -143,6 +144,7 @@ class FieldDescriptions:
    controlnet_model = "ControlNet model to load"
    vae_model = "VAE model to load"
    lora_model = "LoRA model to load"
+    structural_lora_model = "Structural LoRA model to load"
    main_model = "Main model (UNet, VAE, CLIP) to load"
    flux_model = "Flux model (Transformer) to load"
    sd3_model = "SD3 model (MMDiTX) to load"
--- a/invokeai/app/invocations/flux_denoise.py
+++ b/invokeai/app/invocations/flux_denoise.py
@@ -1,5 +1,5 @@
 from contextlib import ExitStack
-from typing import Callable, Iterator, Optional, Tuple
+from typing import Callable, Iterator, Optional, Tuple, Union

 import numpy as np
 import numpy.typing as npt
@@ -8,6 +8,8 @@ import torchvision.transforms as tv_transforms
 from torchvision.transforms.functional import resize as tv_resize
 from transformers import CLIPImageProcessor, CLIPVisionModelWithProjection

+from invokeai.backend.flux.modules.autoencoder import AutoEncoder
+
 from invokeai.app.invocations.baseinvocation import BaseInvocation, Classification, invocation
 from invokeai.app.invocations.fields import (
    DenoiseMaskField,
@@ -22,7 +24,7 @@ from invokeai.app.invocations.fields import (
 )
 from invokeai.app.invocations.flux_controlnet import FluxControlNetField
 from invokeai.app.invocations.ip_adapter import IPAdapterField
-from invokeai.app.invocations.model import TransformerField, VAEField
+from invokeai.app.invocations.model import TransformerField, VAEField, StructuralLoRAField, LoRAField
 from invokeai.app.invocations.primitives import LatentsOutput
 from invokeai.app.services.shared.invocation_context import InvocationContext
 from invokeai.backend.flux.controlnet.instantx_controlnet_flux import InstantXControlNetFlux
@@ -43,6 +45,8 @@ from invokeai.backend.flux.sampling_utils import (
    pack,
    unpack,
 )
+from invokeai.backend.flux.flux_tools_sampling_utils import prepare_control
+from invokeai.backend.flux.modules.conditioner import HFEncoder
 from invokeai.backend.flux.text_conditioning import FluxTextConditioning
 from invokeai.backend.lora.conversions.flux_lora_constants import FLUX_LORA_TRANSFORMER_PREFIX
 from invokeai.backend.lora.lora_model_raw import LoRAModelRaw
@@ -284,6 +288,16 @@ class FluxDenoiseInvocation(BaseInvocation, WithMetadata, WithBoard):
                dtype=inference_dtype,
                device=x.device,
            )
+            img_cond = None
+            if struct_lora := self.transformer.structural_lora:
+                # What should we do when we have multiple of these?
+                if not self.controlnet_vae:
+                    raise ValueError("controlnet_vae must be set when using a strutural lora")
+                ae_info = context.models.load(self.controlnet_vae.vae)
+                img = context.images.get_pil(struct_lora.img.image_name)
+                with ae_info as ae:
+                    assert isinstance(ae, AutoEncoder)
+                    img_cond = prepare_control(self.height, self.width, self.seed, ae, img)

            # Load the transformer model.
            (cached_weights, transformer) = exit_stack.enter_context(transformer_info.model_on_device())
@@ -296,10 +310,11 @@ class FluxDenoiseInvocation(BaseInvocation, WithMetadata, WithBoard):
            if config.format in [ModelFormat.Checkpoint]:
                # The model is non-quantized, so we can apply the LoRA weights directly into the model.
                exit_stack.enter_context(
-                    LoRAPatcher.apply_lora_patches(
+                    LoRAPatcher.apply_smart_lora_patches(
                        model=transformer,
                        patches=self._lora_iterator(context),
                        prefix=FLUX_LORA_TRANSFORMER_PREFIX,
+                        dtype=inference_dtype,
                        cached_weights=cached_weights,
                    )
                )
@@ -311,7 +326,7 @@ class FluxDenoiseInvocation(BaseInvocation, WithMetadata, WithBoard):
                # The model is quantized, so apply the LoRA weights as sidecar layers. This results in slower inference,
                # than directly patching the weights, but is agnostic to the quantization format.
                exit_stack.enter_context(
-                    LoRAPatcher.apply_lora_sidecar_patches(
+                    LoRAPatcher.apply_lora_wrapper_patches(
                        model=transformer,
                        patches=self._lora_iterator(context),
                        prefix=FLUX_LORA_TRANSFORMER_PREFIX,
@@ -345,6 +360,7 @@ class FluxDenoiseInvocation(BaseInvocation, WithMetadata, WithBoard):
                controlnet_extensions=controlnet_extensions,
                pos_ip_adapter_extensions=pos_ip_adapter_extensions,
                neg_ip_adapter_extensions=neg_ip_adapter_extensions,
+                img_cond=img_cond
            )

        x = unpack(x.float(), self.height, self.width)
@@ -682,7 +698,10 @@ class FluxDenoiseInvocation(BaseInvocation, WithMetadata, WithBoard):
        return pos_ip_adapter_extensions, neg_ip_adapter_extensions

    def _lora_iterator(self, context: InvocationContext) -> Iterator[Tuple[LoRAModelRaw, float]]:
-        for lora in self.transformer.loras:
+        loras: list[Union[LoRAField, StructuralLoRAField]] = [*self.transformer.loras]
+        if self.transformer.structural_lora:
+            loras.append(self.transformer.structural_lora)
+        for lora in loras:
            lora_info = context.models.load(lora.lora)
            assert isinstance(lora_info.model, LoRAModelRaw)
            yield (lora_info.model, lora.weight)
--- a/invokeai/app/invocations/flux_model_loader.py
+++ b/invokeai/app/invocations/flux_model_loader.py
@@ -81,8 +81,8 @@ class FluxModelLoaderInvocation(BaseInvocation):
        assert isinstance(transformer_config, CheckpointConfigBase)

        return FluxModelLoaderOutput(
-            transformer=TransformerField(transformer=transformer, loras=[]),
+            transformer=TransformerField(transformer=transformer, loras=[], structural_lora=None),
            clip=CLIPField(tokenizer=tokenizer, text_encoder=clip_encoder, loras=[], skipped_layers=0),
            t5_encoder=T5EncoderField(tokenizer=tokenizer2, text_encoder=t5_encoder),
            vae=VAEField(vae=vae),
            max_seq_len=max_seq_lengths[transformer_config.config_path],
--- a/invokeai/app/invocations/flux_structural_lora_loader.py
+++ b/invokeai/app/invocations/flux_structural_lora_loader.py
@@ -0,0 +1,70 @@
+from typing import Optional, Literal
+
+from invokeai.app.invocations.baseinvocation import (
+    BaseInvocation,
+    BaseInvocationOutput,
+    Classification,
+    invocation,
+    invocation_output,
+)
+from invokeai.app.invocations.fields import FieldDescriptions, Input, InputField, OutputField, UIType, ImageField
+from invokeai.app.invocations.model import VAEField, StructuralLoRAField, ModelIdentifierField, TransformerField
+from invokeai.app.services.shared.invocation_context import InvocationContext
+
+
+@invocation_output("flux_structural_lora_loader_output")
+class FluxStructuralLoRALoaderOutput(BaseInvocationOutput):
+    """Flux Structural LoRA Loader Output"""
+    # `transformer` stays None unless the loader node received a transformer to attach the structural LoRA to.
+    transformer: Optional[TransformerField] = OutputField(
+        default=None, description=FieldDescriptions.transformer, title="FLUX Transformer"
+    )
+
+
+@invocation(
+    "flux_structural_lora_loader",
+    title="Flux Structural LoRA",
+    tags=["lora", "model", "flux"],
+    category="model",
+    version="1.1.0",
+    classification=Classification.Prototype,
+)
+class FluxStructuralLoRALoaderInvocation(BaseInvocation):
+    """Attach a structural LoRA model and its conditioning image to a FLUX transformer."""
+
+    lora: ModelIdentifierField = InputField(
+        description=FieldDescriptions.structural_lora_model, title="Structural LoRA", ui_type=UIType.StructuralLoRAModel
+    )
+    transformer: TransformerField | None = InputField(
+        default=None,
+        description=FieldDescriptions.transformer,
+        input=Input.Connection,
+        title="FLUX Transformer",
+    )
+    image: ImageField = InputField(
+        description="The image to encode.",
+    )
+    weight: float = InputField(default=0.75, description=FieldDescriptions.lora_weight)
+
+    def invoke(self, context: InvocationContext) -> FluxStructuralLoRALoaderOutput:
+        lora_key = self.lora.key
+        # Fail early when the selected model key does not exist.
+        if not context.models.exists(lora_key):
+            raise ValueError(f"Unknown lora: {lora_key}!")
+
+        # Reject re-applying the structural LoRA that is already attached to the incoming transformer.
+        if self.transformer and self.transformer.structural_lora and self.transformer.structural_lora.lora.key == lora_key:
+            raise ValueError(f'Structural LoRA "{lora_key}" already applied to transformer.')
+
+        output = FluxStructuralLoRALoaderOutput()
+
+        # Attach the structural LoRA to a deep copy of the transformer field, replacing any previously attached one.
+        if self.transformer is not None:
+            output.transformer = self.transformer.model_copy(deep=True)
+            output.transformer.structural_lora = StructuralLoRAField(
+                lora=self.lora,
+                img=self.image,
+                weight=self.weight,
+            )
+
+        return output
--- a/invokeai/app/invocations/flux_text_encoder.py
+++ b/invokeai/app/invocations/flux_text_encoder.py
@@ -22,6 +22,7 @@ from invokeai.backend.lora.lora_model_raw import LoRAModelRaw
 from invokeai.backend.lora.lora_patcher import LoRAPatcher
 from invokeai.backend.model_manager.config import ModelFormat
 from invokeai.backend.stable_diffusion.diffusion.conditioning_data import ConditioningFieldData, FLUXConditioningInfo
+from invokeai.backend.util.devices import TorchDevice


@invocation(
@@ -111,10 +112,11 @@ class FluxTextEncoderInvocation(BaseInvocation):
            if clip_text_encoder_config.format in [ModelFormat.Diffusers]:
                # The model is non-quantized, so we can apply the LoRA weights directly into the model.
                exit_stack.enter_context(
-                    LoRAPatcher.apply_lora_patches(
+                    LoRAPatcher.apply_smart_lora_patches(
                        model=clip_text_encoder,
                        patches=self._clip_lora_iterator(context),
                        prefix=FLUX_LORA_CLIP_PREFIX,
+                        dtype=TorchDevice.choose_torch_dtype(),
                        cached_weights=cached_weights,
                    )
                )
--- a/invokeai/app/invocations/model.py
+++ b/invokeai/app/invocations/model.py
@@ -1,5 +1,5 @@
 import copy
-from typing import List, Optional
+from typing import List, Optional, Literal

 from pydantic import BaseModel, Field

@@ -10,7 +10,7 @@ from invokeai.app.invocations.baseinvocation import (
    invocation,
    invocation_output,
 )
-from invokeai.app.invocations.fields import FieldDescriptions, Input, InputField, OutputField, UIType
+from invokeai.app.invocations.fields import FieldDescriptions, Input, InputField, OutputField, UIType, ImageField
 from invokeai.app.services.shared.invocation_context import InvocationContext
 from invokeai.app.shared.models import FreeUConfig
 from invokeai.backend.model_manager.config import (
@@ -65,11 +65,6 @@ class CLIPField(BaseModel):
    loras: List[LoRAField] = Field(description="LoRAs to apply on model loading")


-class TransformerField(BaseModel):
-    transformer: ModelIdentifierField = Field(description="Info to load Transformer submodel")
-    loras: List[LoRAField] = Field(description="LoRAs to apply on model loading")
-
-
 class T5EncoderField(BaseModel):
    tokenizer: ModelIdentifierField = Field(description="Info to load tokenizer submodel")
    text_encoder: ModelIdentifierField = Field(description="Info to load text_encoder submodel")
@@ -79,6 +74,13 @@ class VAEField(BaseModel):
    vae: ModelIdentifierField = Field(description="Info to load vae submodel")
    seamless_axes: List[str] = Field(default_factory=list, description='Axes("x" and "y") to which apply seamless')

+class StructuralLoRAField(LoRAField):  # A LoRAField that also carries the image used for structural conditioning.
+    img: ImageField = Field(description="Image to use in structural conditioning")
+
+class TransformerField(BaseModel):  # Transformer submodel info plus the LoRAs to apply on model loading.
+    transformer: ModelIdentifierField = Field(description="Info to load Transformer submodel")
+    loras: List[LoRAField] = Field(description="LoRAs to apply on model loading")
+    structural_lora: Optional[StructuralLoRAField] = Field(description="Structural LoRAs to apply on model loading", default=None)

@invocation_output("unet_output")
 class UNetOutput(BaseInvocationOutput):
--- a/invokeai/app/invocations/sd3_text_encoder.py
+++ b/invokeai/app/invocations/sd3_text_encoder.py
@@ -21,6 +21,7 @@ from invokeai.backend.lora.lora_model_raw import LoRAModelRaw
 from invokeai.backend.lora.lora_patcher import LoRAPatcher
 from invokeai.backend.model_manager.config import ModelFormat
 from invokeai.backend.stable_diffusion.diffusion.conditioning_data import ConditioningFieldData, SD3ConditioningInfo
+from invokeai.backend.util.devices import TorchDevice

 # The SD3 T5 Max Sequence Length set based on the default in diffusers.
 SD3_T5_MAX_SEQ_LEN = 256
@@ -150,10 +151,11 @@ class Sd3TextEncoderInvocation(BaseInvocation):
            if clip_text_encoder_config.format in [ModelFormat.Diffusers]:
                # The model is non-quantized, so we can apply the LoRA weights directly into the model.
                exit_stack.enter_context(
-                    LoRAPatcher.apply_lora_patches(
+                    LoRAPatcher.apply_smart_lora_patches(
                        model=clip_text_encoder,
                        patches=self._clip_lora_iterator(context, clip_model),
                        prefix=FLUX_LORA_CLIP_PREFIX,
+                        dtype=TorchDevice.choose_torch_dtype(),
                        cached_weights=cached_weights,
                    )
                )
--- a/invokeai/app/invocations/tiled_multi_diffusion_denoise_latents.py
+++ b/invokeai/app/invocations/tiled_multi_diffusion_denoise_latents.py
@@ -207,7 +207,9 @@ class TiledMultiDiffusionDenoiseLatents(BaseInvocation):
        with (
            ExitStack() as exit_stack,
            unet_info as unet,
-            LoRAPatcher.apply_lora_patches(model=unet, patches=_lora_loader(), prefix="lora_unet_"),
+            LoRAPatcher.apply_smart_lora_patches(
+                model=unet, patches=_lora_loader(), prefix="lora_unet_", dtype=unet.dtype
+            ),
        ):
            assert isinstance(unet, UNet2DConditionModel)
            latents = latents.to(device=unet.device, dtype=unet.dtype)
--- a/invokeai/app/services/config/config_default.py
+++ b/invokeai/app/services/config/config_default.py
@@ -4,6 +4,7 @@
 from __future__ import annotations

 import copy
+import filecmp
 import locale
 import os
 import re
@@ -525,9 +526,35 @@ def get_config() -> InvokeAIAppConfig:
    ]
    example_config.write_file(config.config_file_path.with_suffix(".example.yaml"), as_example=True)

-    # Copy all legacy configs - We know `__path__[0]` is correct here
+    # Copy all legacy configs only if needed
+    # We know `__path__[0]` is correct here
    configs_src = Path(model_configs.__path__[0])  # pyright: ignore [reportUnknownMemberType, reportUnknownArgumentType, reportAttributeAccessIssue]
-    shutil.copytree(configs_src, config.legacy_conf_path, dirs_exist_ok=True)
+    dest_path = config.legacy_conf_path
+
+    # Create destination (we don't need to check for existence)
+    dest_path.mkdir(parents=True, exist_ok=True)
+
+    # Compare the top level of both directories (these dircmp attributes do not descend into subdirectories)
+    comparison = filecmp.dircmp(configs_src, dest_path)
+    need_copy = any(
+        [
+            comparison.left_only,  # Files exist only in source
+            comparison.diff_files,  # Files that differ
+            comparison.common_funny,  # Files that couldn't be compared
+        ]
+    )
+
+    if need_copy:
+        # Get permissions from destination directory
+        dest_mode = dest_path.stat().st_mode
+
+        # Copy directory tree
+        shutil.copytree(configs_src, dest_path, dirs_exist_ok=True)
+
+        # Set permissions on copied files to match destination directory
+        dest_path.chmod(dest_mode)
+        for p in dest_path.glob("**/*"):
+            p.chmod(dest_mode)

    if config.config_file_path.exists():
        config_from_file = load_and_migrate_config(config.config_file_path)
--- a/invokeai/app/services/invocation_stats/invocation_stats_default.py
+++ b/invokeai/app/services/invocation_stats/invocation_stats_default.py
@@ -20,7 +20,7 @@ from invokeai.app.services.invocation_stats.invocation_stats_common import (
    NodeExecutionStatsSummary,
 )
 from invokeai.app.services.invoker import Invoker
-from invokeai.backend.model_manager.load.model_cache.cache_stats import CacheStats
+from invokeai.backend.model_manager.load.model_cache import CacheStats

 # Size of 1GB in bytes.
 GB = 2**30
--- a/invokeai/app/services/model_load/model_load_base.py
+++ b/invokeai/app/services/model_load/model_load_base.py
@@ -7,7 +7,7 @@ from typing import Callable, Optional

 from invokeai.backend.model_manager import AnyModel, AnyModelConfig, SubModelType
 from invokeai.backend.model_manager.load import LoadedModel, LoadedModelWithoutConfig
-from invokeai.backend.model_manager.load.model_cache.model_cache import ModelCache
+from invokeai.backend.model_manager.load.model_cache.model_cache_base import ModelCacheBase


 class ModelLoadServiceBase(ABC):
@@ -24,7 +24,7 @@ class ModelLoadServiceBase(ABC):

    @property
    @abstractmethod
-    def ram_cache(self) -> ModelCache:
+    def ram_cache(self) -> ModelCacheBase[AnyModel]:
        """Return the RAM cache used by this loader."""

    @abstractmethod
--- a/invokeai/app/services/model_load/model_load_default.py
+++ b/invokeai/app/services/model_load/model_load_default.py
@@ -18,7 +18,7 @@ from invokeai.backend.model_manager.load import (
    ModelLoaderRegistry,
    ModelLoaderRegistryBase,
 )
-from invokeai.backend.model_manager.load.model_cache.model_cache import ModelCache
+from invokeai.backend.model_manager.load.model_cache.model_cache_base import ModelCacheBase
 from invokeai.backend.model_manager.load.model_loaders.generic_diffusers import GenericDiffusersLoader
 from invokeai.backend.util.devices import TorchDevice
 from invokeai.backend.util.logging import InvokeAILogger
@@ -30,7 +30,7 @@ class ModelLoadService(ModelLoadServiceBase):
    def __init__(
        self,
        app_config: InvokeAIAppConfig,
-        ram_cache: ModelCache,
+        ram_cache: ModelCacheBase[AnyModel],
        registry: Optional[Type[ModelLoaderRegistryBase]] = ModelLoaderRegistry,
    ):
        """Initialize the model load service."""
@@ -45,7 +45,7 @@ class ModelLoadService(ModelLoadServiceBase):
        self._invoker = invoker

    @property
-    def ram_cache(self) -> ModelCache:
+    def ram_cache(self) -> ModelCacheBase[AnyModel]:
        """Return the RAM cache used by this loader."""
        return self._ram_cache

@@ -78,8 +78,9 @@ class ModelLoadService(ModelLoadServiceBase):
        self, model_path: Path, loader: Optional[Callable[[Path], AnyModel]] = None
    ) -> LoadedModelWithoutConfig:
        cache_key = str(model_path)
+        ram_cache = self.ram_cache
        try:
-            return LoadedModelWithoutConfig(cache_record=self._ram_cache.get(key=cache_key), cache=self._ram_cache)
+            return LoadedModelWithoutConfig(_locker=ram_cache.get(key=cache_key))
        except IndexError:
            pass

@@ -108,5 +109,5 @@ class ModelLoadService(ModelLoadServiceBase):
        )
        assert loader is not None
        raw_model = loader(model_path)
-        self._ram_cache.put(key=cache_key, model=raw_model)
-        return LoadedModelWithoutConfig(cache_record=self._ram_cache.get(key=cache_key), cache=self._ram_cache)
+        ram_cache.put(key=cache_key, model=raw_model)
+        return LoadedModelWithoutConfig(_locker=ram_cache.get(key=cache_key))
--- a/invokeai/app/services/model_manager/model_manager_default.py
+++ b/invokeai/app/services/model_manager/model_manager_default.py
@@ -16,8 +16,7 @@ from invokeai.app.services.model_load.model_load_base import ModelLoadServiceBas
 from invokeai.app.services.model_load.model_load_default import ModelLoadService
 from invokeai.app.services.model_manager.model_manager_base import ModelManagerServiceBase
 from invokeai.app.services.model_records.model_records_base import ModelRecordServiceBase
-from invokeai.backend.model_manager.load.model_cache.model_cache import ModelCache
-from invokeai.backend.model_manager.load.model_loader_registry import ModelLoaderRegistry
+from invokeai.backend.model_manager.load import ModelCache, ModelLoaderRegistry
 from invokeai.backend.util.devices import TorchDevice
 from invokeai.backend.util.logging import InvokeAILogger

--- a/invokeai/app/services/session_processor/session_processor_default.py
+++ b/invokeai/app/services/session_processor/session_processor_default.py
@@ -378,6 +378,9 @@ class DefaultSessionProcessor(SessionProcessorBase):
        self._poll_now()

    async def _on_queue_item_status_changed(self, event: FastAPIEvent[QueueItemStatusChangedEvent]) -> None:
+        # Ignore status changes for queue items other than the one currently being processed
+        if self._queue_item and self._queue_item.item_id != event[1].item_id:
+            return
        if self._queue_item and event[1].status in ["completed", "failed", "canceled"]:
            # When the queue item is canceled via HTTP, the queue item status is set to `"canceled"` and this event is
            # emitted. We need to respond to this event and stop graph execution. This is done by setting the cancel
--- a/invokeai/app/services/shared/sqlite_migrator/migrations/migration_11.py
+++ b/invokeai/app/services/shared/sqlite_migrator/migrations/migration_11.py
@@ -35,7 +35,7 @@ class Migration11Callback:

    def _remove_convert_cache(self) -> None:
        """Rename models/.cache to models/.convert_cache."""
-        self._logger.info("Removing .cache directory. Converted models will now be cached in .convert_cache.")
+        self._logger.info("Removing models/.cache directory. Converted models will now be cached in .convert_cache.")
        legacy_convert_path = self._app_config.root_path / "models" / ".cache"
        shutil.rmtree(legacy_convert_path, ignore_errors=True)

--- a/invokeai/backend/flux/denoise.py
+++ b/invokeai/backend/flux/denoise.py
@@ -30,6 +30,8 @@ def denoise(
    controlnet_extensions: list[XLabsControlNetExtension | InstantXControlNetExtension],
    pos_ip_adapter_extensions: list[XLabsIPAdapterExtension],
    neg_ip_adapter_extensions: list[XLabsIPAdapterExtension],
+    # extra img tokens
+    img_cond: torch.Tensor | None = None,
 ):
    # step 0 is the initial state
    total_steps = len(timesteps) - 1
@@ -69,9 +71,9 @@ def denoise(
        # controlnet_residuals datastructure is efficient in that it likely contains multiple references to the same
        # tensors. Calculating the sum materializes each tensor into its own instance.
        merged_controlnet_residuals = sum_controlnet_flux_outputs(controlnet_residuals)
-
+        pred_img = torch.cat((img, img_cond), dim=-1) if img_cond is not None else img
        pred = model(
-            img=img,
+            img=pred_img,
            img_ids=img_ids,
            txt=pos_regional_prompting_extension.regional_text_conditioning.t5_embeddings,
            txt_ids=pos_regional_prompting_extension.regional_text_conditioning.t5_txt_ids,
--- a/invokeai/backend/flux/flux_tools_sampling_utils.py
+++ b/invokeai/backend/flux/flux_tools_sampling_utils.py
@@ -0,0 +1,46 @@
+import torch
+import numpy as np
+from PIL import Image
+from einops import rearrange
+
+from invokeai.backend.flux.modules.autoencoder import AutoEncoder
+
+
+def prepare_control(
+    height: int,
+    width: int,
+    seed: int,
+    ae: AutoEncoder,
+    cond_image: Image.Image,
+) -> torch.Tensor:
+    """Encode a conditioning image into packed latent tokens for FLUX structural control.
+
+    The image is converted to RGB, resized to (width, height), scaled to [-1, 1], encoded with `ae` and finally
+    rearranged so that every 2x2 latent patch becomes one token.
+
+    Args:
+        height: Height in pixels that the conditioning image is resized to.
+        width: Width in pixels that the conditioning image is resized to.
+        seed: Seed for the generator that is passed to `ae.encode(...)`.
+        ae: The autoencoder used to encode the conditioning image.
+        cond_image: The conditioning image.
+
+    Returns:
+        The encoded image rearranged from "b c (h ph) (w pw)" to "b (h w) (c ph pw)" with ph = pw = 2. Because of
+        the 2x2 packing, the latent height and width must both be even.
+    """
+    # Load the conditioning image as RGB at the target resolution.
+    resized = cond_image.convert("RGB").resize((width, height), Image.Resampling.LANCZOS)
+    # Scale the 8-bit pixel values from [0, 255] to [-1, 1] instead of feeding raw byte values to the encoder.
+    # NOTE(review): assumes the autoencoder expects [-1, 1] inputs, as in the reference FLUX tools sampling code.
+    pixels = torch.from_numpy(np.array(resized)).float() / 127.5 - 1.0
+    pixels = rearrange(pixels, "h w c -> 1 c h w")
+
+    # Encode on the same device / dtype as the autoencoder's first parameter.
+    ae_param = next(iter(ae.parameters()))
+    pixels = pixels.to(device=ae_param.device, dtype=ae_param.dtype)
+    generator = torch.Generator(device=ae_param.device).manual_seed(seed)
+    latents = ae.encode(pixels, sample=True, generator=generator)
+
+    # Pack each 2x2 latent patch into a single token.
+    return rearrange(latents, "b c (h ph) (w pw) -> b (h w) (c ph pw)", ph=2, pw=2)
--- a/invokeai/backend/flux/math.py
+++ b/invokeai/backend/flux/math.py
@@ -32,4 +32,4 @@ def apply_rope(xq: Tensor, xk: Tensor, freqs_cis: Tensor) -> tuple[Tensor, Tenso
    xk_ = xk.view(*xk.shape[:-1], -1, 1, 2)
    xq_out = freqs_cis[..., 0] * xq_[..., 0] + freqs_cis[..., 1] * xq_[..., 1]
    xk_out = freqs_cis[..., 0] * xk_[..., 0] + freqs_cis[..., 1] * xk_[..., 1]
-    return xq_out.view(*xq.shape), xk_out.view(*xk.shape)
+    return xq_out.view(*xq.shape).type_as(xq), xk_out.view(*xk.shape).type_as(xk)
--- a/invokeai/backend/flux/model.py
+++ b/invokeai/backend/flux/model.py
@@ -4,6 +4,7 @@ from dataclasses import dataclass

 import torch
 from torch import Tensor, nn
+from typing import Optional

 from invokeai.backend.flux.custom_block_processor import (
    CustomDoubleStreamBlockProcessor,
@@ -35,6 +36,7 @@ class FluxParams:
    theta: int
    qkv_bias: bool
    guidance_embed: bool
+    out_channels: Optional[int] = None


 class Flux(nn.Module):
@@ -47,7 +49,7 @@ class Flux(nn.Module):

        self.params = params
        self.in_channels = params.in_channels
-        self.out_channels = self.in_channels
+        self.out_channels = params.out_channels or self.in_channels
        if params.hidden_size % params.num_heads != 0:
            raise ValueError(f"Hidden size {params.hidden_size} must be divisible by num_heads {params.num_heads}")
        pe_dim = params.hidden_size // params.num_heads
--- a/invokeai/backend/flux/modules/image_embedders.py
+++ b/invokeai/backend/flux/modules/image_embedders.py
@@ -0,0 +1,50 @@
+import os
+import cv2
+import numpy as np
+import torch
+
+from einops import rearrange, repeat
+from PIL import Image
+from safetensors.torch import load_file as load_sft
+from torch import nn
+from transformers import AutoModelForDepthEstimation, AutoProcessor, SiglipImageProcessor, SiglipVisionModel
+
+class DepthImageEncoder:
+    """Estimates a depth map for an image tensor using the `depth_model_name` Hugging Face model."""
+    depth_model_name = "LiheYoung/depth-anything-large-hf"
+    def __init__(self, device):
+        self.device = device
+        self.depth_model = AutoModelForDepthEstimation.from_pretrained(self.depth_model_name).to(device)
+        self.processor = AutoProcessor.from_pretrained(self.depth_model_name)
+    def __call__(self, img: torch.Tensor) -> torch.Tensor:
+        """Return a 3-channel depth map at the input's spatial size, rescaled with `depth / 127.5 - 1.0`."""
+        target_hw = img.shape[-2:]
+        # Clamp the input to [-1, 1] and map it onto byte values for the image processor.
+        as_bytes = ((torch.clamp(img, -1.0, 1.0) + 1.0) * 127.5).byte()
+        pixel_values = self.processor(as_bytes, return_tensors="pt")["pixel_values"]
+        depth = repeat(self.depth_model(pixel_values.to(self.device)).predicted_depth, "b h w -> b 3 h w")
+        depth = torch.nn.functional.interpolate(depth, target_hw, mode="bicubic", antialias=True)
+        return depth / 127.5 - 1.0
+
+class CannyImageEncoder:
+    """Computes a 3-channel Canny edge map for a single image tensor, using `min_t` / `max_t` as thresholds."""
+    def __init__(self, device, min_t: int = 50, max_t: int = 200):
+        self.device = device
+        self.min_t = min_t
+        self.max_t = max_t
+    def __call__(self, img: torch.Tensor) -> torch.Tensor:
+        """Return the edge map of `img` (batch size 1), rescaled with `/ 127.5 - 1.0`, on `self.device`."""
+        assert img.shape[0] == 1, "Only batch size 1 is supported"
+        # Drop the batch dimension and move the channels last.
+        img = rearrange(img[0], "c h w -> h w c")
+        img = torch.clamp(img, -1.0, 1.0)
+        # `Tensor.numpy()` only works on CPU tensors that do not require grad and have a numpy-compatible dtype, so
+        # detach the scaled image and move it to the CPU as float32 before converting it to uint8.
+        img_np = ((img + 1.0) * 127.5).detach().cpu().float().numpy().astype(np.uint8)
+        # Apply Canny edge detection
+        canny = cv2.Canny(img_np, self.min_t, self.max_t)
+        # Convert back to torch tensor and reshape
+        canny = torch.from_numpy(canny).float() / 127.5 - 1.0
+        canny = rearrange(canny, "h w -> 1 1 h w")
+        canny = repeat(canny, "b 1 ... -> b 3 ...")
+        return canny.to(self.device)
--- a/invokeai/backend/lora/conversions/flux_control_lora_utils.py
+++ b/invokeai/backend/lora/conversions/flux_control_lora_utils.py
@@ -0,0 +1,65 @@
+import re
+import torch
+
+from typing import Any, Dict
+from invokeai.backend.lora.layers.any_lora_layer import AnyLoRALayer
+from invokeai.backend.lora.layers.utils import any_lora_layer_from_state_dict
+from invokeai.backend.lora.lora_model_raw import LoRAModelRaw
+from invokeai.backend.lora.conversions.flux_lora_constants import FLUX_LORA_TRANSFORMER_PREFIX
+from invokeai.backend.lora.layers.lora_layer import LoRALayer
+from invokeai.backend.lora.layers.set_parameter_layer import SetParameterLayer
+
+
+# A regex pattern that matches all of the keys in the Flux Dev/Canny LoRA format.
+# Example keys:
+#   guidance_in.in_layer.lora_B.bias
+#   single_blocks.0.linear1.lora_A.weight
+#   double_blocks.0.img_attn.norm.key_norm.scale
+FLUX_STRUCTURAL_TRANSFORMER_KEY_REGEX = r"(final_layer|vector_in|txt_in|time_in|img_in|guidance_in|\w+_blocks)(\.(\d+))?\.(lora_(A|B)|(in|out)_layer|adaLN_modulation|img_attn|img_mlp|img_mod|txt_attn|txt_mlp|txt_mod|linear|linear1|linear2|modulation|norm)\.?(.*)"
+
+def is_state_dict_likely_flux_control(state_dict: Dict[str, Any]) -> bool:
+    """Checks if the provided state dict is likely in the FLUX Control LoRA format.
+
+    Every key must be a string that matches FLUX_STRUCTURAL_TRANSFORMER_KEY_REGEX at its start; an empty state dict
+    vacuously passes. This is intended to be a high-precision detector, but it is not guaranteed to have perfect
+    precision. (A perfect-precision detector would require checking all keys against a whitelist and verifying
+    tensor shapes.)
+    """
+    key_pattern = re.compile(FLUX_STRUCTURAL_TRANSFORMER_KEY_REGEX)
+    return all(isinstance(k, str) and key_pattern.match(k) is not None for k in state_dict.keys())
+
+def lora_model_from_flux_control_state_dict(state_dict: Dict[str, torch.Tensor]) -> LoRAModelRaw:
+    """Build a LoRAModelRaw from a FLUX Control LoRA state dict.
+
+    Keys are grouped by layer. A layer with "lora_A.weight", "lora_B.weight" and "lora_B.bias" becomes a LoRALayer,
+    a layer with "scale" becomes a SetParameterLayer, and any other layer raises an AssertionError.
+    """
+    # Group the tensors by layer name.
+    per_layer: dict[str, dict[str, torch.Tensor]] = {}
+    for full_key, tensor in state_dict.items():
+        parts = full_key.split(".")
+        # Keys that mention lora_A / lora_B use their last two components as the parameter name (e.g.
+        # "lora_B.bias"); all other keys use only the last component (e.g. "scale").
+        split_at = -2 if ("lora_B" in full_key or "lora_A" in full_key) else -1
+        layer_name = ".".join(parts[:split_at])
+        per_layer.setdefault(layer_name, {})[".".join(parts[split_at:])] = tensor
+
+    # Convert each group into a layer, keyed by the prefixed layer name.
+    lora_keys = ("lora_A.weight", "lora_B.bias", "lora_B.weight")
+    layers: dict[str, AnyLoRALayer] = {}
+    for layer_key, tensors in per_layer.items():
+        prefixed_key = f"{FLUX_LORA_TRANSFORMER_PREFIX}{layer_key}"
+        if all(name in tensors for name in lora_keys):
+            layers[prefixed_key] = LoRALayer(
+                tensors["lora_B.weight"],
+                None,
+                tensors["lora_A.weight"],
+                None,
+                tensors["lora_B.bias"],
+            )
+        elif "scale" in tensors:
+            layers[prefixed_key] = SetParameterLayer("scale", tensors["scale"])
+        else:
+            raise AssertionError(f"{layer_key} not expected")
+    return LoRAModelRaw(layers=layers)
+
--- a/invokeai/backend/lora/layers/any_lora_layer.py
+++ b/invokeai/backend/lora/layers/any_lora_layer.py
@@ -7,5 +7,6 @@ from invokeai.backend.lora.layers.loha_layer import LoHALayer
 from invokeai.backend.lora.layers.lokr_layer import LoKRLayer
 from invokeai.backend.lora.layers.lora_layer import LoRALayer
 from invokeai.backend.lora.layers.norm_layer import NormLayer
+from invokeai.backend.lora.layers.set_parameter_layer import SetParameterLayer

-AnyLoRALayer = Union[LoRALayer, LoHALayer, LoKRLayer, FullLayer, IA3Layer, NormLayer, ConcatenatedLoRALayer]
+AnyLoRALayer = Union[LoRALayer, LoHALayer, LoKRLayer, FullLayer, IA3Layer, NormLayer, ConcatenatedLoRALayer, SetParameterLayer]
--- a/invokeai/backend/lora/layers/reshape_weight_layer.py
+++ b/invokeai/backend/lora/layers/reshape_weight_layer.py
@@ -0,0 +1,44 @@
+from typing import Dict, Optional
+
+import torch
+
+from invokeai.backend.lora.layers.lora_layer_base import LoRALayerBase
+from invokeai.backend.util.calc_tensor_size import calc_tensor_size
+
+
+class ReshapeWeightLayer(LoRALayerBase):
+    """Work-in-progress layer that stores an optional weight, bias and manual scale.
+
+    NOTE: `get_weight()` currently returns the original weight unchanged, so the stored `weight` is not applied yet.
+    """
+
+    # TODO: Just everything in this class
+    def __init__(self, weight: Optional[torch.Tensor], bias: Optional[torch.Tensor], scale: Optional[torch.Tensor]):
+        super().__init__(alpha=None, bias=bias)
+        self.weight = torch.nn.Parameter(weight) if weight is not None else None
+        self.bias = torch.nn.Parameter(bias) if bias is not None else None
+        self.manual_scale = scale
+
+    def scale(self):
+        """Return the manual scale as float when one was provided, otherwise the base-class scale."""
+        return self.manual_scale.float() if self.manual_scale is not None else super().scale()
+
+    def rank(self) -> int | None:
+        return None
+
+    def get_weight(self, orig_weight: torch.Tensor) -> torch.Tensor:
+        return orig_weight
+
+    def to(self, device: torch.device | None = None, dtype: torch.dtype | None = None):
+        """Move the base-class state, the weight and the manual scale to the given device / dtype."""
+        super().to(device=device, dtype=dtype)
+        if self.weight is not None:
+            self.weight = self.weight.to(device=device, dtype=dtype)
+        if self.manual_scale is not None:
+            self.manual_scale = self.manual_scale.to(device=device, dtype=dtype)
+
+    def calc_size(self) -> int:
+        """Return the base-class size plus the sizes of the weight and manual scale that are actually set."""
+        # `weight` and `manual_scale` are both optional, so skip whichever is None instead of passing None on.
+        own_tensors = (self.weight, self.manual_scale)
+        return super().calc_size() + sum(calc_tensor_size(t) for t in own_tensors if t is not None)
--- a/invokeai/backend/lora/layers/set_parameter_layer.py
+++ b/invokeai/backend/lora/layers/set_parameter_layer.py
@@ -0,0 +1,42 @@
+from typing import Dict, Optional
+
+import torch
+
+from invokeai.backend.lora.layers.lora_layer_base import LoRALayerBase
+from invokeai.backend.util.calc_tensor_size import calc_tensor_size
+
+
+class SetParameterLayer(LoRALayerBase):
+    """Patch layer that targets a single named parameter of the original module.
+
+    `get_weight()` returns the difference between the stored `weight` and the original parameter value.
+    """
+
+    def __init__(self, param_name: str, weight: torch.Tensor):
+        super().__init__(alpha=None, bias=None)
+        self.param_name = param_name
+        self.weight = weight
+
+    def rank(self) -> int | None:
+        """This layer has no rank."""
+        return None
+
+    def get_weight(self, orig_weight: torch.Tensor) -> torch.Tensor:
+        """Return `self.weight - orig_weight`."""
+        delta = self.weight - orig_weight
+        return delta
+
+    def get_parameters(self, orig_module: torch.nn.Module) -> Dict[str, torch.Tensor]:
+        """Return a single-entry dict mapping `param_name` to its difference from the module's current value."""
+        current_value = orig_module.get_parameter(self.param_name)
+        return {self.param_name: self.get_weight(current_value)}
+
+    def to(self, device: torch.device | None = None, dtype: torch.dtype | None = None):
+        """Move the base-class state and the stored weight to the given device / dtype."""
+        super().to(device=device, dtype=dtype)
+        self.weight = self.weight.to(device=device, dtype=dtype)
+
+    def calc_size(self) -> int:
+        """Return the base-class size plus the size of the stored weight."""
+        base_size = super().calc_size()
+        return base_size + calc_tensor_size(self.weight)
--- a/invokeai/backend/lora/layers/utils.py
+++ b/invokeai/backend/lora/layers/utils.py
@@ -9,6 +9,7 @@ from invokeai.backend.lora.layers.loha_layer import LoHALayer
 from invokeai.backend.lora.layers.lokr_layer import LoKRLayer
 from invokeai.backend.lora.layers.lora_layer import LoRALayer
 from invokeai.backend.lora.layers.norm_layer import NormLayer
+from invokeai.backend.lora.layers.set_parameter_layer import SetParameterLayer


 def any_lora_layer_from_state_dict(state_dict: Dict[str, torch.Tensor]) -> AnyLoRALayer:
--- a/invokeai/backend/lora/lora_layer_wrappers.py
+++ b/invokeai/backend/lora/lora_layer_wrappers.py
@@ -0,0 +1,133 @@
+import torch
+
+from invokeai.backend.lora.layers.any_lora_layer import AnyLoRALayer
+from invokeai.backend.lora.layers.concatenated_lora_layer import ConcatenatedLoRALayer
+from invokeai.backend.lora.layers.lora_layer import LoRALayer
+
+
+class LoRASidecarWrapper(torch.nn.Module):
+    """Holds a module together with the LoRA layers (and their weights) that are to be applied on top of it.
+
+    This base class only stores state and offers a helper for aggregating parameter residuals; it does not define
+    forward() itself.
+    """
+
+    def __init__(self, orig_module: torch.nn.Module, lora_layers: list[AnyLoRALayer], lora_weights: list[float]):
+        super().__init__()
+        # lora_layers[i] is applied with strength lora_weights[i]. Both lists are stored by reference, not copied.
+        self._orig_module = orig_module
+        self._lora_layers = lora_layers
+        self._lora_weights = lora_weights
+
+    @property
+    def orig_module(self) -> torch.nn.Module:
+        """The wrapped module."""
+        return self._orig_module
+
+    def add_lora_layer(self, lora_layer: AnyLoRALayer, lora_weight: float):
+        """Register one more LoRA layer together with its weight."""
+        self._lora_layers.append(lora_layer)
+        self._lora_weights.append(lora_weight)
+
+    @torch.no_grad()
+    def _get_lora_patched_parameters(
+        self, orig_params: dict[str, torch.Tensor], lora_layers: list[AnyLoRALayer], lora_weights: list[float]
+    ) -> dict[str, torch.Tensor]:
+        """Sum the scaled parameter residuals of `lora_layers`, keyed by parameter name.
+
+        Each residual is reshaped to the shape of the matching entry of `orig_params` when the shapes differ, and is
+        multiplied by `layer.scale() * weight` before it is accumulated. The returned dict holds residuals only; the
+        values in `orig_params` are not added in.
+        """
+        accumulated: dict[str, torch.Tensor] = {}
+        for layer, weight in zip(lora_layers, lora_weights, strict=True):
+            for name, delta in layer.get_parameters(self._orig_module).items():
+                target_shape = orig_params[name].shape
+                if target_shape != delta.shape:
+                    delta = delta.reshape(target_shape)
+
+                scaled_delta = delta * (layer.scale() * weight)
+                if name in accumulated:
+                    accumulated[name] += scaled_delta
+                else:
+                    accumulated[name] = scaled_delta
+
+        return accumulated
+
+
+class LoRALinearWrapper(LoRASidecarWrapper):
+    """Sidecar wrapper for linear modules: forward() returns the wrapped module's output plus the LoRA residuals."""
+
+    def _lora_linear_forward(self, input: torch.Tensor, lora_layer: LoRALayer, lora_weight: float) -> torch.Tensor:
+        """An optimized implementation of the residual calculation for a Linear LoRALayer.
+
+        The down, optional mid, and up projections are applied to `input` one after the other, and the result is
+        scaled by `lora_weight * lora_layer.scale()`.
+        """
+        x = torch.nn.functional.linear(input, lora_layer.down)
+        if lora_layer.mid is not None:
+            x = torch.nn.functional.linear(x, lora_layer.mid)
+        x = torch.nn.functional.linear(x, lora_layer.up, bias=lora_layer.bias)
+        x *= lora_weight * lora_layer.scale()
+        return x
+
+    def _concatenated_lora_forward(
+        self, input: torch.Tensor, concatenated_lora_layer: ConcatenatedLoRALayer, lora_weight: float
+    ) -> torch.Tensor:
+        """An optimized implementation of the residual calculation for a Linear ConcatenatedLoRALayer.
+
+        Every sub-layer is evaluated on the same `input`; the per-sub-layer outputs are concatenated along the last
+        dimension. Only `concat_axis == 0` is supported (asserted below).
+        """
+        x_chunks: list[torch.Tensor] = []
+        for lora_layer in concatenated_lora_layer.lora_layers:
+            x_chunk = torch.nn.functional.linear(input, lora_layer.down)
+            if lora_layer.mid is not None:
+                x_chunk = torch.nn.functional.linear(x_chunk, lora_layer.mid)
+            x_chunk = torch.nn.functional.linear(x_chunk, lora_layer.up, bias=lora_layer.bias)
+            x_chunk *= lora_weight * lora_layer.scale()
+            x_chunks.append(x_chunk)
+
+        # TODO(ryand): Generalize to support concat_axis != 0.
+        assert concatenated_lora_layer.concat_axis == 0
+        x = torch.cat(x_chunks, dim=-1)
+        return x
+
+    def forward(self, input: torch.Tensor) -> torch.Tensor:
+        """Return `orig_module(input)` plus the sum of the residuals of all registered LoRA layers.
+
+        If no LoRA layers are registered, the output of the wrapped module is returned unchanged.
+        """
+        residual: torch.Tensor | None = None
+
+        # Layers without an optimized implementation are collected here and handled together further down.
+        unoptimized_layers: list[AnyLoRALayer] = []
+        unoptimized_weights: list[float] = []
+
+        # First, calculate the residual for LoRA layers for which there is an optimized implementation.
+        for lora_layer, lora_weight in zip(self._lora_layers, self._lora_weights, strict=True):
+            if isinstance(lora_layer, LoRALayer):
+                added_residual = self._lora_linear_forward(input, lora_layer, lora_weight)
+            elif isinstance(lora_layer, ConcatenatedLoRALayer):
+                added_residual = self._concatenated_lora_forward(input, lora_layer, lora_weight)
+            else:
+                unoptimized_layers.append(lora_layer)
+                unoptimized_weights.append(lora_weight)
+                continue
+
+            if residual is None:
+                residual = added_residual
+            else:
+                residual += added_residual
+
+        # Next, calculate the residuals for the LoRA layers for which there is no optimized implementation.
+        if unoptimized_layers:
+            params = self._get_lora_patched_parameters(
+                orig_params={"weight": self._orig_module.weight, "bias": self._orig_module.bias},
+                lora_layers=unoptimized_layers,
+                lora_weights=unoptimized_weights,
+            )
+            added_residual = torch.nn.functional.linear(input, params["weight"], params.get("bias", None))
+            if residual is None:
+                residual = added_residual
+            else:
+                residual += added_residual
+
+        orig_output = self.orig_module(input)
+        # With no registered layers there is nothing to add (adding None to a tensor would raise a TypeError).
+        if residual is None:
+            return orig_output
+        return orig_output + residual
+
+
+class LoRAConv1dWrapper(LoRASidecarWrapper):
+    """Sidecar wrapper for Conv1d modules: forward() returns the wrapped module's output plus the LoRA residual."""
+
+    def forward(self, input: torch.Tensor) -> torch.Tensor:
+        """Return `orig_module(input)` plus a convolution of `input` with the aggregated LoRA weight/bias residuals."""
+        conv = self._orig_module
+        params = self._get_lora_patched_parameters(
+            orig_params={"weight": conv.weight, "bias": conv.bias},
+            lora_layers=self._lora_layers,
+            lora_weights=self._lora_weights,
+        )
+        # The residual convolution has to use the same stride / padding / dilation / groups as the wrapped conv.
+        # With the functional defaults (stride=1, padding=0, dilation=1, groups=1) its output would not line up with
+        # the wrapped module's output for any conv that is configured differently.
+        # NOTE: a padding_mode other than "zeros" on the wrapped conv is not replicated here.
+        residual = torch.nn.functional.conv1d(
+            input,
+            params["weight"],
+            params.get("bias", None),
+            stride=conv.stride,
+            padding=conv.padding,
+            dilation=conv.dilation,
+            groups=conv.groups,
+        )
+        return self.orig_module(input) + residual
+
+
+class LoRAConv2dWrapper(LoRASidecarWrapper):
+    """Sidecar wrapper for Conv2d modules: forward() returns the wrapped module's output plus the LoRA residual."""
+
+    def forward(self, input: torch.Tensor) -> torch.Tensor:
+        """Return `orig_module(input)` plus a convolution of `input` with the aggregated LoRA weight/bias residuals."""
+        conv = self._orig_module
+        params = self._get_lora_patched_parameters(
+            orig_params={"weight": conv.weight, "bias": conv.bias},
+            lora_layers=self._lora_layers,
+            lora_weights=self._lora_weights,
+        )
+        # The residual convolution has to use the same stride / padding / dilation / groups as the wrapped conv.
+        # With the functional defaults (stride=1, padding=0, dilation=1, groups=1) the output shape differs from the
+        # wrapped module's output for e.g. a 3x3 conv with padding=1, and the addition below fails.
+        # NOTE: a padding_mode other than "zeros" on the wrapped conv is not replicated here.
+        residual = torch.nn.functional.conv2d(
+            input,
+            params["weight"],
+            params.get("bias", None),
+            stride=conv.stride,
+            padding=conv.padding,
+            dilation=conv.dilation,
+            groups=conv.groups,
+        )
+        return self.orig_module(input) + residual
--- a/invokeai/backend/lora/lora_patcher.py
+++ b/invokeai/backend/lora/lora_patcher.py
@@ -4,19 +4,126 @@ from typing import Dict, Iterable, Optional, Tuple
 import torch

 from invokeai.backend.lora.layers.any_lora_layer import AnyLoRALayer
-from invokeai.backend.lora.layers.concatenated_lora_layer import ConcatenatedLoRALayer
-from invokeai.backend.lora.layers.lora_layer import LoRALayer
-from invokeai.backend.lora.lora_model_raw import LoRAModelRaw
-from invokeai.backend.lora.sidecar_layers.concatenated_lora.concatenated_lora_linear_sidecar_layer import (
-    ConcatenatedLoRALinearSidecarLayer,
+from invokeai.backend.lora.lora_layer_wrappers import (
+    LoRAConv1dWrapper,
+    LoRAConv2dWrapper,
+    LoRALinearWrapper,
+    LoRASidecarWrapper,
 )
-from invokeai.backend.lora.sidecar_layers.lora.lora_linear_sidecar_layer import LoRALinearSidecarLayer
-from invokeai.backend.lora.sidecar_layers.lora_sidecar_module import LoRASidecarModule
+from invokeai.backend.lora.lora_model_raw import LoRAModelRaw
 from invokeai.backend.util.devices import TorchDevice
 from invokeai.backend.util.original_weights_storage import OriginalWeightsStorage


 class LoRAPatcher:
+    @staticmethod
+    @torch.no_grad()
+    @contextmanager
+    def apply_smart_lora_patches(
+        model: torch.nn.Module,
+        patches: Iterable[Tuple[LoRAModelRaw, float]],
+        prefix: str,
+        dtype: torch.dtype,
+        cached_weights: Optional[Dict[str, torch.Tensor]] = None,
+    ):
+        """Apply 'smart' LoRA patching that chooses whether to use direct patching or a sidecar wrapper for each module.
+
+        The patches stay applied for the duration of the `with` block and are reverted on exit, also when the body
+        raises.
+
+        Args:
+            model (torch.nn.Module): The model to patch.
+            patches (Iterable[Tuple[LoRAModelRaw, float]]): The LoRA models to apply, each paired with its weight.
+            prefix (str): Only layers whose key starts with this prefix are applied.
+            dtype (torch.dtype): The dtype that LoRA layers are cast to when they are applied through a sidecar wrapper.
+            cached_weights (Optional[Dict[str, torch.Tensor]]): Forwarded to OriginalWeightsStorage.
+                NOTE(review): presumably a pre-existing copy of the unpatched weights - confirm against that class.
+        """
+
+        # original_weights are stored for unpatching layers that are directly patched.
+        original_weights = OriginalWeightsStorage(cached_weights)
+        # original_modules are stored for unpatching layers that are wrapped in a LoRASidecarWrapper.
+        original_modules: dict[str, torch.nn.Module] = {}
+        try:
+            for patch, patch_weight in patches:
+                LoRAPatcher._apply_smart_lora_patch(
+                    model=model,
+                    prefix=prefix,
+                    patch=patch,
+                    patch_weight=patch_weight,
+                    original_weights=original_weights,
+                    original_modules=original_modules,
+                    dtype=dtype,
+                )
+
+            yield
+        finally:
+            # Restore directly patched layers.
+            # The tensor is re-assigned via `.data` rather than copied in place: _apply_lora_layer_patch may have
+            # replaced a parameter with an expanded, differently-shaped tensor, in which case copy_() would fail on the
+            # shape mismatch. This matches the restore logic used by apply_lora_patches.
+            for param_key, weight in original_weights.get_changed_weights():
+                cur_param = model.get_parameter(param_key)
+                cur_param.data = weight.to(dtype=cur_param.dtype, device=cur_param.device, copy=True)
+
+            # Restore LoRASidecarWrapper modules.
+            # Note: This logic assumes no nested modules in original_modules.
+            for module_key, orig_module in original_modules.items():
+                module_parent_key, module_name = LoRAPatcher._split_parent_key(module_key)
+                parent_module = model.get_submodule(module_parent_key)
+                LoRAPatcher._set_submodule(parent_module, module_name, orig_module)
+
+    @staticmethod
+    @torch.no_grad()
+    def _apply_smart_lora_patch(
+        model: torch.nn.Module,
+        prefix: str,
+        patch: LoRAModelRaw,
+        patch_weight: float,
+        original_weights: OriginalWeightsStorage,
+        original_modules: dict[str, torch.nn.Module],
+        dtype: torch.dtype,
+    ):
+        """Apply one LoRA model to `model`, picking direct patching or a sidecar wrapper separately for every module.
+
+        Layers whose key does not start with `prefix` are skipped, and a `patch_weight` of 0 makes this a no-op.
+        """
+        if patch_weight == 0:
+            return
+
+        # Layer keys come in two flavours. Keys that contain a '.' are not flattened and can be used directly to
+        # access model submodules. Keys without a '.' are flattened, meaning every '.' has been replaced with '_'.
+        # Non-flattened keys are preferred because they avoid a search, but some legacy code still uses flattened
+        # keys. The first key is taken as representative of the whole patch.
+        first_layer_key = next(iter(patch.layers.keys()))
+        keys_are_flattened = "." not in first_layer_key
+
+        for layer_key, layer in patch.layers.items():
+            if not layer_key.startswith(prefix):
+                continue
+
+            key_without_prefix = layer_key[len(prefix) :]
+            module_key, module = LoRAPatcher._get_submodule(
+                model, key_without_prefix, layer_key_is_flattened=keys_are_flattened
+            )
+
+            # Choose between direct patching and a sidecar wrapper. Direct patching gives better runtime speed, so
+            # it is the default. A sidecar wrapper is used when:
+            # - The module is already wrapped in a LoRASidecarWrapper.
+            # - The module is quantized.
+            # - The module is on the CPU (keeping a second full copy of the original weights on the CPU would
+            #   double the RAM usage).
+            # NOTE: For now, quantization is not checked here. The caller is assumed to check it and to use the
+            # 'apply_lora_wrapper_patches' method if the layer is quantized.
+            # TODO(ryand): Handle the case where we are running without a GPU. Should we set a config flag that allows
+            # forcing full patching even on the CPU?
+            needs_wrapper = isinstance(module, LoRASidecarWrapper) or LoRAPatcher._is_any_part_of_layer_on_cpu(module)
+            if not needs_wrapper:
+                LoRAPatcher._apply_lora_layer_patch(
+                    module_to_patch=module,
+                    module_to_patch_key=module_key,
+                    patch=layer,
+                    patch_weight=patch_weight,
+                    original_weights=original_weights,
+                )
+                continue
+
+            LoRAPatcher._apply_lora_layer_wrapper_patch(
+                model=model,
+                module_to_patch=module,
+                module_to_patch_key=module_key,
+                patch=layer,
+                patch_weight=patch_weight,
+                original_modules=original_modules,
+                dtype=dtype,
+            )
+
+    @staticmethod
+    def _is_any_part_of_layer_on_cpu(layer: torch.nn.Module) -> bool:
+        """Return True if at least one parameter yielded by `layer.parameters()` lives on a CPU device."""
+        for param in layer.parameters():
+            if param.device.type == "cpu":
+                return True
+        return False
+
    @staticmethod
    @torch.no_grad()
    @contextmanager
@@ -40,7 +147,7 @@ class LoRAPatcher:
        original_weights = OriginalWeightsStorage(cached_weights)
        try:
            for patch, patch_weight in patches:
-                LoRAPatcher.apply_lora_patch(
+                LoRAPatcher._apply_lora_patch(
                    model=model,
                    prefix=prefix,
                    patch=patch,
@@ -52,11 +159,12 @@ class LoRAPatcher:
            yield
        finally:
            for param_key, weight in original_weights.get_changed_weights():
-                model.get_parameter(param_key).copy_(weight)
+                cur_param = model.get_parameter(param_key)
+                cur_param.data = weight.to(dtype=cur_param.dtype, device=cur_param.device, copy=True)

    @staticmethod
    @torch.no_grad()
-    def apply_lora_patch(
+    def _apply_lora_patch(
        model: torch.nn.Module,
        prefix: str,
        patch: LoRAModelRaw,
@@ -91,48 +199,84 @@ class LoRAPatcher:
                model, layer_key[prefix_len:], layer_key_is_flattened=layer_keys_are_flattened
            )

-            # All of the LoRA weight calculations will be done on the same device as the module weight.
-            # (Performance will be best if this is a CUDA device.)
-            device = module.weight.device
-            dtype = module.weight.dtype
+            LoRAPatcher._apply_lora_layer_patch(
+                module_to_patch=module,
+                module_to_patch_key=module_key,
+                patch=layer,
+                patch_weight=patch_weight,
+                original_weights=original_weights,
+            )

-            layer_scale = layer.scale()
+    @staticmethod
+    @torch.no_grad()
+    def _apply_lora_layer_patch(
+        module_to_patch: torch.nn.Module,
+        module_to_patch_key: str,
+        patch: AnyLoRALayer,
+        patch_weight: float,
+        original_weights: OriginalWeightsStorage,
+    ):
+        """Patch the parameters of `module_to_patch` in place with the residuals of a single LoRA layer.
+
+        The value of every touched parameter is saved to `original_weights` (keyed by
+        `module_to_patch_key + "." + param_name`) before it is modified. `patch` is moved to the module's device and
+        cast to float32 for the calculation, and is moved to the CPU device again afterwards.
+        """
+        # All of the LoRA weight calculations will be done on the same device as the module weight.
+        # (Performance will be best if this is a CUDA device.)
+        first_param = next(module_to_patch.parameters())
+        device = first_param.device
+        dtype = first_param.dtype

-            # We intentionally move to the target device first, then cast. Experimentally, this was found to
-            # be significantly faster for 16-bit CPU tensors being moved to a CUDA device than doing the
-            # same thing in a single call to '.to(...)'.
-            layer.to(device=device)
-            layer.to(dtype=torch.float32)
+        layer_scale = patch.scale()

-            # TODO(ryand): Using torch.autocast(...) over explicit casting may offer a speed benefit on CUDA
-            # devices here. Experimentally, it was found to be very slow on CPU. More investigation needed.
-            for param_name, lora_param_weight in layer.get_parameters(module).items():
-                param_key = module_key + "." + param_name
-                module_param = module.get_parameter(param_name)
+        # We intentionally move to the target device first, then cast. Experimentally, this was found to
+        # be significantly faster for 16-bit CPU tensors being moved to a CUDA device than doing the
+        # same thing in a single call to '.to(...)'.
+        patch.to(device=device)
+        patch.to(dtype=torch.float32)

-                # Save original weight
-                original_weights.save(param_key, module_param)
+        # TODO(ryand): Using torch.autocast(...) over explicit casting may offer a speed benefit on CUDA
+        # devices here. Experimentally, it was found to be very slow on CPU. More investigation needed.
+        for param_name, lora_param_weight in patch.get_parameters(module_to_patch).items():
+            param_key = module_to_patch_key + "." + param_name
+            module_param = module_to_patch.get_parameter(param_name)

-                if module_param.shape != lora_param_weight.shape:
+            # Save original weight
+            original_weights.save(param_key, module_param)
+
+            if module_param.shape != lora_param_weight.shape:
+                if module_param.nelement() == lora_param_weight.nelement():
                    lora_param_weight = lora_param_weight.reshape(module_param.shape)
+                else:
+                    # This condition was added to handle layers in FLUX control LoRAs.
+                    # TODO(ryand): Move the weight update into the LoRA layer so that the LoRAPatcher doesn't need
+                    # to worry about this?
+                    expanded_weight = torch.zeros_like(
+                        lora_param_weight, dtype=module_param.dtype, device=module_param.device
+                    )
+                    slices = tuple(slice(0, dim) for dim in module_param.shape)
+                    expanded_weight[slices] = module_param
+                    # The expanded tensor must be registered on the module that is being patched.
+                    setattr(
+                        module_to_patch,
+                        param_name,
+                        torch.nn.Parameter(expanded_weight, requires_grad=module_param.requires_grad),
+                    )
+                    module_param = expanded_weight

-                lora_param_weight *= patch_weight * layer_scale
-                module_param += lora_param_weight.to(dtype=dtype)
+            lora_param_weight *= patch_weight * layer_scale
+            module_param += lora_param_weight.to(dtype=dtype)

-            layer.to(device=TorchDevice.CPU_DEVICE)
+        patch.to(device=TorchDevice.CPU_DEVICE)

    @staticmethod
    @torch.no_grad()
    @contextmanager
-    def apply_lora_sidecar_patches(
+    def apply_lora_wrapper_patches(
        model: torch.nn.Module,
        patches: Iterable[Tuple[LoRAModelRaw, float]],
        prefix: str,
        dtype: torch.dtype,
    ):
-        """Apply one or more LoRA sidecar patches to a model within a context manager. Sidecar patches incur some
-        overhead compared to normal LoRA patching, but they allow for LoRA layers to applied to base layers in any
-        quantization format.
+        """Apply one or more LoRA wrapper patches to a model within a context manager. Wrapper patches incur some
+        runtime overhead compared to normal LoRA patching, but they enable:
+        - LoRA layers to be applied to quantized models
+        - LoRA layers to be applied to CPU layers without needing to store a full copy of the original weights (i.e.
+          avoid doubling the memory requirements).

        Args:
            model (torch.nn.Module): The model to patch.
@@ -140,14 +284,11 @@ class LoRAPatcher:
                associated weights. An iterator is used so that the LoRA patches do not need to be loaded into memory
                all at once.
            prefix (str): The keys in the patches will be filtered to only include weights with this prefix.
-            dtype (torch.dtype): The compute dtype of the sidecar layers. This cannot easily be inferred from the model,
-                since the sidecar layers are typically applied on top of quantized layers whose weight dtype is
-                different from their compute dtype.
        """
        original_modules: dict[str, torch.nn.Module] = {}
        try:
            for patch, patch_weight in patches:
-                LoRAPatcher._apply_lora_sidecar_patch(
+                LoRAPatcher._apply_lora_wrapper_patch(
                    model=model,
                    prefix=prefix,
                    patch=patch,
@@ -165,7 +306,7 @@ class LoRAPatcher:
                LoRAPatcher._set_submodule(parent_module, module_name, orig_module)

    @staticmethod
-    def _apply_lora_sidecar_patch(
+    def _apply_lora_wrapper_patch(
        model: torch.nn.Module,
        patch: LoRAModelRaw,
        patch_weight: float,
@@ -173,7 +314,7 @@ class LoRAPatcher:
        original_modules: dict[str, torch.nn.Module],
        dtype: torch.dtype,
    ):
-        """Apply a single LoRA sidecar patch to a model."""
+        """Apply a single LoRA wrapper patch to a model."""

        if patch_weight == 0:
            return
@@ -194,28 +335,47 @@ class LoRAPatcher:
                model, layer_key[prefix_len:], layer_key_is_flattened=layer_keys_are_flattened
            )

-            # Initialize the LoRA sidecar layer.
-            lora_sidecar_layer = LoRAPatcher._initialize_lora_sidecar_layer(module, layer, patch_weight)
+            LoRAPatcher._apply_lora_layer_wrapper_patch(
+                model=model,
+                module_to_patch=module,
+                module_to_patch_key=module_key,
+                patch=layer,
+                patch_weight=patch_weight,
+                original_modules=original_modules,
+                dtype=dtype,
+            )

-            # Replace the original module with a LoRASidecarModule if it has not already been done.
-            if module_key in original_modules:
-                # The module has already been patched with a LoRASidecarModule. Append to it.
-                assert isinstance(module, LoRASidecarModule)
-                lora_sidecar_module = module
-            else:
-                # The module has not yet been patched with a LoRASidecarModule. Create one.
-                lora_sidecar_module = LoRASidecarModule(module, [])
-                original_modules[module_key] = module
-                module_parent_key, module_name = LoRAPatcher._split_parent_key(module_key)
-                module_parent = model.get_submodule(module_parent_key)
-                LoRAPatcher._set_submodule(module_parent, module_name, lora_sidecar_module)
+    @staticmethod
+    @torch.no_grad()
+    def _apply_lora_layer_wrapper_patch(
+        model: torch.nn.Module,
+        module_to_patch: torch.nn.Module,
+        module_to_patch_key: str,
+        patch: AnyLoRALayer,
+        patch_weight: float,
+        original_modules: dict[str, torch.nn.Module],
+        dtype: torch.dtype,
+    ):
+        """Attach a single LoRA layer to a single module by means of a LoRASidecarWrapper.
+
+        If `module_to_patch` is not yet a LoRASidecarWrapper, a new wrapper is created for it, the unwrapped module is
+        recorded in `original_modules` under `module_to_patch_key`, and the wrapper is handed to `_set_submodule` to
+        take its place under the parent module. Otherwise the existing wrapper is reused. In both cases `patch` is
+        moved to the device of the wrapped module's weight, cast to `dtype`, and registered on the wrapper together
+        with `patch_weight`.
+        """

-            # Move the LoRA sidecar layer to the same device/dtype as the orig module.
-            # TODO(ryand): Experiment with moving to the device first, then casting. This could be faster.
-            lora_sidecar_layer.to(device=lora_sidecar_module.orig_module.weight.device, dtype=dtype)
+        # Replace the original module with a LoRASidecarWrapper if it has not already been done.
+        if not isinstance(module_to_patch, LoRASidecarWrapper):
+            lora_wrapper_layer = LoRAPatcher._initialize_lora_wrapper_layer(module_to_patch)
+            original_modules[module_to_patch_key] = module_to_patch
+            module_parent_key, module_name = LoRAPatcher._split_parent_key(module_to_patch_key)
+            module_parent = model.get_submodule(module_parent_key)
+            LoRAPatcher._set_submodule(module_parent, module_name, lora_wrapper_layer)
+            orig_module = module_to_patch
+        else:
+            # Already wrapped: the unwrapped module is expected to have been recorded when the wrapper was created.
+            assert module_to_patch_key in original_modules
+            lora_wrapper_layer = module_to_patch
+            orig_module = module_to_patch.orig_module

-            # Add the LoRA sidecar layer to the LoRASidecarModule.
-            lora_sidecar_module.add_lora_layer(lora_sidecar_layer)
+        # Move the LoRA layer to the device of the orig module's weight and cast it to the requested `dtype`.
+        patch.to(device=orig_module.weight.device, dtype=dtype)
+
+        # Register the LoRA layer and its weight with the LoRASidecarWrapper.
+        lora_wrapper_layer.add_lora_layer(patch, patch_weight)

    @staticmethod
    def _split_parent_key(module_key: str) -> tuple[str, str]:
@@ -236,17 +396,13 @@ class LoRAPatcher:
            raise ValueError(f"Invalid module key: {module_key}")

    @staticmethod
-    def _initialize_lora_sidecar_layer(orig_layer: torch.nn.Module, lora_layer: AnyLoRALayer, patch_weight: float):
-        # TODO(ryand): Add support for more original layer types and LoRA layer types.
-        if isinstance(orig_layer, torch.nn.Linear) or (
-            isinstance(orig_layer, LoRASidecarModule) and isinstance(orig_layer.orig_module, torch.nn.Linear)
-        ):
-            if isinstance(lora_layer, LoRALayer):
-                return LoRALinearSidecarLayer(lora_layer=lora_layer, weight=patch_weight)
-            elif isinstance(lora_layer, ConcatenatedLoRALayer):
-                return ConcatenatedLoRALinearSidecarLayer(concatenated_lora_layer=lora_layer, weight=patch_weight)
-            else:
-                raise ValueError(f"Unsupported Linear LoRA layer type: {type(lora_layer)}")
+    def _initialize_lora_wrapper_layer(orig_layer: torch.nn.Module):
+        """Create a wrapper with no LoRA layers registered yet, chosen by the type of `orig_layer`.
+
+        Raises:
+            ValueError: If `orig_layer` is not a torch.nn.Linear, torch.nn.Conv1d or torch.nn.Conv2d.
+        """
+        if isinstance(orig_layer, torch.nn.Linear):
+            return LoRALinearWrapper(orig_layer, [], [])
+        elif isinstance(orig_layer, torch.nn.Conv1d):
+            return LoRAConv1dWrapper(orig_layer, [], [])
+        elif isinstance(orig_layer, torch.nn.Conv2d):
+            return LoRAConv2dWrapper(orig_layer, [], [])
        else:
            raise ValueError(f"Unsupported layer type: {type(orig_layer)}")

--- a/invokeai/backend/lora/sidecar_layers/init.py
+++ b/invokeai/backend/lora/sidecar_layers/init.py
--- a/invokeai/backend/lora/sidecar_layers/concatenated_lora/init.py
+++ b/invokeai/backend/lora/sidecar_layers/concatenated_lora/init.py
--- a/invokeai/backend/lora/sidecar_layers/concatenated_lora/concatenated_lora_linear_sidecar_layer.py
+++ b/invokeai/backend/lora/sidecar_layers/concatenated_lora/concatenated_lora_linear_sidecar_layer.py
@@ -1,34 +0,0 @@
-import torch
-
-from invokeai.backend.lora.layers.concatenated_lora_layer import ConcatenatedLoRALayer
-
-
-class ConcatenatedLoRALinearSidecarLayer(torch.nn.Module):
-    def __init__(
-        self,
-        concatenated_lora_layer: ConcatenatedLoRALayer,
-        weight: float,
-    ):
-        super().__init__()
-
-        self._concatenated_lora_layer = concatenated_lora_layer
-        self._weight = weight
-
-    def forward(self, input: torch.Tensor) -> torch.Tensor:
-        x_chunks: list[torch.Tensor] = []
-        for lora_layer in self._concatenated_lora_layer.lora_layers:
-            x_chunk = torch.nn.functional.linear(input, lora_layer.down)
-            if lora_layer.mid is not None:
-                x_chunk = torch.nn.functional.linear(x_chunk, lora_layer.mid)
-            x_chunk = torch.nn.functional.linear(x_chunk, lora_layer.up, bias=lora_layer.bias)
-            x_chunk *= self._weight * lora_layer.scale()
-            x_chunks.append(x_chunk)
-
-        # TODO(ryand): Generalize to support concat_axis != 0.
-        assert self._concatenated_lora_layer.concat_axis == 0
-        x = torch.cat(x_chunks, dim=-1)
-        return x
-
-    def to(self, device: torch.device | None = None, dtype: torch.dtype | None = None):
-        self._concatenated_lora_layer.to(device=device, dtype=dtype)
-        return self
--- a/invokeai/backend/lora/sidecar_layers/lora/init.py
+++ b/invokeai/backend/lora/sidecar_layers/lora/init.py
--- a/invokeai/backend/lora/sidecar_layers/lora/lora_linear_sidecar_layer.py
+++ b/invokeai/backend/lora/sidecar_layers/lora/lora_linear_sidecar_layer.py
@@ -1,27 +0,0 @@
-import torch
-
-from invokeai.backend.lora.layers.lora_layer import LoRALayer
-
-
-class LoRALinearSidecarLayer(torch.nn.Module):
-    def __init__(
-        self,
-        lora_layer: LoRALayer,
-        weight: float,
-    ):
-        super().__init__()
-
-        self._lora_layer = lora_layer
-        self._weight = weight
-
-    def forward(self, x: torch.Tensor) -> torch.Tensor:
-        x = torch.nn.functional.linear(x, self._lora_layer.down)
-        if self._lora_layer.mid is not None:
-            x = torch.nn.functional.linear(x, self._lora_layer.mid)
-        x = torch.nn.functional.linear(x, self._lora_layer.up, bias=self._lora_layer.bias)
-        x *= self._weight * self._lora_layer.scale()
-        return x
-
-    def to(self, device: torch.device | None = None, dtype: torch.dtype | None = None):
-        self._lora_layer.to(device=device, dtype=dtype)
-        return self
--- a/invokeai/backend/lora/sidecar_layers/lora_sidecar_layer.py
+++ b/invokeai/backend/lora/sidecar_layers/lora_sidecar_layer.py
--- a/invokeai/backend/lora/sidecar_layers/lora_sidecar_module.py
+++ b/invokeai/backend/lora/sidecar_layers/lora_sidecar_module.py
@@ -1,24 +0,0 @@
-import torch
-
-
-class LoRASidecarModule(torch.nn.Module):
-    """A LoRA sidecar module that wraps an original module and adds LoRA layers to it."""
-
-    def __init__(self, orig_module: torch.nn.Module, lora_layers: list[torch.nn.Module]):
-        super().__init__()
-        self.orig_module = orig_module
-        self._lora_layers = lora_layers
-
-    def add_lora_layer(self, lora_layer: torch.nn.Module):
-        self._lora_layers.append(lora_layer)
-
-    def forward(self, input: torch.Tensor) -> torch.Tensor:
-        x = self.orig_module(input)
-        for lora_layer in self._lora_layers:
-            x += lora_layer(input)
-        return x
-
-    def to(self, device: torch.device | None = None, dtype: torch.dtype | None = None):
-        self._orig_module.to(device=device, dtype=dtype)
-        for lora_layer in self._lora_layers:
-            lora_layer.to(device=device, dtype=dtype)
--- a/invokeai/backend/model_manager/config.py
+++ b/invokeai/backend/model_manager/config.py
@@ -67,6 +67,7 @@ class ModelType(str, Enum):
    Main = "main"
    VAE = "vae"
    LoRA = "lora"
+    StructuralLoRa = "structural_lora"
    ControlNet = "controlnet"  # used by model_probe
    TextualInversion = "embedding"
    IPAdapter = "ip_adapter"
@@ -273,6 +274,18 @@ class LoRALyCORISConfig(LoRAConfigBase):
        return Tag(f"{ModelType.LoRA.value}.{ModelFormat.LyCORIS.value}")


+class StructuralLoRALyCORISConfig(ModelConfigBase):
+    """Model config for Structural LoRA/Lycoris models."""
+
+    # `type` and `format` are each pinned to a single literal value; together they form the tag built in get_tag().
+    type: Literal[ModelType.StructuralLoRa] = ModelType.StructuralLoRa
+    # Optional; defaults to None when no trigger phrases are given.
+    trigger_phrases: Optional[set[str]] = Field(description="Set of trigger phrases for this model", default=None)
+    format: Literal[ModelFormat.LyCORIS] = ModelFormat.LyCORIS
+
+    @staticmethod
+    def get_tag() -> Tag:
+        # The tag has the form "<type value>.<format value>"; it is the tag this class is annotated with in
+        # AnyModelConfig.
+        return Tag(f"{ModelType.StructuralLoRa.value}.{ModelFormat.LyCORIS.value}")
+
+
 class LoRADiffusersConfig(LoRAConfigBase):
    """Model config for LoRA/Diffusers models."""

@@ -535,6 +548,7 @@ AnyModelConfig = Annotated[
        Annotated[ControlNetDiffusersConfig, ControlNetDiffusersConfig.get_tag()],
        Annotated[ControlNetCheckpointConfig, ControlNetCheckpointConfig.get_tag()],
        Annotated[LoRALyCORISConfig, LoRALyCORISConfig.get_tag()],
+        Annotated[StructuralLoRALyCORISConfig, StructuralLoRALyCORISConfig.get_tag()],
        Annotated[LoRADiffusersConfig, LoRADiffusersConfig.get_tag()],
        Annotated[T5EncoderConfig, T5EncoderConfig.get_tag()],
        Annotated[T5EncoderBnbQuantizedLlmInt8bConfig, T5EncoderBnbQuantizedLlmInt8bConfig.get_tag()],
--- a/invokeai/backend/model_manager/load/init.py
+++ b/invokeai/backend/model_manager/load/init.py
@@ -8,7 +8,7 @@ from pathlib import Path

 from invokeai.backend.model_manager.load.load_base import LoadedModel, LoadedModelWithoutConfig, ModelLoaderBase
 from invokeai.backend.model_manager.load.load_default import ModelLoader
-from invokeai.backend.model_manager.load.model_cache.model_cache import ModelCache
+from invokeai.backend.model_manager.load.model_cache.model_cache_default import ModelCache
 from invokeai.backend.model_manager.load.model_loader_registry import ModelLoaderRegistry, ModelLoaderRegistryBase

 # This registers the subclasses that implement loaders of specific model types
--- a/invokeai/backend/model_manager/load/load_base.py
+++ b/invokeai/backend/model_manager/load/load_base.py
@@ -5,6 +5,7 @@ Base class for model loading in InvokeAI.

 from abc import ABC, abstractmethod
 from contextlib import contextmanager
+from dataclasses import dataclass
 from logging import Logger
 from pathlib import Path
 from typing import Any, Dict, Generator, Optional, Tuple
@@ -17,17 +18,19 @@ from invokeai.backend.model_manager.config import (
    AnyModelConfig,
    SubModelType,
 )
-from invokeai.backend.model_manager.load.model_cache.cache_record import CacheRecord
-from invokeai.backend.model_manager.load.model_cache.model_cache import ModelCache
+from invokeai.backend.model_manager.load.model_cache.model_cache_base import ModelCacheBase, ModelLockerBase


+@dataclass
 class LoadedModelWithoutConfig:
-    """Context manager object that mediates transfer from RAM<->VRAM.
+    """
+    Context manager object that mediates transfer from RAM<->VRAM.

    This is a context manager object that has two distinct APIs:

    1. Older API (deprecated):
-    Use the LoadedModel object directly as a context manager.  It will move the model into VRAM (on CUDA devices), and
+    Use the LoadedModel object directly as a context manager.
+    It will move the model into VRAM (on CUDA devices), and
    return the model in a form suitable for passing to torch.
    Example:
    ```
@@ -37,9 +40,13 @@ class LoadedModelWithoutConfig:
    ```

    2. Newer API (recommended):
-    Call the LoadedModel's `model_on_device()` method in a context. It returns a tuple consisting of a copy of the
-    model's state dict in CPU RAM followed by a copy of the model in VRAM. The state dict is provided to allow LoRAs and
-    other model patchers to return the model to its unpatched state without expensive copy and restore operations.
+    Call the LoadedModel's `model_on_device()` method in a
+    context. It returns a tuple consisting of a copy of
+    the model's state dict in CPU RAM followed by a copy
+    of the model in VRAM. The state dict is provided to allow
+    LoRAs and other model patchers to return the model to
+    its unpatched state without expensive copy and restore
+    operations.

    Example:
    ```
@@ -48,42 +55,43 @@ class LoadedModelWithoutConfig:
        image = vae.decode(latents)[0]
    ```

-    The state_dict should be treated as a read-only object and never modified. Also be aware that some loadable models
-    do not have a state_dict, in which case this value will be None.
+    The state_dict should be treated as a read-only object and
+    never modified. Also be aware that some loadable models do
+    not have a state_dict, in which case this value will be None.
    """

-    def __init__(self, cache_record: CacheRecord, cache: ModelCache):
-        self._cache_record = cache_record
-        self._cache = cache
+    _locker: ModelLockerBase

    def __enter__(self) -> AnyModel:
-        self._cache.lock(self._cache_record.key)
+        """Context entry."""
+        self._locker.lock()
        return self.model

    def __exit__(self, *args: Any, **kwargs: Any) -> None:
-        self._cache.unlock(self._cache_record.key)
+        """Context exit."""
+        self._locker.unlock()

    @contextmanager
    def model_on_device(self) -> Generator[Tuple[Optional[Dict[str, torch.Tensor]], AnyModel], None, None]:
        """Return a tuple consisting of the model's state dict (if it exists) and the locked model on execution device."""
-        self._cache.lock(self._cache_record.key)
+        locked_model = self._locker.lock()
        try:
-            yield (self._cache_record.cached_model.get_cpu_state_dict(), self._cache_record.cached_model.model)
+            state_dict = self._locker.get_state_dict()
+            yield (state_dict, locked_model)
        finally:
-            self._cache.unlock(self._cache_record.key)
+            self._locker.unlock()

    @property
    def model(self) -> AnyModel:
        """Return the model without locking it."""
-        return self._cache_record.cached_model.model
+        return self._locker.model


+@dataclass
 class LoadedModel(LoadedModelWithoutConfig):
    """Context manager object that mediates transfer from RAM<->VRAM."""

-    def __init__(self, config: Optional[AnyModelConfig], cache_record: CacheRecord, cache: ModelCache):
-        super().__init__(cache_record=cache_record, cache=cache)
-        self.config = config
+    config: Optional[AnyModelConfig] = None


 # TODO(MM2):
@@ -102,7 +110,7 @@ class ModelLoaderBase(ABC):
        self,
        app_config: InvokeAIAppConfig,
        logger: Logger,
-        ram_cache: ModelCache,
+        ram_cache: ModelCacheBase[AnyModel],
    ):
        """Initialize the loader."""
        pass
@@ -130,6 +138,6 @@ class ModelLoaderBase(ABC):

    @property
    @abstractmethod
-    def ram_cache(self) -> ModelCache:
+    def ram_cache(self) -> ModelCacheBase[AnyModel]:
        """Return the ram cache associated with this loader."""
        pass
--- a/invokeai/backend/model_manager/load/load_default.py
+++ b/invokeai/backend/model_manager/load/load_default.py
@@ -14,8 +14,7 @@ from invokeai.backend.model_manager import (
 )
 from invokeai.backend.model_manager.config import DiffusersConfigBase
 from invokeai.backend.model_manager.load.load_base import LoadedModel, ModelLoaderBase
-from invokeai.backend.model_manager.load.model_cache.cache_record import CacheRecord
-from invokeai.backend.model_manager.load.model_cache.model_cache import ModelCache, get_model_cache_key
+from invokeai.backend.model_manager.load.model_cache.model_cache_base import ModelCacheBase, ModelLockerBase
 from invokeai.backend.model_manager.load.model_util import calc_model_size_by_fs
 from invokeai.backend.model_manager.load.optimizations import skip_torch_weight_init
 from invokeai.backend.util.devices import TorchDevice
@@ -29,7 +28,7 @@ class ModelLoader(ModelLoaderBase):
        self,
        app_config: InvokeAIAppConfig,
        logger: Logger,
-        ram_cache: ModelCache,
+        ram_cache: ModelCacheBase[AnyModel],
    ):
        """Initialize the loader."""
        self._app_config = app_config
@@ -55,11 +54,11 @@ class ModelLoader(ModelLoaderBase):
            raise InvalidModelConfigException(f"Files for model '{model_config.name}' not found at {model_path}")

        with skip_torch_weight_init():
-            cache_record = self._load_and_cache(model_config, submodel_type)
-        return LoadedModel(config=model_config, cache_record=cache_record, cache=self._ram_cache)
+            locker = self._load_and_cache(model_config, submodel_type)
+        return LoadedModel(config=model_config, _locker=locker)

    @property
-    def ram_cache(self) -> ModelCache:
+    def ram_cache(self) -> ModelCacheBase[AnyModel]:
        """Return the ram cache associated with this loader."""
        return self._ram_cache

@@ -67,10 +66,10 @@ class ModelLoader(ModelLoaderBase):
        model_base = self._app_config.models_path
        return (model_base / config.path).resolve()

-    def _load_and_cache(self, config: AnyModelConfig, submodel_type: Optional[SubModelType] = None) -> CacheRecord:
+    def _load_and_cache(self, config: AnyModelConfig, submodel_type: Optional[SubModelType] = None) -> ModelLockerBase:
        stats_name = ":".join([config.base, config.type, config.name, (submodel_type or "")])
        try:
-            return self._ram_cache.get(key=get_model_cache_key(config.key, submodel_type), stats_name=stats_name)
+            return self._ram_cache.get(config.key, submodel_type, stats_name=stats_name)
        except IndexError:
            pass

@@ -79,11 +78,16 @@ class ModelLoader(ModelLoaderBase):
        loaded_model = self._load_model(config, submodel_type)

        self._ram_cache.put(
-            get_model_cache_key(config.key, submodel_type),
+            config.key,
+            submodel_type=submodel_type,
            model=loaded_model,
        )

-        return self._ram_cache.get(key=get_model_cache_key(config.key, submodel_type), stats_name=stats_name)
+        return self._ram_cache.get(
+            key=config.key,
+            submodel_type=submodel_type,
+            stats_name=stats_name,
+        )

    def get_size_fs(
        self, config: AnyModelConfig, model_path: Path, submodel_type: Optional[SubModelType] = None
--- a/invokeai/backend/model_manager/load/model_cache/__init__.py
+++ b/invokeai/backend/model_manager/load/model_cache/__init__.py
@@ -0,0 +1,6 @@
+"""Init file for ModelCache."""
+
+from .model_cache_base import ModelCacheBase, CacheStats  # noqa F401
+from .model_cache_default import ModelCache  # noqa F401
+
+__all__ = ["ModelCacheBase", "ModelCache", "CacheStats"]  # public names exported by `from ... import *`
--- a/invokeai/backend/model_manager/load/model_cache/cache_record.py
+++ b/invokeai/backend/model_manager/load/model_cache/cache_record.py
@@ -1,31 +0,0 @@
-from dataclasses import dataclass
-
-from invokeai.backend.model_manager.load.model_cache.cached_model.cached_model_only_full_load import (
-    CachedModelOnlyFullLoad,
-)
-from invokeai.backend.model_manager.load.model_cache.cached_model.cached_model_with_partial_load import (
-    CachedModelWithPartialLoad,
-)
-
-
-@dataclass
-class CacheRecord:
-    """A class that represents a model in the model cache."""
-
-    # Cache key.
-    key: str
-    # Model in memory.
-    cached_model: CachedModelWithPartialLoad | CachedModelOnlyFullLoad
-    # If locks > 0, the model is actively being used, so we should do our best to keep it on the compute device.
-    _locks: int = 0
-
-    def lock(self) -> None:
-        self._locks += 1
-
-    def unlock(self) -> None:
-        self._locks -= 1
-        assert self._locks >= 0
-
-    @property
-    def is_locked(self) -> bool:
-        return self._locks > 0
--- a/invokeai/backend/model_manager/load/model_cache/cache_stats.py
+++ b/invokeai/backend/model_manager/load/model_cache/cache_stats.py
@@ -1,15 +0,0 @@
-from dataclasses import dataclass, field
-from typing import Dict
-
-
-@dataclass
-class CacheStats(object):
-    """Collect statistics on cache performance."""
-
-    hits: int = 0  # cache hits
-    misses: int = 0  # cache misses
-    high_watermark: int = 0  # amount of cache used
-    in_cache: int = 0  # number of models in cache
-    cleared: int = 0  # number of models cleared to make space
-    cache_size: int = 0  # total size of cache
-    loaded_model_sizes: Dict[str, int] = field(default_factory=dict)
--- a/invokeai/backend/model_manager/load/model_cache/cached_model/__init__.py
+++ b/invokeai/backend/model_manager/load/model_cache/cached_model/__init__.py
--- a/invokeai/backend/model_manager/load/model_cache/cached_model/cached_model_only_full_load.py
+++ b/invokeai/backend/model_manager/load/model_cache/cached_model/cached_model_only_full_load.py
@@ -1,81 +0,0 @@
-from typing import Any
-
-import torch
-
-
-class CachedModelOnlyFullLoad:
-    """A wrapper around a PyTorch model to handle full loads and unloads between the CPU and the compute device.
-
-    Note: "VRAM" is used throughout this class to refer to the memory on the compute device. It could be CUDA memory,
-    MPS memory, etc.
-    """
-
-    def __init__(self, model: torch.nn.Module | Any, compute_device: torch.device, total_bytes: int):
-        """Initialize a CachedModelOnlyFullLoad.
-
-        Args:
-            model (torch.nn.Module | Any): The model to wrap. Should be on the CPU.
-            compute_device (torch.device): The compute device to move the model to.
-            total_bytes (int): The total size (in bytes) of all the weights in the model.
-        """
-        # model is often a torch.nn.Module, but could be any model type. Throughout this class, we handle both cases.
-        self._model = model
-        self._compute_device = compute_device
-        self._total_bytes = total_bytes
-        self._is_in_vram = False
-
-    @property
-    def model(self) -> torch.nn.Module:
-        return self._model
-
-    def get_cpu_state_dict(self) -> dict[str, torch.Tensor] | None:
-        """Get a read-only copy of the model's state dict in RAM."""
-        # TODO(ryand): Document this better and implement it.
-        return None
-
-    def total_bytes(self) -> int:
-        """Get the total size (in bytes) of all the weights in the model."""
-        return self._total_bytes
-
-    def cur_vram_bytes(self) -> int:
-        """Get the size (in bytes) of the weights that are currently in VRAM."""
-        if self._is_in_vram:
-            return self._total_bytes
-        else:
-            return 0
-
-    def is_in_vram(self) -> bool:
-        """Return true if the model is currently in VRAM."""
-        return self._is_in_vram
-
-    def full_load_to_vram(self) -> int:
-        """Load all weights into VRAM (if supported by the model).
-
-        Returns:
-            The number of bytes loaded into VRAM.
-        """
-        if self._is_in_vram:
-            # Already in VRAM.
-            return 0
-
-        if not hasattr(self._model, "to"):
-            # Model doesn't support moving to a device.
-            return 0
-
-        self._model.to(self._compute_device)
-        self._is_in_vram = True
-        return self._total_bytes
-
-    def full_unload_from_vram(self) -> int:
-        """Unload all weights from VRAM.
-
-        Returns:
-            The number of bytes unloaded from VRAM.
-        """
-        if not self._is_in_vram:
-            # Already in RAM.
-            return 0
-
-        self._model.to("cpu")
-        self._is_in_vram = False
-        return self._total_bytes
--- a/invokeai/backend/model_manager/load/model_cache/cached_model/cached_model_with_partial_load.py
+++ b/invokeai/backend/model_manager/load/model_cache/cached_model/cached_model_with_partial_load.py
@@ -1,139 +0,0 @@
-import torch
-
-from invokeai.backend.model_manager.load.model_cache.torch_function_autocast_context import (
-    add_autocast_to_module_forward,
-)
-from invokeai.backend.util.calc_tensor_size import calc_tensor_size
-
-
-def set_nested_attr(obj: object, attr: str, value: object):
-    """A helper function that extends setattr() to support nested attributes.
-
-    Example:
-        set_nested_attr(model, "module.encoder.conv1.weight", new_conv1_weight)
-    """
-    attrs = attr.split(".")
-    for attr in attrs[:-1]:
-        obj = getattr(obj, attr)
-    setattr(obj, attrs[-1], value)
-
-
-class CachedModelWithPartialLoad:
-    """A wrapper around a PyTorch model to handle partial loads and unloads between the CPU and the compute device.
-
-    Note: "VRAM" is used throughout this class to refer to the memory on the compute device. It could be CUDA memory,
-    MPS memory, etc.
-    """
-
-    def __init__(self, model: torch.nn.Module, compute_device: torch.device):
-        self._model = model
-        self._compute_device = compute_device
-
-        # A CPU read-only copy of the model's state dict.
-        self._cpu_state_dict: dict[str, torch.Tensor] = model.state_dict()
-
-        # Monkey-patch the model to add autocasting to the model's forward method.
-        add_autocast_to_module_forward(model, compute_device)
-
-        # TODO(ryand): Manage a read-only CPU copy of the model state dict.
-        # TODO(ryand): Add memoization for total_bytes and cur_vram_bytes?
-
-        self._total_bytes = sum(calc_tensor_size(p) for p in self._model.parameters())
-        self._cur_vram_bytes: int | None = None
-
-    @property
-    def model(self) -> torch.nn.Module:
-        return self._model
-
-    def get_cpu_state_dict(self) -> dict[str, torch.Tensor] | None:
-        """Get a read-only copy of the model's state dict in RAM."""
-        # TODO(ryand): Document this better.
-        return self._cpu_state_dict
-
-    def total_bytes(self) -> int:
-        """Get the total size (in bytes) of all the weights in the model."""
-        return self._total_bytes
-
-    def cur_vram_bytes(self) -> int:
-        """Get the size (in bytes) of the weights that are currently in VRAM."""
-        if self._cur_vram_bytes is None:
-            self._cur_vram_bytes = sum(
-                calc_tensor_size(p) for p in self._model.parameters() if p.device.type == self._compute_device.type
-            )
-        return self._cur_vram_bytes
-
-    def full_load_to_vram(self) -> int:
-        """Load all weights into VRAM."""
-        return self.partial_load_to_vram(self.total_bytes())
-
-    def full_unload_from_vram(self) -> int:
-        """Unload all weights from VRAM."""
-        return self.partial_unload_from_vram(self.total_bytes())
-
-    @torch.no_grad()
-    def partial_load_to_vram(self, vram_bytes_to_load: int) -> int:
-        """Load more weights into VRAM without exceeding vram_bytes_to_load.
-
-        Returns:
-            The number of bytes loaded into VRAM.
-        """
-        vram_bytes_loaded = 0
-
-        # TODO(ryand): Iterate over buffers too?
-        for key, param in self._model.named_parameters():
-            # Skip parameters that are already on the compute device.
-            if param.device.type == self._compute_device.type:
-                continue
-
-            # Check the size of the parameter.
-            param_size = calc_tensor_size(param)
-            if vram_bytes_loaded + param_size > vram_bytes_to_load:
-                # TODO(ryand): Should we just break here? If we couldn't fit this parameter into VRAM, is it really
-                # worth continuing to search for a smaller parameter that would fit?
-                continue
-
-            # Copy the parameter to the compute device.
-            # We use the 'overwrite' strategy from torch.nn.Module._apply().
-            # TODO(ryand): For some edge cases (e.g. quantized models?), we may need to support other strategies (e.g.
-            # swap).
-            assert isinstance(param, torch.nn.Parameter)
-            assert param.is_leaf
-            out_param = torch.nn.Parameter(param.to(self._compute_device, copy=True), requires_grad=param.requires_grad)
-            set_nested_attr(self._model, key, out_param)
-            # We did not port the param.grad handling from torch.nn.Module._apply(), because we do not expect to be
-            # handling gradients. We assert that this assumption is true.
-            assert param.grad is None
-
-            vram_bytes_loaded += param_size
-
-        if self._cur_vram_bytes is not None:
-            self._cur_vram_bytes += vram_bytes_loaded
-
-        return vram_bytes_loaded
-
-    @torch.no_grad()
-    def partial_unload_from_vram(self, vram_bytes_to_free: int) -> int:
-        """Unload weights from VRAM until vram_bytes_to_free bytes are freed. Or the entire model is unloaded.
-
-        Returns:
-            The number of bytes unloaded from VRAM.
-        """
-        vram_bytes_freed = 0
-
-        # TODO(ryand): Iterate over buffers too?
-        for key, param in self._model.named_parameters():
-            if vram_bytes_freed >= vram_bytes_to_free:
-                break
-
-            if param.device.type != self._compute_device.type:
-                continue
-
-            # Create a new parameter, but inject the existing CPU tensor into it.
-            out_param = torch.nn.Parameter(self._cpu_state_dict[key], requires_grad=param.requires_grad)
-            set_nested_attr(self._model, key, out_param)
-            vram_bytes_freed += calc_tensor_size(param)
-
-        if self._cur_vram_bytes is not None:
-            self._cur_vram_bytes -= vram_bytes_freed
-
-        return vram_bytes_freed
--- a/invokeai/backend/model_manager/load/model_cache/model_cache.py
+++ b/invokeai/backend/model_manager/load/model_cache/model_cache.py
@@ -1,534 +0,0 @@
-import gc
-from logging import Logger
-from typing import Dict, List, Optional
-
-import torch
-
-from invokeai.backend.model_manager import AnyModel, SubModelType
-from invokeai.backend.model_manager.load.memory_snapshot import MemorySnapshot
-from invokeai.backend.model_manager.load.model_cache.cache_record import CacheRecord
-from invokeai.backend.model_manager.load.model_cache.cache_stats import CacheStats
-from invokeai.backend.model_manager.load.model_cache.cached_model.cached_model_only_full_load import (
-    CachedModelOnlyFullLoad,
-)
-from invokeai.backend.model_manager.load.model_cache.cached_model.cached_model_with_partial_load import (
-    CachedModelWithPartialLoad,
-)
-from invokeai.backend.model_manager.load.model_util import calc_model_size_by_data
-from invokeai.backend.util.devices import TorchDevice
-from invokeai.backend.util.logging import InvokeAILogger
-from invokeai.backend.util.prefix_logger_adapter import PrefixedLoggerAdapter
-
-# Size of a GB in bytes.
-GB = 2**30
-
-# Size of a MB in bytes.
-MB = 2**20
-
-
-# TODO(ryand): Where should this go? The ModelCache shouldn't be concerned with submodels.
-def get_model_cache_key(model_key: str, submodel_type: Optional[SubModelType] = None) -> str:
-    """Get the cache key for a model based on the optional submodel type."""
-    if submodel_type:
-        return f"{model_key}:{submodel_type.value}"
-    else:
-        return model_key
-
-
-class ModelCache:
-    """A cache for managing models in memory.
-
-    The cache is based on two levels of model storage:
-    - execution_device: The device where most models are executed (typically "cuda", "mps", or "cpu").
-    - storage_device: The device where models are offloaded when not in active use (typically "cpu").
-
-    The model cache is based on the following assumptions:
-    - storage_device_mem_size > execution_device_mem_size
-    - disk_to_storage_device_transfer_time >> storage_device_to_execution_device_transfer_time
-
-    A copy of all models in the cache is always kept on the storage_device. A subset of the models also have a copy on
-    the execution_device.
-
-    Models are moved between the storage_device and the execution_device as necessary. Cache size limits are enforced
-    on both the storage_device and the execution_device. The execution_device cache uses a smallest-first offload
-    policy. The storage_device cache uses a least-recently-used (LRU) offload policy.
-
-    Note: Neither of these offload policies has really been compared against alternatives. It's likely that different
-    policies would be better, although the optimal policies are likely heavily dependent on usage patterns and HW
-    configuration.
-
-    The cache returns context manager generators designed to load the model into the execution device (often GPU) within
-    the context, and unload outside the context.
-
-    Example usage:
-    ```
-    cache = ModelCache(max_cache_size=7.5, max_vram_cache_size=6.0)
-    with cache.get_model('runwayml/stable-diffusion-1-5') as SD1:
-        do_something_on_gpu(SD1)
-    ```
-    """
-
-    def __init__(
-        self,
-        max_cache_size: float,
-        max_vram_cache_size: float,
-        execution_device: torch.device = torch.device("cuda"),
-        storage_device: torch.device = torch.device("cpu"),
-        lazy_offloading: bool = True,
-        log_memory_usage: bool = False,
-        logger: Optional[Logger] = None,
-    ):
-        """
-        Initialize the model RAM cache.
-
-        :param max_cache_size: Maximum size of the storage_device cache in GBs.
-        :param max_vram_cache_size: Maximum size of the execution_device cache in GBs.
-        :param execution_device: Torch device to load active model into [torch.device('cuda')]
-        :param storage_device: Torch device to save inactive model in [torch.device('cpu')]
-        :param lazy_offloading: Keep model in VRAM until another model needs to be loaded
-        :param log_memory_usage: If True, a memory snapshot will be captured before and after every model cache
-            operation, and the result will be logged (at debug level). There is a time cost to capturing the memory
-            snapshots, so it is recommended to disable this feature unless you are actively inspecting the model cache's
-            behaviour.
-        :param logger: InvokeAILogger to use (otherwise creates one)
-        """
-        # allow lazy offloading only when vram cache enabled
-        # TODO(ryand): Think about what lazy_offloading should mean in the new model cache.
-        self._lazy_offloading = lazy_offloading and max_vram_cache_size > 0
-        self._max_cache_size: float = max_cache_size
-        self._max_vram_cache_size: float = max_vram_cache_size
-        self._execution_device: torch.device = execution_device
-        self._storage_device: torch.device = storage_device
-        self._logger = PrefixedLoggerAdapter(
-            logger or InvokeAILogger.get_logger(self.__class__.__name__), "MODEL CACHE"
-        )
-        self._log_memory_usage = log_memory_usage
-        self._stats: Optional[CacheStats] = None
-
-        self._cached_models: Dict[str, CacheRecord] = {}
-        self._cache_stack: List[str] = []
-
-    @property
-    def max_cache_size(self) -> float:
-        """Return the cap on cache size."""
-        return self._max_cache_size
-
-    @max_cache_size.setter
-    def max_cache_size(self, value: float) -> None:
-        """Set the cap on cache size."""
-        self._max_cache_size = value
-
-    @property
-    def max_vram_cache_size(self) -> float:
-        """Return the cap on vram cache size."""
-        return self._max_vram_cache_size
-
-    @max_vram_cache_size.setter
-    def max_vram_cache_size(self, value: float) -> None:
-        """Set the cap on vram cache size."""
-        self._max_vram_cache_size = value
-
-    @property
-    def stats(self) -> Optional[CacheStats]:
-        """Return collected CacheStats object."""
-        return self._stats
-
-    @stats.setter
-    def stats(self, stats: CacheStats) -> None:
-        """Set the CacheStats object for collecting cache statistics."""
-        self._stats = stats
-
-    def put(self, key: str, model: AnyModel) -> None:
-        """Add a model to the cache."""
-        if key in self._cached_models:
-            self._logger.debug(
-                f"Attempted to add model {key} ({model.__class__.__name__}), but it already exists in the cache. No action necessary."
-            )
-            return
-
-        size = calc_model_size_by_data(self._logger, model)
-        self.make_room(size)
-
-        # Wrap model.
-        if isinstance(model, torch.nn.Module):
-            wrapped_model = CachedModelWithPartialLoad(model, self._execution_device)
-        else:
-            wrapped_model = CachedModelOnlyFullLoad(model, self._execution_device, size)
-
-        # running_on_cpu = self._execution_device == torch.device("cpu")
-        # state_dict = model.state_dict() if isinstance(model, torch.nn.Module) and not running_on_cpu else None
-        cache_record = CacheRecord(key=key, cached_model=wrapped_model)
-        self._cached_models[key] = cache_record
-        self._cache_stack.append(key)
-        self._logger.debug(
-            f"Added model {key} (Type: {model.__class__.__name__}, Wrap mode: {wrapped_model.__class__.__name__}, Model size: {size/MB:.2f}MB)"
-        )
-
-    def get(self, key: str, stats_name: Optional[str] = None) -> CacheRecord:
-        """Retrieve a model from the cache.
-
-        :param key: Model key
-        :param stats_name: A human-readable id for the model for the purposes of stats reporting.
-
-        Raises IndexError if the model is not in the cache.
-        """
-        if key in self._cached_models:
-            if self.stats:
-                self.stats.hits += 1
-        else:
-            if self.stats:
-                self.stats.misses += 1
-            self._logger.debug(f"Cache miss: {key}")
-            raise IndexError(f"The model with key {key} is not in the cache.")
-
-        cache_entry = self._cached_models[key]
-
-        # more stats
-        if self.stats:
-            stats_name = stats_name or key
-            self.stats.cache_size = int(self._max_cache_size * GB)
-            self.stats.high_watermark = max(self.stats.high_watermark, self._get_ram_in_use())
-            self.stats.in_cache = len(self._cached_models)
-            self.stats.loaded_model_sizes[stats_name] = max(
-                self.stats.loaded_model_sizes.get(stats_name, 0), cache_entry.cached_model.total_bytes()
-            )
-
-        # this moves the entry to the top (right end) of the stack
-        self._cache_stack = [k for k in self._cache_stack if k != key]
-        self._cache_stack.append(key)
-
-        self._logger.debug(f"Cache hit: {key} (Type: {cache_entry.cached_model.model.__class__.__name__})")
-
-        return cache_entry
-
-    def lock(self, key: str) -> None:
-        """Lock a model for use and move it into VRAM."""
-        cache_entry = self._cached_models[key]
-        cache_entry.lock()
-
-        self._logger.debug(f"Locking model {key} (Type: {cache_entry.cached_model.model.__class__.__name__})")
-
-        try:
-            self._load_locked_model(cache_entry)
-            self._logger.debug(
-                f"Finished locking model {key} (Type: {cache_entry.cached_model.model.__class__.__name__})"
-            )
-        except torch.cuda.OutOfMemoryError:
-            self._logger.warning("Insufficient GPU memory to load model. Aborting")
-            cache_entry.unlock()
-            raise
-        except Exception:
-            cache_entry.unlock()
-            raise
-
-        self._log_cache_state()
-
-    def unlock(self, key: str) -> None:
-        """Unlock a model."""
-        cache_entry = self._cached_models[key]
-        cache_entry.unlock()
-        self._logger.debug(f"Unlocked model {key} (Type: {cache_entry.cached_model.model.__class__.__name__})")
-
-    def _load_locked_model(self, cache_entry: CacheRecord) -> None:
-        """Helper function for self.lock(). Loads a locked model into VRAM."""
-        vram_available = self._get_vram_available()
-
-        # The amount of additional VRAM that will be used if we fully load the model into VRAM.
-        model_cur_vram_bytes = cache_entry.cached_model.cur_vram_bytes()
-        model_total_bytes = cache_entry.cached_model.total_bytes()
-        model_vram_needed = model_total_bytes - model_cur_vram_bytes
-
-        self._logger.debug(
-            f"Before unloading: {self._get_vram_state_str(model_cur_vram_bytes, model_total_bytes, vram_available)}"
-        )
-
-        # Make room for the model in VRAM.
-        # 1. If the model can fit entirely in VRAM, then make enough room for it to be loaded fully.
-        # 2. If the model can't fit fully into VRAM, then unload all other models and load as much of the model as
-        #    possible.
-        vram_bytes_freed = self._offload_unlocked_models(model_vram_needed)
-        self._logger.debug(f"Unloaded models (if necessary): vram_bytes_freed={(vram_bytes_freed/MB):.2f}MB")
-
-        # Check the updated vram_available after offloading.
-        vram_available = self._get_vram_available()
-        self._logger.debug(
-            f"After unloading: {self._get_vram_state_str(model_cur_vram_bytes, model_total_bytes, vram_available)}"
-        )
-
-        # Move as much of the model as possible into VRAM.
-        model_bytes_loaded = 0
-        if isinstance(cache_entry.cached_model, CachedModelWithPartialLoad):
-            model_bytes_loaded = cache_entry.cached_model.partial_load_to_vram(vram_available)
-        elif isinstance(cache_entry.cached_model, CachedModelOnlyFullLoad):  # type: ignore
-            # Partial load is not supported, so we have not choice but to try and fit it all into VRAM.
-            model_bytes_loaded = cache_entry.cached_model.full_load_to_vram()
-        else:
-            raise ValueError(f"Unsupported cached model type: {type(cache_entry.cached_model)}")
-
-        model_cur_vram_bytes = cache_entry.cached_model.cur_vram_bytes()
-        vram_available = self._get_vram_available()
-        self._logger.debug(f"Loaded model onto execution device: model_bytes_loaded={(model_bytes_loaded/MB):.2f}MB, ")
-        self._logger.debug(
-            f"After loading: {self._get_vram_state_str(model_cur_vram_bytes, model_total_bytes, vram_available)}"
-        )
-
-    def _get_vram_available(self) -> int:
-        """Get the amount of VRAM available in the cache."""
-        return int(self._max_vram_cache_size * GB) - self._get_vram_in_use()
-
-    def _get_vram_in_use(self) -> int:
-        """Get the amount of VRAM currently in use."""
-        return sum(ce.cached_model.cur_vram_bytes() for ce in self._cached_models.values())
-
-    def _get_ram_available(self) -> int:
-        """Get the amount of RAM available in the cache."""
-        return int(self._max_cache_size * GB) - self._get_ram_in_use()
-
-    def _get_ram_in_use(self) -> int:
-        """Get the amount of RAM currently in use."""
-        return sum(ce.cached_model.total_bytes() for ce in self._cached_models.values())
-
-    def _capture_memory_snapshot(self) -> Optional[MemorySnapshot]:
-        if self._log_memory_usage:
-            return MemorySnapshot.capture()
-        return None
-
-    def _get_vram_state_str(self, model_cur_vram_bytes: int, model_total_bytes: int, vram_available: int) -> str:
-        """Helper function for preparing a VRAM state log string."""
-        model_cur_vram_bytes_percent = model_cur_vram_bytes / model_total_bytes if model_total_bytes > 0 else 0
-        return (
-            f"model_total={model_total_bytes/MB:.0f} MB, "
-            + f"model_vram={model_cur_vram_bytes/MB:.0f} MB ({model_cur_vram_bytes_percent:.1%} %), "
-            + f"vram_total={int(self._max_vram_cache_size * GB)/MB:.0f} MB, "
-            + f"vram_available={(vram_available/MB):.0f} MB, "
-        )
-
-    def _offload_unlocked_models(self, vram_bytes_to_free: int) -> int:
-        """Offload models from the execution_device until vram_bytes_to_free bytes are freed, or all models are
-        offloaded. Of course, locked models are not offloaded.
-
-        Returns:
-            int: The number of bytes freed.
-        """
-        self._logger.debug(f"Offloading unlocked models with goal of freeing {vram_bytes_to_free/MB:.2f}MB of VRAM.")
-        vram_bytes_freed = 0
-        # TODO(ryand): Give more thought to the offloading policy used here.
-        cache_entries_increasing_size = sorted(self._cached_models.values(), key=lambda x: x.cached_model.total_bytes())
-        for cache_entry in cache_entries_increasing_size:
-            if vram_bytes_freed >= vram_bytes_to_free:
-                break
-            if cache_entry.is_locked:
-                continue
-
-            if isinstance(cache_entry.cached_model, CachedModelWithPartialLoad):
-                cache_entry_bytes_freed = cache_entry.cached_model.partial_unload_from_vram(
-                    vram_bytes_to_free - vram_bytes_freed
-                )
-            elif isinstance(cache_entry.cached_model, CachedModelOnlyFullLoad):  # type: ignore
-                cache_entry_bytes_freed = cache_entry.cached_model.full_unload_from_vram()
-            else:
-                raise ValueError(f"Unsupported cached model type: {type(cache_entry.cached_model)}")
-            if cache_entry_bytes_freed > 0:
-                self._logger.debug(
-                    f"Unloaded {cache_entry.key} from VRAM to free {(cache_entry_bytes_freed/MB):.0f} MB."
-                )
-            vram_bytes_freed += cache_entry_bytes_freed
-
-        TorchDevice.empty_cache()
-        return vram_bytes_freed
-
-    # def _move_model_to_device(self, cache_entry: CacheRecord, target_device: torch.device) -> None:
-    #     """Move model into the indicated device.
-
-    #     :param cache_entry: The CacheRecord for the model
-    #     :param target_device: The torch.device to move the model into
-
-    #     May raise a torch.cuda.OutOfMemoryError
-    #     """
-    #     self._logger.debug(f"Called to move {cache_entry.key} to {target_device}")
-    #     source_device = cache_entry.device
-
-    #     # Note: We compare device types only so that 'cuda' == 'cuda:0'.
-    #     # This would need to be revised to support multi-GPU.
-    #     if torch.device(source_device).type == torch.device(target_device).type:
-    #         return
-
-    #     # Some models don't have a `to` method, in which case they run in RAM/CPU.
-    #     if not hasattr(cache_entry.model, "to"):
-    #         return
-
-    #     # This roundabout method for moving the model around is done to avoid
-    #     # the cost of moving the model from RAM to VRAM and then back from VRAM to RAM.
-    #     # When moving to VRAM, we copy (not move) each element of the state dict from
-    #     # RAM to a new state dict in VRAM, and then inject it into the model.
-    #     # This operation is slightly faster than running `to()` on the whole model.
-    #     #
-    #     # When the model needs to be removed from VRAM we simply delete the copy
-    #     # of the state dict in VRAM, and reinject the state dict that is cached
-    #     # in RAM into the model. So this operation is very fast.
-    #     start_model_to_time = time.time()
-    #     snapshot_before = self._capture_memory_snapshot()
-
-    #     try:
-    #         if cache_entry.state_dict is not None:
-    #             assert hasattr(cache_entry.model, "load_state_dict")
-    #             if target_device == self._storage_device:
-    #                 cache_entry.model.load_state_dict(cache_entry.state_dict, assign=True)
-    #             else:
-    #                 new_dict: Dict[str, torch.Tensor] = {}
-    #                 for k, v in cache_entry.state_dict.items():
-    #                     new_dict[k] = v.to(target_device, copy=True)
-    #                 cache_entry.model.load_state_dict(new_dict, assign=True)
-    #         cache_entry.model.to(target_device)
-    #         cache_entry.device = target_device
-    #     except Exception as e:  # blow away cache entry
-    #         self._delete_cache_entry(cache_entry)
-    #         raise e
-
-    #     snapshot_after = self._capture_memory_snapshot()
-    #     end_model_to_time = time.time()
-    #     self._logger.debug(
-    #         f"Moved model '{cache_entry.key}' from {source_device} to"
-    #         f" {target_device} in {(end_model_to_time-start_model_to_time):.2f}s."
-    #         f"Estimated model size: {(cache_entry.size/GB):.3f} GB."
-    #         f"{get_pretty_snapshot_diff(snapshot_before, snapshot_after)}"
-    #     )
-
-    #     if (
-    #         snapshot_before is not None
-    #         and snapshot_after is not None
-    #         and snapshot_before.vram is not None
-    #         and snapshot_after.vram is not None
-    #     ):
-    #         vram_change = abs(snapshot_before.vram - snapshot_after.vram)
-
-    #         # If the estimated model size does not match the change in VRAM, log a warning.
-    #         if not math.isclose(
-    #             vram_change,
-    #             cache_entry.size,
-    #             rel_tol=0.1,
-    #             abs_tol=10 * MB,
-    #         ):
-    #             self._logger.debug(
-    #                 f"Moving model '{cache_entry.key}' from {source_device} to"
-    #                 f" {target_device} caused an unexpected change in VRAM usage. The model's"
-    #                 " estimated size may be incorrect. Estimated model size:"
-    #                 f" {(cache_entry.size/GB):.3f} GB.\n"
-    #                 f"{get_pretty_snapshot_diff(snapshot_before, snapshot_after)}"
-    #             )
-
-    def _log_cache_state(self, title: str = "Model cache state:", include_entry_details: bool = True):
-        ram_size_bytes = self._max_cache_size * GB
-        ram_in_use_bytes = self._get_ram_in_use()
-        ram_in_use_bytes_percent = ram_in_use_bytes / ram_size_bytes if ram_size_bytes > 0 else 0
-        ram_available_bytes = self._get_ram_available()
-        ram_available_bytes_percent = ram_available_bytes / ram_size_bytes if ram_size_bytes > 0 else 0
-
-        vram_size_bytes = self._max_vram_cache_size * GB
-        vram_in_use_bytes = self._get_vram_in_use()
-        vram_in_use_bytes_percent = vram_in_use_bytes / vram_size_bytes if vram_size_bytes > 0 else 0
-        vram_available_bytes = self._get_vram_available()
-        vram_available_bytes_percent = vram_available_bytes / vram_size_bytes if vram_size_bytes > 0 else 0
-
-        log = f"{title}\n"
-
-        log_format = "  {:<30} Limit: {:>7.1f} MB, Used: {:>7.1f} MB ({:>5.1%}), Available: {:>7.1f} MB ({:>5.1%})\n"
-        log += log_format.format(
-            f"Storage Device ({self._storage_device.type})",
-            ram_size_bytes / MB,
-            ram_in_use_bytes / MB,
-            ram_in_use_bytes_percent,
-            ram_available_bytes / MB,
-            ram_available_bytes_percent,
-        )
-        log += log_format.format(
-            f"Compute Device ({self._execution_device.type})",
-            vram_size_bytes / MB,
-            vram_in_use_bytes / MB,
-            vram_in_use_bytes_percent,
-            vram_available_bytes / MB,
-            vram_available_bytes_percent,
-        )
-
-        if torch.cuda.is_available():
-            log += "  {:<30} {} MB\n".format("CUDA Memory Allocated:", torch.cuda.memory_allocated() / MB)
-        log += "  {:<30} {}\n".format("Total models:", len(self._cached_models))
-
-        if include_entry_details and len(self._cached_models) > 0:
-            log += "  Models:\n"
-            log_format = (
-                "    {:<80} total={:>7.1f} MB, vram={:>7.1f} MB ({:>5.1%}), ram={:>7.1f} MB ({:>5.1%}), locked={}\n"
-            )
-            for cache_record in self._cached_models.values():
-                total_bytes = cache_record.cached_model.total_bytes()
-                cur_vram_bytes = cache_record.cached_model.cur_vram_bytes()
-                cur_vram_bytes_percent = cur_vram_bytes / total_bytes if total_bytes > 0 else 0
-                cur_ram_bytes = total_bytes - cur_vram_bytes
-                cur_ram_bytes_percent = cur_ram_bytes / total_bytes if total_bytes > 0 else 0
-
-                log += log_format.format(
-                    f"{cache_record.key} ({cache_record.cached_model.model.__class__.__name__}):",
-                    total_bytes / MB,
-                    cur_vram_bytes / MB,
-                    cur_vram_bytes_percent,
-                    cur_ram_bytes / MB,
-                    cur_ram_bytes_percent,
-                    cache_record.is_locked,
-                )
-
-        self._logger.debug(log)
-
-    def make_room(self, bytes_needed: int) -> None:
-        """Make enough room in the cache to accommodate a new model of indicated size.
-
-        Note: This function deletes all of the cache's internal references to a model in order to free it. If there are
-        external references to the model, there's nothing that the cache can do about it, and those models will not be
-        garbage-collected.
-        """
-        self._logger.debug(f"Making room for {bytes_needed/MB:.2f}MB of RAM.")
-        self._log_cache_state(title="Before dropping models:")
-
-        ram_bytes_available = self._get_ram_available()
-        ram_bytes_to_free = max(0, bytes_needed - ram_bytes_available)
-
-        ram_bytes_freed = 0
-        pos = 0
-        models_cleared = 0
-        while ram_bytes_freed < ram_bytes_to_free and pos < len(self._cache_stack):
-            model_key = self._cache_stack[pos]
-            cache_entry = self._cached_models[model_key]
-
-            if not cache_entry.is_locked:
-                ram_bytes_freed += cache_entry.cached_model.total_bytes()
-                self._logger.debug(
-                    f"Dropping {model_key} from RAM cache to free {(cache_entry.cached_model.total_bytes()/MB):.2f}MB."
-                )
-                self._delete_cache_entry(cache_entry)
-                del cache_entry
-                models_cleared += 1
-            else:
-                pos += 1
-
-        if models_cleared > 0:
-            # There would likely be some 'garbage' to be collected regardless of whether a model was cleared or not, but
-            # there is a significant time cost to calling `gc.collect()`, so we want to use it sparingly. (The time cost
-            # is high even if no garbage gets collected.)
-            #
-            # Calling gc.collect(...) when a model is cleared seems like a good middle-ground:
-            # - If models had to be cleared, it's a signal that we are close to our memory limit.
-            # - If models were cleared, there's a good chance that there's a significant amount of garbage to be
-            #   collected.
-            #
-            # Keep in mind that gc is only responsible for handling reference cycles. Most objects should be cleaned up
-            # immediately when their reference count hits 0.
-            if self.stats:
-                self.stats.cleared = models_cleared
-            gc.collect()
-
-        TorchDevice.empty_cache()
-        self._logger.debug(f"Dropped {models_cleared} models to free {ram_bytes_freed/MB:.2f}MB of RAM.")
-        self._log_cache_state(title="After dropping models:")
-
-    def _delete_cache_entry(self, cache_entry: CacheRecord) -> None:
-        self._cache_stack.remove(cache_entry.key)
-        del self._cached_models[cache_entry.key]
--- a/invokeai/backend/model_manager/load/model_cache/model_cache_base.py
+++ b/invokeai/backend/model_manager/load/model_cache/model_cache_base.py
@@ -0,0 +1,221 @@
+# Copyright (c) 2024 Lincoln D. Stein and the InvokeAI Development team
+# TODO: Add Stalker's proper name to copyright
+"""
+Manage a RAM cache of diffusion/transformer models for fast switching.
+They are moved between GPU VRAM and CPU RAM as necessary. If the cache
+grows larger than a preset maximum, then the least recently used
+model will be cleared and (re)loaded from disk when next needed.
+"""
+
+from abc import ABC, abstractmethod
+from dataclasses import dataclass, field
+from logging import Logger
+from typing import Dict, Generic, Optional, TypeVar
+
+import torch
+
+from invokeai.backend.model_manager.config import AnyModel, SubModelType
+
+
+class ModelLockerBase(ABC):
+    """Base class for the model locker used by the loader."""
+
+    @abstractmethod
+    def lock(self) -> AnyModel:
+        """Lock the contained model and move it into VRAM."""
+        pass
+
+    @abstractmethod
+    def unlock(self) -> None:
+        """Unlock the contained model, and remove it from VRAM."""
+        pass
+
+    @abstractmethod
+    def get_state_dict(self) -> Optional[Dict[str, torch.Tensor]]:
+        """Return the state dict (if any) for the cached model."""
+        pass
+
+    @property
+    @abstractmethod
+    def model(self) -> AnyModel:
+        """Return the model."""
+        pass
+
+
+T = TypeVar("T")
+
+
+@dataclass
+class CacheRecord(Generic[T]):
+    """
+    Elements of the cache:
+
+    key: Unique key for each model, same as used in the models database.
+    model: Model in memory.
+    state_dict: A read-only copy of the model's state dict in RAM. It will be
+                used as a template for creating a copy in the VRAM.
+    size: Size of the model
+    loaded: True if the model's state dict is currently in VRAM
+
+    Before a model is executed, the state_dict template is copied into VRAM,
+    and then injected into the model. When the model is finished, the VRAM
+    copy of the state dict is deleted, and the RAM version is reinjected
+    into the model.
+
+    The state_dict should be treated as a read-only attribute. Do not attempt
+    to patch or otherwise modify it. Instead, patch the copy of the state_dict
+    after it is loaded into the execution device (e.g. CUDA) using the `LoadedModel`
+    context manager call `model_on_device()`.
+    """
+
+    key: str
+    model: T
+    device: torch.device
+    state_dict: Optional[Dict[str, torch.Tensor]]
+    size: int
+    loaded: bool = False
+    _locks: int = 0
+
+    def lock(self) -> None:
+        """Lock this record."""
+        self._locks += 1
+
+    def unlock(self) -> None:
+        """Unlock this record."""
+        self._locks -= 1
+        assert self._locks >= 0
+
+    @property
+    def locked(self) -> bool:
+        """Return true if record is locked."""
+        return self._locks > 0
+
+
+@dataclass
+class CacheStats(object):
+    """Collect statistics on cache performance."""
+
+    hits: int = 0  # cache hits
+    misses: int = 0  # cache misses
+    high_watermark: int = 0  # amount of cache used
+    in_cache: int = 0  # number of models in cache
+    cleared: int = 0  # number of models cleared to make space
+    cache_size: int = 0  # total size of cache
+    loaded_model_sizes: Dict[str, int] = field(default_factory=dict)
+
+
+class ModelCacheBase(ABC, Generic[T]):
+    """Virtual base class for RAM model cache."""
+
+    @property
+    @abstractmethod
+    def storage_device(self) -> torch.device:
+        """Return the storage device (e.g. "CPU" for RAM)."""
+        pass
+
+    @property
+    @abstractmethod
+    def execution_device(self) -> torch.device:
+        """Return the execution device (e.g. "cuda" for VRAM)."""
+        pass
+
+    @property
+    @abstractmethod
+    def lazy_offloading(self) -> bool:
+        """Return true if the cache is configured to lazily offload models in VRAM."""
+        pass
+
+    @property
+    @abstractmethod
+    def max_cache_size(self) -> float:
+        """Return the maximum size the RAM cache can grow to."""
+        pass
+
+    @max_cache_size.setter
+    @abstractmethod
+    def max_cache_size(self, value: float) -> None:
+        """Set the maximum size the RAM cache can grow to."""
+
+    @property
+    @abstractmethod
+    def max_vram_cache_size(self) -> float:
+        """Return the maximum size the VRAM cache can grow to."""
+        pass
+
+    @max_vram_cache_size.setter
+    @abstractmethod
+    def max_vram_cache_size(self, value: float) -> None:
+        """Set the maximum size the VRAM cache can grow to."""
+        pass
+
+    @abstractmethod
+    def offload_unlocked_models(self, size_required: int) -> None:
+        """Offload from VRAM any models not actively in use."""
+        pass
+
+    @abstractmethod
+    def move_model_to_device(self, cache_entry: CacheRecord[AnyModel], target_device: torch.device) -> None:
+        """Move model into the indicated device."""
+        pass
+
+    @property
+    @abstractmethod
+    def stats(self) -> Optional[CacheStats]:
+        """Return collected CacheStats object."""
+        pass
+
+    @stats.setter
+    @abstractmethod
+    def stats(self, stats: CacheStats) -> None:
+        """Set the CacheStats object for collecting cache statistics."""
+        pass
+
+    @property
+    @abstractmethod
+    def logger(self) -> Logger:
+        """Return the logger used by the cache."""
+        pass
+
+    @abstractmethod
+    def make_room(self, size: int) -> None:
+        """Make enough room in the cache to accommodate a new model of indicated size."""
+        pass
+
+    @abstractmethod
+    def put(
+        self,
+        key: str,
+        model: T,
+        submodel_type: Optional[SubModelType] = None,
+    ) -> None:
+        """Store model under key and optional submodel_type."""
+        pass
+
+    @abstractmethod
+    def get(
+        self,
+        key: str,
+        submodel_type: Optional[SubModelType] = None,
+        stats_name: Optional[str] = None,
+    ) -> ModelLockerBase:
+        """
+        Retrieve model using key and optional submodel_type.
+
+        :param key: Opaque model key
+        :param submodel_type: Type of the submodel to fetch
+        :param stats_name: A human-readable id for the model for the purposes of
+        stats reporting.
+
+        This may raise an IndexError if the model is not in the cache.
+        """
+        pass
+
+    @abstractmethod
+    def cache_size(self) -> int:
+        """Get the total size of the models currently cached."""
+        pass
+
+    @abstractmethod
+    def print_cuda_stats(self) -> None:
+        """Log debugging information on CUDA usage."""
+        pass
--- a/invokeai/backend/model_manager/load/model_cache/model_cache_default.py
+++ b/invokeai/backend/model_manager/load/model_cache/model_cache_default.py
@@ -0,0 +1,426 @@
+# Copyright (c) 2024 Lincoln D. Stein and the InvokeAI Development team
+# TODO: Add Stalker's proper name to copyright
+"""Default implementation of the model cache (storage-device / execution-device model management)."""
+
+import gc
+import math
+import time
+from contextlib import suppress
+from logging import Logger
+from typing import Dict, List, Optional
+
+import torch
+
+from invokeai.backend.model_manager import AnyModel, SubModelType
+from invokeai.backend.model_manager.load.memory_snapshot import MemorySnapshot, get_pretty_snapshot_diff
+from invokeai.backend.model_manager.load.model_cache.model_cache_base import (
+    CacheRecord,
+    CacheStats,
+    ModelCacheBase,
+    ModelLockerBase,
+)
+from invokeai.backend.model_manager.load.model_cache.model_locker import ModelLocker
+from invokeai.backend.model_manager.load.model_util import calc_model_size_by_data
+from invokeai.backend.util.devices import TorchDevice
+from invokeai.backend.util.logging import InvokeAILogger
+
+# Size of a GB in bytes.
+GB = 2**30
+
+# Size of a MB in bytes.
+MB = 2**20
+
+
+class ModelCache(ModelCacheBase[AnyModel]):
+    """A cache for managing models in memory.
+
+    The cache is based on two levels of model storage:
+    - execution_device: The device where most models are executed (typically "cuda", "mps", or "cpu").
+    - storage_device: The device where models are offloaded when not in active use (typically "cpu").
+
+    The model cache is based on the following assumptions:
+    - storage_device_mem_size > execution_device_mem_size
+    - disk_to_storage_device_transfer_time >> storage_device_to_execution_device_transfer_time
+
+    A copy of all models in the cache is always kept on the storage_device. A subset of the models also have a copy on
+    the execution_device.
+
+    Models are moved between the storage_device and the execution_device as necessary. Cache size limits are enforced
+    on both the storage_device and the execution_device. The execution_device cache uses a smallest-first offload
+    policy. The storage_device cache uses a least-recently-used (LRU) offload policy.
+
+    Note: Neither of these offload policies has really been compared against alternatives. It's likely that different
+    policies would be better, although the optimal policies are likely heavily dependent on usage patterns and HW
+    configuration.
+
+    The cache returns context manager generators designed to load the model into the execution device (often GPU) within
+    the context, and unload outside the context.
+
+    Example usage:
+    ```
+    cache = ModelCache(max_cache_size=7.5, max_vram_cache_size=6.0)
+    with cache.get_model('runwayml/stable-diffusion-1-5') as SD1:
+        do_something_on_gpu(SD1)
+    ```
+    """
+
+    def __init__(
+        self,
+        max_cache_size: float,
+        max_vram_cache_size: float,
+        execution_device: torch.device = torch.device("cuda"),
+        storage_device: torch.device = torch.device("cpu"),
+        precision: torch.dtype = torch.float16,
+        lazy_offloading: bool = True,
+        log_memory_usage: bool = False,
+        logger: Optional[Logger] = None,
+    ):
+        """
+        Initialize the model RAM cache.
+
+        :param max_cache_size: Maximum size of the storage_device cache in GBs.
+        :param max_vram_cache_size: Maximum size of the execution_device cache in GBs.
+        :param execution_device: Torch device to load active model into [torch.device('cuda')]
+        :param storage_device: Torch device to save inactive model in [torch.device('cpu')]
+        :param precision: Precision for loaded models [torch.float16]
+        :param lazy_offloading: Keep model in VRAM until another model needs to be loaded
+        :param log_memory_usage: If True, a memory snapshot will be captured before and after every model cache
+            operation, and the result will be logged (at debug level). There is a time cost to capturing the memory
+            snapshots, so it is recommended to disable this feature unless you are actively inspecting the model cache's
+            behaviour.
+        :param logger: InvokeAILogger to use (otherwise creates one)
+        """
+        # allow lazy offloading only when vram cache enabled
+        self._lazy_offloading = lazy_offloading and max_vram_cache_size > 0
+        self._max_cache_size: float = max_cache_size
+        self._max_vram_cache_size: float = max_vram_cache_size
+        self._execution_device: torch.device = execution_device
+        self._storage_device: torch.device = storage_device
+        self._logger = logger or InvokeAILogger.get_logger(self.__class__.__name__)
+        self._log_memory_usage = log_memory_usage
+        self._stats: Optional[CacheStats] = None
+
+        self._cached_models: Dict[str, CacheRecord[AnyModel]] = {}
+        self._cache_stack: List[str] = []
+
+    @property
+    def logger(self) -> Logger:
+        """Return the logger used by the cache."""
+        return self._logger
+
+    @property
+    def lazy_offloading(self) -> bool:
+        """Return true if the cache is configured to lazily offload models in VRAM."""
+        return self._lazy_offloading
+
+    @property
+    def storage_device(self) -> torch.device:
+        """Return the storage device (e.g. "CPU" for RAM)."""
+        return self._storage_device
+
+    @property
+    def execution_device(self) -> torch.device:
+        """Return the execution device (e.g. "cuda" for VRAM)."""
+        return self._execution_device
+
+    @property
+    def max_cache_size(self) -> float:
+        """Return the cap on cache size."""
+        return self._max_cache_size
+
+    @max_cache_size.setter
+    def max_cache_size(self, value: float) -> None:
+        """Set the cap on cache size."""
+        self._max_cache_size = value
+
+    @property
+    def max_vram_cache_size(self) -> float:
+        """Return the cap on vram cache size."""
+        return self._max_vram_cache_size
+
+    @max_vram_cache_size.setter
+    def max_vram_cache_size(self, value: float) -> None:
+        """Set the cap on vram cache size."""
+        self._max_vram_cache_size = value
+
+    @property
+    def stats(self) -> Optional[CacheStats]:
+        """Return collected CacheStats object."""
+        return self._stats
+
+    @stats.setter
+    def stats(self, stats: CacheStats) -> None:
+        """Set the CacheStats object for collecting cache statistics."""
+        self._stats = stats
+
+    def cache_size(self) -> int:
+        """Get the total size of the models currently cached."""
+        total = 0
+        for cache_record in self._cached_models.values():
+            total += cache_record.size
+        return total
+
+    def put(
+        self,
+        key: str,
+        model: AnyModel,
+        submodel_type: Optional[SubModelType] = None,
+    ) -> None:
+        """Store model under key and optional submodel_type."""
+        key = self._make_cache_key(key, submodel_type)
+        if key in self._cached_models:
+            return
+        size = calc_model_size_by_data(self.logger, model)
+        self.make_room(size)
+
+        running_on_cpu = self.execution_device == torch.device("cpu")
+        state_dict = model.state_dict() if isinstance(model, torch.nn.Module) and not running_on_cpu else None
+        cache_record = CacheRecord(key=key, model=model, device=self.storage_device, state_dict=state_dict, size=size)
+        self._cached_models[key] = cache_record
+        self._cache_stack.append(key)
+
+    def get(
+        self,
+        key: str,
+        submodel_type: Optional[SubModelType] = None,
+        stats_name: Optional[str] = None,
+    ) -> ModelLockerBase:
+        """
+        Retrieve model using key and optional submodel_type.
+
+        :param key: Opaque model key
+        :param submodel_type: Type of the submodel to fetch
+        :param stats_name: A human-readable id for the model for the purposes of
+        stats reporting.
+
+        This may raise an IndexError if the model is not in the cache.
+        """
+        key = self._make_cache_key(key, submodel_type)
+        if key in self._cached_models:
+            if self.stats:
+                self.stats.hits += 1
+        else:
+            if self.stats:
+                self.stats.misses += 1
+            raise IndexError(f"The model with key {key} is not in the cache.")
+
+        cache_entry = self._cached_models[key]
+
+        # more stats
+        if self.stats:
+            stats_name = stats_name or key
+            self.stats.cache_size = int(self._max_cache_size * GB)
+            self.stats.high_watermark = max(self.stats.high_watermark, self.cache_size())
+            self.stats.in_cache = len(self._cached_models)
+            self.stats.loaded_model_sizes[stats_name] = max(
+                self.stats.loaded_model_sizes.get(stats_name, 0), cache_entry.size
+            )
+
+        # this moves the entry to the top (right end) of the stack
+        with suppress(Exception):
+            self._cache_stack.remove(key)
+        self._cache_stack.append(key)
+        return ModelLocker(
+            cache=self,
+            cache_entry=cache_entry,
+        )
+
+    def _capture_memory_snapshot(self) -> Optional[MemorySnapshot]:
+        if self._log_memory_usage:
+            return MemorySnapshot.capture()
+        return None
+
+    def _make_cache_key(self, model_key: str, submodel_type: Optional[SubModelType] = None) -> str:
+        if submodel_type:
+            return f"{model_key}:{submodel_type.value}"
+        else:
+            return model_key
+
+    def offload_unlocked_models(self, size_required: int) -> None:
+        """Offload unlocked models from the execution_device, smallest first, to make room for size_required.
+
+        :param size_required: The amount of space to clear in the execution_device cache, in bytes.
+        """
+        reserved = self._max_vram_cache_size * GB
+        vram_in_use = torch.cuda.memory_allocated() + size_required
+        self.logger.debug(f"{(vram_in_use/GB):.2f}GB VRAM needed for models; max allowed={(reserved/GB):.2f}GB")
+        for _, cache_entry in sorted(self._cached_models.items(), key=lambda x: x[1].size):
+            if vram_in_use <= reserved:
+                break
+            # Only models that are loaded in VRAM and not locked are eligible for offloading.
+            if cache_entry.loaded and not cache_entry.locked:
+                self.move_model_to_device(cache_entry, self.storage_device)
+                cache_entry.loaded = False
+                # Re-measure allocated VRAM after the move instead of relying on the estimated model size.
+                vram_in_use = torch.cuda.memory_allocated() + size_required
+                self.logger.debug(
+                    f"Removing {cache_entry.key} from VRAM to free {(cache_entry.size/GB):.2f}GB; VRAM allocated = {(torch.cuda.memory_allocated()/GB):.2f}GB"
+                )
+
+        TorchDevice.empty_cache()
+
+    def move_model_to_device(self, cache_entry: CacheRecord[AnyModel], target_device: torch.device) -> None:
+        """Move model into the indicated device.
+
+        :param cache_entry: The CacheRecord for the model
+        :param target_device: The torch.device to move the model into
+
+        May raise a torch.cuda.OutOfMemoryError. On any failure the cache entry is deleted and the error re-raised.
+        """
+        self.logger.debug(f"Called to move {cache_entry.key} to {target_device}")
+        source_device = cache_entry.device
+
+        # Note: We compare device types only so that 'cuda' == 'cuda:0'.
+        # This would need to be revised to support multi-GPU.
+        if torch.device(source_device).type == torch.device(target_device).type:
+            return
+
+        # Some models don't have a `to` method, in which case they run in RAM/CPU.
+        if not hasattr(cache_entry.model, "to"):
+            return
+
+        # This roundabout method for moving the model around is done to avoid
+        # the cost of moving the model from RAM to VRAM and then back from VRAM to RAM.
+        # When moving to VRAM, we copy (not move) each element of the state dict from
+        # RAM to a new state dict in VRAM, and then inject it into the model.
+        # This operation is slightly faster than running `to()` on the whole model.
+        #
+        # When the model needs to be removed from VRAM we simply delete the copy
+        # of the state dict in VRAM, and reinject the state dict that is cached
+        # in RAM into the model. So this operation is very fast.
+        start_model_to_time = time.time()
+        snapshot_before = self._capture_memory_snapshot()
+
+        try:
+            if cache_entry.state_dict is not None:
+                assert hasattr(cache_entry.model, "load_state_dict")
+                if target_device == self.storage_device:
+                    cache_entry.model.load_state_dict(cache_entry.state_dict, assign=True)
+                else:
+                    new_dict: Dict[str, torch.Tensor] = {}
+                    for k, v in cache_entry.state_dict.items():
+                        new_dict[k] = v.to(target_device, copy=True)
+                    cache_entry.model.load_state_dict(new_dict, assign=True)
+            cache_entry.model.to(target_device)
+            cache_entry.device = target_device
+        except Exception:  # blow away cache entry
+            self._delete_cache_entry(cache_entry)
+            raise  # bare raise re-raises the active exception with its original traceback
+
+        snapshot_after = self._capture_memory_snapshot()
+        end_model_to_time = time.time()
+        self.logger.debug(
+            f"Moved model '{cache_entry.key}' from {source_device} to"
+            f" {target_device} in {(end_model_to_time-start_model_to_time):.2f}s. "
+            f"Estimated model size: {(cache_entry.size/GB):.3f} GB.\n"
+            f"{get_pretty_snapshot_diff(snapshot_before, snapshot_after)}"
+        )
+
+        if (
+            snapshot_before is not None
+            and snapshot_after is not None
+            and snapshot_before.vram is not None
+            and snapshot_after.vram is not None
+        ):
+            vram_change = abs(snapshot_before.vram - snapshot_after.vram)
+
+            # If the estimated model size does not match the change in VRAM, log a warning.
+            if not math.isclose(
+                vram_change,
+                cache_entry.size,
+                rel_tol=0.1,
+                abs_tol=10 * MB,
+            ):
+                self.logger.debug(
+                    f"Moving model '{cache_entry.key}' from {source_device} to"
+                    f" {target_device} caused an unexpected change in VRAM usage. The model's"
+                    " estimated size may be incorrect. Estimated model size:"
+                    f" {(cache_entry.size/GB):.3f} GB.\n"
+                    f"{get_pretty_snapshot_diff(snapshot_before, snapshot_after)}"
+                )
+
+    def print_cuda_stats(self) -> None:
+        """Log CUDA diagnostics: VRAM/RAM usage and counts of models in RAM, in VRAM, and locked."""
+        vram = "%4.2fG" % (torch.cuda.memory_allocated() / GB)
+        ram = "%4.2fG" % (self.cache_size() / GB)
+
+        in_ram_models = 0
+        in_vram_models = 0
+        locked_in_vram_models = 0
+        for cache_record in self._cached_models.values():
+            if hasattr(cache_record.model, "device"):
+                if cache_record.model.device == self.storage_device:
+                    in_ram_models += 1
+                else:
+                    in_vram_models += 1
+                if cache_record.locked:
+                    locked_in_vram_models += 1
+        # Emit one summary after every record is tallied, instead of a partial tally per model inside the loop.
+        self.logger.debug(
+            f"Current VRAM/RAM usage: {vram}/{ram}; models_in_ram/models_in_vram(locked) ="
+            f" {in_ram_models}/{in_vram_models}({locked_in_vram_models})"
+        )
+
+    def make_room(self, size: int) -> None:
+        """Evict unlocked models from the RAM cache until a new model of `size` bytes fits, or none can be evicted.
+
+        Note: Eviction only removes the cache's own references to a model. If the model is still referenced from
+        outside the cache, the cache cannot free it, and it will not be garbage-collected.
+        """
+        bytes_needed = size
+        maximum_size = self.max_cache_size * GB  # max_cache_size is stored in GB; work in bytes
+        current_size = self.cache_size()
+
+        if current_size + bytes_needed > maximum_size:
+            self.logger.debug(
+                f"Max cache size exceeded: {(current_size/GB):.2f}/{self.max_cache_size:.2f} GB, need an additional"
+                f" {(bytes_needed/GB):.2f} GB"
+            )
+
+        self.logger.debug(f"Before making_room: cached_models={len(self._cached_models)}")
+
+        # Walk the cache stack from index 0. The index only advances past locked entries, because deleting an
+        # entry also removes its key from the stack, which brings the next candidate to the same index.
+        stack_index = 0
+        evicted_count = 0
+        while stack_index < len(self._cache_stack):
+            if not (current_size + bytes_needed > maximum_size):
+                break
+
+            model_key = self._cache_stack[stack_index]
+            cache_entry = self._cached_models[model_key]
+            device = getattr(cache_entry.model, "device", None)
+            self.logger.debug(
+                f"Model: {model_key}, locks: {cache_entry._locks}, device: {device}, loaded: {cache_entry.loaded}"
+            )
+
+            if cache_entry.locked:
+                # Locked entries are kept; step over them.
+                stack_index += 1
+                continue
+
+            self.logger.debug(
+                f"Removing {model_key} from RAM cache to free at least {(size/GB):.2f} GB (-{(cache_entry.size/GB):.2f} GB)"
+            )
+            current_size -= cache_entry.size
+            evicted_count += 1
+            self._delete_cache_entry(cache_entry)
+            del cache_entry
+
+        if evicted_count > 0:
+            # gc.collect() has a significant time cost even when no garbage is collected, so it is used
+            # sparingly: only when at least one model was evicted. Evictions signal that we are close to
+            # the memory limit and that there is likely a meaningful amount of garbage to collect.
+            #
+            # Keep in mind that gc is only responsible for handling reference cycles. Most objects are
+            # cleaned up immediately when their reference count hits 0.
+            if self.stats:
+                self.stats.cleared = evicted_count
+            gc.collect()
+
+        TorchDevice.empty_cache()
+        self.logger.debug(f"After making room: cached_models={len(self._cached_models)}")
+
+    def _delete_cache_entry(self, cache_entry: CacheRecord[AnyModel]) -> None:
+        self._cache_stack.remove(cache_entry.key)  # drop the key from the cache stack first
+        del self._cached_models[cache_entry.key]  # then drop the cache's own reference to the record
--- a/invokeai/backend/model_manager/load/model_cache/model_locker.py
+++ b/invokeai/backend/model_manager/load/model_cache/model_locker.py
@@ -0,0 +1,64 @@
+"""
+Base class and implementation of a class that moves models in and out of VRAM.
+"""
+
+from typing import Dict, Optional
+
+import torch
+
+from invokeai.backend.model_manager import AnyModel
+from invokeai.backend.model_manager.load.model_cache.model_cache_base import (
+    CacheRecord,
+    ModelCacheBase,
+    ModelLockerBase,
+)
+
+
+class ModelLocker(ModelLockerBase):
+    """Internal helper that locks a cache entry and moves its model onto the execution device."""
+
+    def __init__(self, cache: ModelCacheBase[AnyModel], cache_entry: CacheRecord[AnyModel]):
+        """
+        Bind the locker to a cache and to one of its entries.
+        :param cache: The ModelCache object
+        :param cache_entry: The entry in the model cache
+        """
+        self._cache, self._cache_entry = cache, cache_entry
+
+    @property
+    def model(self) -> AnyModel:
+        """The cached model object, returned as-is without moving it between devices."""
+        return self._cache_entry.model
+
+    def get_state_dict(self) -> Optional[Dict[str, torch.Tensor]]:
+        """Return the cache entry's state dict (may be None)."""
+        return self._cache_entry.state_dict
+
+    def lock(self) -> AnyModel:
+        """Lock the entry, move its model to the execution device, and return the model.
+
+        If any step of the load raises, the lock is released and the exception propagates to the caller.
+        """
+        cache, entry = self._cache, self._cache_entry
+        entry.lock()
+        try:
+            if cache.lazy_offloading:
+                cache.offload_unlocked_models(entry.size)
+            cache.move_model_to_device(entry, cache.execution_device)
+            entry.loaded = True
+            cache.logger.debug(f"Locking {self._cache_entry.key} in {self._cache.execution_device}")
+            cache.print_cuda_stats()
+        except Exception as err:
+            if isinstance(err, torch.cuda.OutOfMemoryError):
+                cache.logger.warning("Insufficient GPU memory to load model. Aborting")
+            entry.unlock()
+            raise
+
+        return self.model
+
+    def unlock(self) -> None:
+        """Release the lock; when lazy offloading is disabled, offload unlocked models right away."""
+        self._cache_entry.unlock()
+        if not self._cache.lazy_offloading:
+            self._cache.offload_unlocked_models(0)
+            self._cache.print_cuda_stats()
--- a/invokeai/backend/model_manager/load/model_cache/torch_function_autocast_context.py
+++ b/invokeai/backend/model_manager/load/model_cache/torch_function_autocast_context.py
@@ -1,33 +0,0 @@
-from typing import Any, Callable
-
-import torch
-from torch.overrides import TorchFunctionMode
-
-
-def add_autocast_to_module_forward(m: torch.nn.Module, to_device: torch.device):
-    """Monkey-patch m.forward(...) with a new forward(...) method that activates device autocasting for its duration."""
-    old_forward = m.forward
-
-    def new_forward(*args: Any, **kwargs: Any):
-        with TorchFunctionAutocastDeviceContext(to_device):
-            return old_forward(*args, **kwargs)
-
-    m.forward = new_forward
-
-
-def _cast_to_device_and_run(
-    func: Callable[..., Any], args: tuple[Any, ...], kwargs: dict[str, Any], to_device: torch.device
-):
-    args_on_device = [a.to(to_device) if isinstance(a, torch.Tensor) else a for a in args]
-    kwargs_on_device = {k: v.to(to_device) if isinstance(v, torch.Tensor) else v for k, v in kwargs.items()}
-    return func(*args_on_device, **kwargs_on_device)
-
-
-class TorchFunctionAutocastDeviceContext(TorchFunctionMode):
-    def __init__(self, to_device: torch.device):
-        self._to_device = to_device
-
-    def __torch_function__(
-        self, func: Callable[..., Any], types, args: tuple[Any, ...] = (), kwargs: dict[str, Any] | None = None
-    ):
-        return _cast_to_device_and_run(func, args, kwargs or {}, self._to_device)
--- a/invokeai/backend/model_manager/load/model_loaders/lora.py
+++ b/invokeai/backend/model_manager/load/model_loaders/lora.py
@@ -13,8 +13,9 @@ from invokeai.backend.lora.conversions.flux_diffusers_lora_conversion_utils impo
    lora_model_from_flux_diffusers_state_dict,
 )
 from invokeai.backend.lora.conversions.flux_kohya_lora_conversion_utils import (
-    lora_model_from_flux_kohya_state_dict,
+    is_state_dict_likely_in_flux_kohya_format, lora_model_from_flux_kohya_state_dict,
 )
+from invokeai.backend.lora.conversions.flux_control_lora_utils import is_state_dict_likely_flux_control, lora_model_from_flux_control_state_dict
 from invokeai.backend.lora.conversions.sd_lora_conversion_utils import lora_model_from_sd_state_dict
 from invokeai.backend.lora.conversions.sdxl_lora_conversion_utils import convert_sdxl_keys_to_diffusers_format
 from invokeai.backend.model_manager import (
@@ -26,12 +27,13 @@ from invokeai.backend.model_manager import (
    SubModelType,
 )
 from invokeai.backend.model_manager.load.load_default import ModelLoader
-from invokeai.backend.model_manager.load.model_cache.model_cache import ModelCache
+from invokeai.backend.model_manager.load.model_cache.model_cache_base import ModelCacheBase
 from invokeai.backend.model_manager.load.model_loader_registry import ModelLoaderRegistry


@ModelLoaderRegistry.register(base=BaseModelType.Any, type=ModelType.LoRA, format=ModelFormat.Diffusers)
@ModelLoaderRegistry.register(base=BaseModelType.Any, type=ModelType.LoRA, format=ModelFormat.LyCORIS)
+@ModelLoaderRegistry.register(base=BaseModelType.Flux, type=ModelType.StructuralLoRa, format=ModelFormat.LyCORIS)
 class LoRALoader(ModelLoader):
    """Class to load LoRA models."""

@@ -40,7 +42,7 @@ class LoRALoader(ModelLoader):
        self,
        app_config: InvokeAIAppConfig,
        logger: Logger,
-        ram_cache: ModelCache,
+        ram_cache: ModelCacheBase[AnyModel],
    ):
        """Initialize the loader."""
        super().__init__(app_config, logger, ram_cache)
@@ -75,7 +77,12 @@ class LoRALoader(ModelLoader):
                # https://github.com/huggingface/diffusers/blob/main/examples/dreambooth/train_dreambooth_lora_flux.py#L1194
                model = lora_model_from_flux_diffusers_state_dict(state_dict=state_dict, alpha=None)
            elif config.format == ModelFormat.LyCORIS:
-                model = lora_model_from_flux_kohya_state_dict(state_dict=state_dict)
+                if is_state_dict_likely_in_flux_kohya_format(state_dict=state_dict):
+                    model = lora_model_from_flux_kohya_state_dict(state_dict=state_dict)
+                elif is_state_dict_likely_flux_control(state_dict=state_dict):
+                    model = lora_model_from_flux_control_state_dict(state_dict=state_dict)
+                else:  # neither key layout matched: fail here rather than leave `model` unassigned
+                    raise ValueError("LyCORIS-format FLUX LoRA matches neither the kohya nor the control key layout")
            else:
                raise ValueError(f"LoRA model is in unsupported FLUX format: {config.format}")
        elif self._model_base in [BaseModelType.StableDiffusion1, BaseModelType.StableDiffusion2]:
--- a/invokeai/backend/model_manager/load/model_loaders/stable_diffusion.py
+++ b/invokeai/backend/model_manager/load/model_loaders/stable_diffusion.py
@@ -25,7 +25,6 @@ from invokeai.backend.model_manager.config import (
    DiffusersConfigBase,
    MainCheckpointConfig,
 )
-from invokeai.backend.model_manager.load.model_cache.model_cache import get_model_cache_key
 from invokeai.backend.model_manager.load.model_loader_registry import ModelLoaderRegistry
 from invokeai.backend.model_manager.load.model_loaders.generic_diffusers import GenericDiffusersLoader
 from invokeai.backend.util.silence_warnings import SilenceWarnings
@@ -133,5 +132,5 @@ class StableDiffusionDiffusersModel(GenericDiffusersLoader):
            if subtype == submodel_type:
                continue
            if submodel := getattr(pipeline, subtype.value, None):
-                self._ram_cache.put(get_model_cache_key(config.key, subtype), model=submodel)
+                self._ram_cache.put(config.key, submodel_type=subtype, model=submodel)
        return getattr(pipeline, submodel_type.value)
--- a/invokeai/backend/model_manager/probe.py
+++ b/invokeai/backend/model_manager/probe.py
@@ -18,6 +18,7 @@ from invokeai.backend.flux.ip_adapter.state_dict_utils import is_state_dict_xlab
 from invokeai.backend.lora.conversions.flux_diffusers_lora_conversion_utils import (
    is_state_dict_likely_in_flux_diffusers_format,
 )
+from invokeai.backend.lora.conversions.flux_control_lora_utils import is_state_dict_likely_flux_control
 from invokeai.backend.lora.conversions.flux_kohya_lora_conversion_utils import is_state_dict_likely_in_flux_kohya_format
 from invokeai.backend.model_hash.model_hash import HASHING_ALGORITHMS, ModelHash
 from invokeai.backend.model_manager.config import (
@@ -258,6 +259,18 @@ class ModelProbe(object):
        ckpt = checkpoint if checkpoint else read_checkpoint_meta(model_path, scan=True)
        ckpt = ckpt.get("state_dict", ckpt)

+        if isinstance(ckpt, dict) and "img_in.lora_A.weight" in ckpt and "img_in.lora_B.weight" in ckpt:
+            tensor_a, tensor_b = ckpt["img_in.lora_A.weight"], ckpt["img_in.lora_B.weight"]
+            if (
+                tensor_a is not None
+                and isinstance(tensor_a, torch.Tensor)
+                and tensor_a.shape[1] == 128
+                and tensor_b is not None
+                and isinstance(tensor_b, torch.Tensor)
+                and tensor_b.shape[0] == 3072
+            ):
+                return ModelType.StructuralLoRa
+
        for key in [str(k) for k in ckpt.keys()]:
            if key.startswith(
                (
@@ -624,8 +637,10 @@ class LoRACheckpointProbe(CheckpointProbeBase):
        return ModelFormat.LyCORIS

    def get_base_type(self) -> BaseModelType:
-        if is_state_dict_likely_in_flux_kohya_format(self.checkpoint) or is_state_dict_likely_in_flux_diffusers_format(
-            self.checkpoint
+        if (
+            is_state_dict_likely_in_flux_kohya_format(self.checkpoint)
+            or is_state_dict_likely_in_flux_diffusers_format(self.checkpoint)
+            or is_state_dict_likely_flux_control(self.checkpoint)
        ):
            return BaseModelType.Flux

@@ -1046,6 +1061,7 @@ ModelProbe.register_probe("diffusers", ModelType.SpandrelImageToImage, SpandrelI
 ModelProbe.register_probe("checkpoint", ModelType.Main, PipelineCheckpointProbe)
 ModelProbe.register_probe("checkpoint", ModelType.VAE, VaeCheckpointProbe)
 ModelProbe.register_probe("checkpoint", ModelType.LoRA, LoRACheckpointProbe)
+ModelProbe.register_probe("checkpoint", ModelType.StructuralLoRa, LoRACheckpointProbe)
 ModelProbe.register_probe("checkpoint", ModelType.TextualInversion, TextualInversionCheckpointProbe)
 ModelProbe.register_probe("checkpoint", ModelType.ControlNet, ControlNetCheckpointProbe)
 ModelProbe.register_probe("checkpoint", ModelType.IPAdapter, IPAdapterCheckpointProbe)
--- a/invokeai/backend/model_manager/util/model_util.py
+++ b/invokeai/backend/model_manager/util/model_util.py
@@ -52,16 +52,15 @@ def read_checkpoint_meta(path: Union[str, Path], scan: bool = True) -> Dict[str,
        except Exception:
            # TODO: create issue for support "meta"?
            checkpoint = safetensors.torch.load_file(path, device="cpu")
+    elif str(path).endswith(".gguf"):
+        # The GGUF reader used here uses numpy memmap, so these tensors are not loaded into memory during this function
+        checkpoint = gguf_sd_loader(Path(path), compute_dtype=torch.float32)
    else:
        if scan:
            scan_result = scan_file_path(path)
            if scan_result.infected_files != 0 or scan_result.scan_err:
                raise Exception(f'The model file "{path}" is potentially infected by malware. Aborting import.')
-        if str(path).endswith(".gguf"):
-            # The GGUF reader used here uses numpy memmap, so these tensors are not loaded into memory during this function
-            checkpoint = gguf_sd_loader(Path(path), compute_dtype=torch.float32)
-        else:
-            checkpoint = torch.load(path, map_location=torch.device("meta"))
+        checkpoint = torch.load(path, map_location=torch.device("meta"))
    return checkpoint


--- a/invokeai/backend/util/prefix_logger_adapter.py
+++ b/invokeai/backend/util/prefix_logger_adapter.py
@@ -1,12 +0,0 @@
-import logging
-from typing import Any, MutableMapping
-
-
-# Issue with type hints related to LoggerAdapter: https://github.com/python/typeshed/issues/7855
-class PrefixedLoggerAdapter(logging.LoggerAdapter):  # type: ignore
-    def __init__(self, logger: logging.Logger, prefix: str):
-        super().__init__(logger, {})
-        self.prefix = prefix
-
-    def process(self, msg: str, kwargs: MutableMapping[str, Any]) -> tuple[str, MutableMapping[str, Any]]:
-        return f"[{self.prefix}] {msg}", kwargs
--- a/invokeai/frontend/web/README.md
+++ b/invokeai/frontend/web/README.md
@@ -1,3 +1,3 @@
 # Invoke UI

-<https://invoke-ai.github.io/InvokeAI/contributing/frontend/OVERVIEW/>
+<https://invoke-ai.github.io/InvokeAI/contributing/frontend/>
--- a/invokeai/frontend/web/public/locales/de.json
+++ b/invokeai/frontend/web/public/locales/de.json
@@ -642,12 +642,6 @@
        "remixImage": "Remix des Bilds erstellen",
        "imageActions": "Weitere Bildaktionen",
        "invoke": {
-            "layer": {
-                "t2iAdapterIncompatibleBboxWidth": "$t(parameters.invoke.layer.t2iAdapterRequiresDimensionsToBeMultipleOf) {{multiple}}, Bbox-Breite ist {{width}}",
-                "t2iAdapterIncompatibleScaledBboxWidth": "$t(parameters.invoke.layer.t2iAdapterRequiresDimensionsToBeMultipleOf) {{multiple}}, Skalierte Bbox-Breite ist {{width}}",
-                "t2iAdapterIncompatibleScaledBboxHeight": "$t(parameters.invoke.layer.t2iAdapterRequiresDimensionsToBeMultipleOf) {{multiple}}, Skalierte Bbox-Höhe ist {{height}}",
-                "t2iAdapterIncompatibleBboxHeight": "$t(parameters.invoke.layer.t2iAdapterRequiresDimensionsToBeMultipleOf) {{multiple}}, Bbox-Höhe ist {{height}}"
-            },
            "fluxModelIncompatibleScaledBboxWidth": "$t(parameters.invoke.fluxRequiresDimensionsToBeMultipleOf16), Skalierte Bbox-Breite ist {{width}}",
            "fluxModelIncompatibleScaledBboxHeight": "$t(parameters.invoke.fluxRequiresDimensionsToBeMultipleOf16), Skalierte Bbox-Höhe ist {{height}}",
            "fluxModelIncompatibleBboxWidth": "$t(parameters.invoke.fluxRequiresDimensionsToBeMultipleOf16), Bbox-Breite ist {{width}}",
--- a/invokeai/frontend/web/public/locales/en.json
+++ b/invokeai/frontend/web/public/locales/en.json
@@ -809,6 +809,7 @@
        "starterBundleHelpText": "Easily install all models needed to get started with a base model, including a main model, controlnets, IP adapters, and more. Selecting a bundle will skip any models that you already have installed.",
        "starterModels": "Starter Models",
        "starterModelsInModelManager": "Starter Models can be found in Model Manager",
+        "structuralLora": "Structural LoRA",
        "syncModels": "Sync Models",
        "textualInversions": "Textual Inversions",
        "triggerPhrases": "Trigger Phrases",
@@ -2133,8 +2134,8 @@
    "whatsNew": {
        "whatsNewInInvoke": "What's New in Invoke",
        "items": [
-            "<StrongComponent>Workflows</StrongComponent>: Run a workflow for a collection of images using the new <StrongComponent>Image Batch</StrongComponent> node.",
-            "<StrongComponent>FLUX</StrongComponent>: Support for XLabs IP Adapter v2."
+            "<StrongComponent>FLUX Regional Guidance (beta)</StrongComponent>: Our beta release of FLUX Regional Guidance is live for regional prompt control.",
+            "<StrongComponent>Various UX Improvements</StrongComponent>: A number of small UX and Quality of Life improvements throughout the app."
        ],
        "readReleaseNotes": "Read Release Notes",
        "watchRecentReleaseVideos": "Watch Recent Release Videos",
--- a/invokeai/frontend/web/public/locales/fr.json
+++ b/invokeai/frontend/web/public/locales/fr.json
@@ -317,18 +317,6 @@
        "info": "Info",
        "showOptionsPanel": "Afficher le panneau latéral (O ou T)",
        "invoke": {
-            "layer": {
-                "rgNoPromptsOrIPAdapters": "aucun prompts ou IP Adapters",
-                "t2iAdapterIncompatibleScaledBboxWidth": "$t(parameters.invoke.layer.t2iAdapterRequiresDimensionsToBeMultipleOf) {{multiple}}, la largeur de la bounding box mise à l'échelle est {{width}}",
-                "t2iAdapterIncompatibleScaledBboxHeight": "$t(parameters.invoke.layer.t2iAdapterRequiresDimensionsToBeMultipleOf) {{multiple}}, la hauteur de la bounding box mise à l'échelle est {{height}}",
-                "ipAdapterNoModelSelected": "aucun IP adapter sélectionné",
-                "ipAdapterNoImageSelected": "aucune image d'IP adapter sélectionnée",
-                "controlAdapterIncompatibleBaseModel": "modèle de base de Control Adapter incompatible",
-                "t2iAdapterIncompatibleBboxHeight": "$t(parameters.invoke.layer.t2iAdapterRequiresDimensionsToBeMultipleOf) {{multiple}}, la hauteur de la bounding box est {{height}}",
-                "t2iAdapterIncompatibleBboxWidth": "$t(parameters.invoke.layer.t2iAdapterRequiresDimensionsToBeMultipleOf) {{multiple}}, la largeur de la bounding box est {{width}}",
-                "ipAdapterIncompatibleBaseModel": "modèle de base d'IP adapter incompatible",
-                "controlAdapterNoModelSelected": "aucun modèle de Control Adapter sélectionné"
-            },
            "noPrompts": "Aucun prompts généré",
            "missingInputForField": "{{nodeLabel}} -> {{fieldLabel}} entrée manquante",
            "missingFieldTemplate": "Modèle de champ manquant",
--- a/invokeai/frontend/web/public/locales/it.json
+++ b/invokeai/frontend/web/public/locales/it.json
@@ -663,25 +663,8 @@
            "addingImagesTo": "Aggiungi immagini a",
            "systemDisconnected": "Sistema disconnesso",
            "missingNodeTemplate": "Modello di nodo mancante",
-            "missingInputForField": "{{nodeLabel}} -> {{fieldLabel}} ingresso mancante",
+            "missingInputForField": "{{nodeLabel}} -> {{fieldLabel}}: ingresso mancante",
            "missingFieldTemplate": "Modello di campo mancante",
-            "layer": {
-                "controlAdapterNoModelSelected": "Nessun modello di adattatore di controllo selezionato",
-                "controlAdapterIncompatibleBaseModel": "Il modello base dell'adattatore di controllo non è compatibile",
-                "ipAdapterNoModelSelected": "Nessun adattatore IP selezionato",
-                "ipAdapterIncompatibleBaseModel": "Il modello base dell'adattatore IP non è compatibile",
-                "ipAdapterNoImageSelected": "Nessuna immagine dell'adattatore IP selezionata",
-                "rgNoPromptsOrIPAdapters": "Nessun prompt o adattatore IP",
-                "t2iAdapterIncompatibleBboxWidth": "$t(parameters.invoke.layer.t2iAdapterRequiresDimensionsToBeMultipleOf) {{multiple}}, larghezza riquadro è {{width}}",
-                "t2iAdapterIncompatibleBboxHeight": "$t(parameters.invoke.layer.t2iAdapterRequiresDimensionsToBeMultipleOf) {{multiple}}, altezza riquadro è {{height}}",
-                "t2iAdapterIncompatibleScaledBboxWidth": "$t(parameters.invoke.layer.t2iAdapterRequiresDimensionsToBeMultipleOf) {{multiple}}, larghezza del riquadro scalato {{width}}",
-                "t2iAdapterIncompatibleScaledBboxHeight": "$t(parameters.invoke.layer.t2iAdapterRequiresDimensionsToBeMultipleOf) {{multiple}}, altezza del riquadro scalato {{height}}",
-                "rgNegativePromptNotSupported": "prompt negativo non supportato per il modello base selezionato",
-                "rgAutoNegativeNotSupported": "auto-negativo non supportato per il modello base selezionato",
-                "emptyLayer": "livello vuoto",
-                "unsupportedModel": "livello non supportato per il modello base selezionato",
-                "rgReferenceImagesNotSupported": "immagini di riferimento regionali non supportate per il modello base selezionato"
-            },
            "fluxModelIncompatibleBboxHeight": "$t(parameters.invoke.fluxRequiresDimensionsToBeMultipleOf16), altezza riquadro è {{height}}",
            "fluxModelIncompatibleBboxWidth": "$t(parameters.invoke.fluxRequiresDimensionsToBeMultipleOf16), larghezza riquadro è {{width}}",
            "fluxModelIncompatibleScaledBboxWidth": "$t(parameters.invoke.fluxRequiresDimensionsToBeMultipleOf16), larghezza del riquadro scalato è {{width}}",
@@ -689,10 +672,10 @@
            "noT5EncoderModelSelected": "Nessun modello di encoder T5 selezionato per la generazione con FLUX",
            "noCLIPEmbedModelSelected": "Nessun modello CLIP Embed selezionato per la generazione con FLUX",
            "noFLUXVAEModelSelected": "Nessun modello VAE selezionato per la generazione con FLUX",
-            "canvasIsTransforming": "La tela sta trasformando",
-            "canvasIsRasterizing": "La tela sta rasterizzando",
-            "canvasIsCompositing": "La tela è in fase di composizione",
-            "canvasIsFiltering": "La tela sta filtrando",
+            "canvasIsTransforming": "La tela è occupata (sta trasformando)",
+            "canvasIsRasterizing": "La tela è occupata (sta rasterizzando)",
+            "canvasIsCompositing": "La tela è occupata (in composizione)",
+            "canvasIsFiltering": "La tela è occupata (sta filtrando)",
            "collectionTooManyItems": "{{nodeLabel}} -> {{fieldLabel}}: troppi elementi, massimo {{maxItems}}",
            "canvasIsSelectingObject": "La tela è occupata (selezione dell'oggetto)",
            "collectionTooFewItems": "{{nodeLabel}} -> {{fieldLabel}}: troppi pochi elementi, minimo {{minItems}}",
@@ -1207,8 +1190,8 @@
        "controlNetBeginEnd": {
            "heading": "Percentuale passi Inizio / Fine",
            "paragraphs": [
-                "La parte del processo di rimozione del rumore in cui verrà applicato l'adattatore di controllo.",
-                "In genere, gli adattatori di controllo applicati all'inizio del processo guidano la composizione, mentre quelli applicati alla fine guidano i dettagli.",
+                "Questa impostazione determina quale parte del processo di rimozione del rumore (generazione) incorpora la guida da questo livello.",
+                "• Passo iniziale (%): specifica quando iniziare ad applicare la guida da questo livello durante il processo di generazione.",
                "• Passo finale (%): specifica quando interrompere l'applicazione della guida di questo livello e ripristinare la guida generale dal modello e altre impostazioni."
            ]
        },
@@ -1492,9 +1475,9 @@
            ]
        },
        "ipAdapterMethod": {
-            "heading": "Metodo",
+            "heading": "Modalità",
            "paragraphs": [
-                "Metodo con cui applicare l'adattatore IP corrente."
+                "La modalità definisce il modo in cui l'immagine di riferimento guiderà il processo di generazione."
            ]
        },
        "scale": {
@@ -1816,7 +1799,7 @@
            "full": "Stile e Composizione",
            "style": "Solo Stile",
            "composition": "Solo Composizione",
-            "ipAdapterMethod": "Metodo Adattatore IP",
+            "ipAdapterMethod": "Modalità",
            "fullDesc": "Applica lo stile visivo (colori, texture) e la composizione (disposizione, struttura).",
            "styleDesc": "Applica lo stile visivo (colori, texture) senza considerare la disposizione.",
            "compositionDesc": "Replica disposizione e struttura ignorando lo stile di riferimento."
@@ -2071,7 +2054,24 @@
        "asControlLayer": "Come $t(controlLayers.controlLayer)",
        "asControlLayerResize": "Come $t(controlLayers.controlLayer) (Ridimensiona)",
        "newSession": "Nuova sessione",
-        "resetCanvasLayers": "Ripristina livelli Tela"
+        "resetCanvasLayers": "Ripristina livelli Tela",
+        "referenceImageRegional": "Immagine di riferimento (regionale)",
+        "referenceImageGlobal": "Immagine di riferimento (globale)",
+        "warnings": {
+            "controlAdapterNoModelSelected": "nessun modello selezionato per il livello di controllo",
+            "controlAdapterNoControl": "nessun controllo selezionato/disegnato",
+            "ipAdapterNoModelSelected": "nessun modello di immagine di riferimento selezionato",
+            "rgNoPromptsOrIPAdapters": "nessun prompt testuale o immagini di riferimento",
+            "rgReferenceImagesNotSupported": "Immagini di riferimento regionali non supportate per il modello base selezionato",
+            "rgNoRegion": "nessuna regione disegnata",
+            "problemsFound": "Problemi riscontrati",
+            "unsupportedModel": "livello non supportato per il modello base selezionato",
+            "controlAdapterIncompatibleBaseModel": "modello di base del livello di controllo incompatibile",
+            "rgNegativePromptNotSupported": "Prompt negativo non supportato per il modello base selezionato",
+            "ipAdapterIncompatibleBaseModel": "modello base dell'immagine di riferimento incompatibile",
+            "ipAdapterNoImageSelected": "nessuna immagine di riferimento selezionata",
+            "rgAutoNegativeNotSupported": "Auto-Negativo non supportato per il modello base selezionato"
+        }
    },
    "ui": {
        "tabs": {
@@ -2171,8 +2171,8 @@
        "watchRecentReleaseVideos": "Guarda i video su questa versione",
        "watchUiUpdatesOverview": "Guarda le novità dell'interfaccia",
        "items": [
-            "<StrongComponent>Flussi di lavoro</StrongComponent>: esegui un flusso di lavoro per una raccolta di immagini utilizzando il nuovo nodo <StrongComponent>Lotto di immagini</StrongComponent>.",
-            "<StrongComponent>Tela</StrongComponent>: elaborazione semplificata del livello di controllo e impostazioni di controllo predefinite migliorate."
+            "<StrongComponent>FLUX Regional Guidance (beta)</StrongComponent>: la nostra versione beta di FLUX Regional Guidance è attiva per il controllo dei prompt regionali.",
+            "<StrongComponent>Vari miglioramenti dell'esperienza utente</StrongComponent>: numerosi piccoli miglioramenti dell'esperienza utente e della qualità della vita in tutta l'app."
        ]
    },
    "system": {
--- a/invokeai/frontend/web/public/locales/nl.json
+++ b/invokeai/frontend/web/public/locales/nl.json
@@ -230,15 +230,7 @@
            "systemDisconnected": "Systeem is niet verbonden",
            "missingNodeTemplate": "Knooppuntsjabloon ontbreekt",
            "missingFieldTemplate": "Veldsjabloon ontbreekt",
-            "addingImagesTo": "Bezig met toevoegen van afbeeldingen aan",
-            "layer": {
-                "controlAdapterNoModelSelected": "geen controle-adaptermodel geselecteerd",
-                "controlAdapterIncompatibleBaseModel": "niet-compatibele basismodel voor controle-adapter",
-                "ipAdapterIncompatibleBaseModel": "niet-compatibele basismodel voor IP-adapter",
-                "ipAdapterNoImageSelected": "geen afbeelding voor IP-adapter geselecteerd",
-                "rgNoPromptsOrIPAdapters": "geen tekstprompts of IP-adapters",
-                "ipAdapterNoModelSelected": "geen IP-adapter geselecteerd"
-            }
+            "addingImagesTo": "Bezig met toevoegen van afbeeldingen aan"
        },
        "patchmatchDownScaleSize": "Verklein",
        "useCpuNoise": "Gebruik CPU-ruis",
--- a/invokeai/frontend/web/public/locales/ru.json
+++ b/invokeai/frontend/web/public/locales/ru.json
@@ -648,18 +648,6 @@
            "missingFieldTemplate": "Отсутствует шаблон поля",
            "addingImagesTo": "Добавление изображений в",
            "invoke": "Создать",
-            "layer": {
-                "ipAdapterNoModelSelected": "IP адаптер не выбран",
-                "controlAdapterNoModelSelected": "не выбрана модель адаптера контроля",
-                "controlAdapterIncompatibleBaseModel": "несовместимая базовая модель адаптера контроля",
-                "rgNoPromptsOrIPAdapters": "нет текстовых запросов или IP-адаптеров",
-                "ipAdapterIncompatibleBaseModel": "несовместимая базовая модель IP-адаптера",
-                "ipAdapterNoImageSelected": "изображение IP-адаптера не выбрано",
-                "t2iAdapterIncompatibleScaledBboxWidth": "$t(parameters.invoke.layer.t2iAdapterRequiresDimensionsToBeMultipleOf) {{multiple}}, масштабированная ширина рамки {{width}}",
-                "t2iAdapterIncompatibleBboxHeight": "$t(parameters.invoke.layer.t2iAdapterRequiresDimensionsToBeMultipleOf) {{multiple}}, высота рамки {{height}}",
-                "t2iAdapterIncompatibleBboxWidth": "$t(parameters.invoke.layer.t2iAdapterRequiresDimensionsToBeMultipleOf) {{multiple}}, ширина рамки {{width}}",
-                "t2iAdapterIncompatibleScaledBboxHeight": "$t(parameters.invoke.layer.t2iAdapterRequiresDimensionsToBeMultipleOf) {{multiple}}, масштабированная высота рамки {{height}}"
-            },
            "fluxModelIncompatibleBboxWidth": "$t(parameters.invoke.fluxRequiresDimensionsToBeMultipleOf16), ширина рамки {{width}}",
            "fluxModelIncompatibleBboxHeight": "$t(parameters.invoke.fluxRequiresDimensionsToBeMultipleOf16), высота рамки {{height}}",
            "fluxModelIncompatibleScaledBboxHeight": "$t(parameters.invoke.fluxRequiresDimensionsToBeMultipleOf16), масштабированная высота рамки {{height}}",
--- a/invokeai/frontend/web/public/locales/vi.json
+++ b/invokeai/frontend/web/public/locales/vi.json
@@ -1410,23 +1410,6 @@
        "processImage": "Xử Lý Hình Ảnh",
        "useSize": "Dùng Kích Thước",
        "invoke": {
-            "layer": {
-                "t2iAdapterIncompatibleScaledBboxHeight": "$t(parameters.invoke.layer.t2iAdapterRequiresDimensionsToBeMultipleOf) {{multiple}}, tỉ lệ chiều dài hộp giới hạn là {{height}}",
-                "ipAdapterNoModelSelected": "không có IP Adapter được lựa chọn",
-                "ipAdapterNoImageSelected": "không có ảnh IP Adapter được lựa chọn",
-                "t2iAdapterIncompatibleBboxHeight": "$t(parameters.invoke.layer.t2iAdapterRequiresDimensionsToBeMultipleOf) {{multiple}}, chiều dài hộp giới hạn là {{height}}",
-                "t2iAdapterIncompatibleScaledBboxWidth": "$t(parameters.invoke.layer.t2iAdapterRequiresDimensionsToBeMultipleOf) {{multiple}}, tỉ lệ chiều rộng hộp giới hạn là {{width}}",
-                "t2iAdapterIncompatibleBboxWidth": "$t(parameters.invoke.layer.t2iAdapterRequiresDimensionsToBeMultipleOf) {{multiple}}, chiều rộng hộp giới hạn là {{width}}",
-                "rgNoPromptsOrIPAdapters": "không có lệnh chữ hoặc IP Adapter",
-                "controlAdapterIncompatibleBaseModel": "model cơ sở của Control Adapter không tương thích",
-                "ipAdapterIncompatibleBaseModel": "dạng model cơ sở của IP Adapter không tương thích",
-                "controlAdapterNoModelSelected": "không có model Control Adapter được chọn",
-                "emptyLayer": "layer trống",
-                "rgAutoNegativeNotSupported": "trình tự động đảo chiều không được hỗ trợ cho model cơ sở đang dùng",
-                "rgNegativePromptNotSupported": "lệnh tiêu cực không được hỗ trợ cho model cơ sở đang dùng",
-                "unsupportedModel": "layer không được hỗ trợ cho model cơ sở đang dùng",
-                "rgReferenceImagesNotSupported": "ảnh mẫu khu vực không được hỗ trợ cho model cơ sở đang dùng"
-            },
            "fluxModelIncompatibleBboxWidth": "$t(parameters.invoke.fluxRequiresDimensionsToBeMultipleOf16), chiều rộng hộp giới hạn là {{width}}",
            "noModelSelected": "Không có model được lựa chọn",
            "fluxModelIncompatibleScaledBboxHeight": "$t(parameters.invoke.fluxRequiresDimensionsToBeMultipleOf16), tỉ lệ chiều dài hộp giới hạn là {{height}}",
@@ -1931,7 +1914,24 @@
        "asControlLayer": "Như $t(controlLayers.controlLayer)",
        "asControlLayerResize": "Như $t(controlLayers.controlLayer) (Thay Đổi Kích Thước)",
        "newSession": "Phiên Làm Việc Mới",
-        "resetGenerationSettings": "Khởi Động Lại Cài Đặt Tạo Sinh"
+        "resetGenerationSettings": "Khởi Động Lại Cài Đặt Tạo Sinh",
+        "referenceImageRegional": "Ảnh Mẫu (Khu Vực)",
+        "referenceImageGlobal": "Ảnh Mẫu (Toàn Vùng)",
+        "warnings": {
+            "problemsFound": "Phát hiện vấn đề",
+            "unsupportedModel": "layer không được hỗ trợ cho model cơ sở này",
+            "controlAdapterNoModelSelected": "không có model được chọn cho Layer Chỉnh Sửa Được",
+            "controlAdapterNoControl": "chưa chọn/vẽ điều khiển",
+            "ipAdapterIncompatibleBaseModel": "model cơ sở cho Ảnh Mẫu không tương thích",
+            "ipAdapterNoImageSelected": "chưa chọn Ảnh Mẫu",
+            "controlAdapterIncompatibleBaseModel": "model cơ sở cho Layer Chỉnh Sửa Được không tương thích",
+            "ipAdapterNoModelSelected": "không có model được chọn cho Ảnh Mẫu",
+            "rgNoPromptsOrIPAdapters": "không có lệnh hoặc Ảnh Mẫu",
+            "rgNegativePromptNotSupported": "Lệnh Tiêu Cực không được hỗ trợ cho model cơ sở được chọn",
+            "rgReferenceImagesNotSupported": "Ảnh Mẫu Khu Vực không được hỗ trợ cho model cơ sở được chọn",
+            "rgAutoNegativeNotSupported": "Tự Động Đảo Chiều không được hỗ trợ cho model cơ sở được chọn",
+            "rgNoRegion": "không có khu vực được vẽ"
+        }
    },
    "stylePresets": {
        "negativePrompt": "Lệnh Tiêu Cực",
@@ -2156,8 +2156,8 @@
        "watchRecentReleaseVideos": "Xem Video Phát Hành Mới Nhất",
        "watchUiUpdatesOverview": "Xem Tổng Quan Về Những Cập Nhật Cho Giao Diện Người Dùng",
        "items": [
-            "<StrongComponent>Workflows</StrongComponent>: Chạy một workflow cho nhiều ảnh bằng node <StrongComponent>Ảnh Hàng Loạt</StrongComponent> mới.",
-            "<StrongComponent>FLUX</StrongComponent>: Hỗ trợ cho XLabs IP Adapter v2."
+            "<StrongComponent>Hướng Dẫn Khu Vực FLUX (beta)</StrongComponent>: Bản beta của Hướng Dẫn Khu Vực FLUX của chúng ta đã có mặt tại bảng điều khiển lệnh khu vực.",
+            "<StrongComponent>Nhiều Cải Tiến Ở UX</StrongComponent>: Một số nâng cấp nhỏ ở trải nghiệm và chất lượng người dùng trên toàn bộ ứng dụng."
        ]
    },
    "upsell": {
--- a/invokeai/frontend/web/public/locales/zh_CN.json
+++ b/invokeai/frontend/web/public/locales/zh_CN.json
@@ -661,18 +661,6 @@
            "missingFieldTemplate": "缺失模板",
            "addingImagesTo": "添加图像到",
            "noPrompts": "没有已生成的提示词",
-            "layer": {
-                "ipAdapterNoModelSelected": "未选择IP adapter",
-                "controlAdapterNoModelSelected": "未选择Control Adapter模型",
-                "rgNoPromptsOrIPAdapters": "无文本提示或IP Adapters",
-                "controlAdapterIncompatibleBaseModel": "Control Adapter的基础模型不兼容",
-                "ipAdapterIncompatibleBaseModel": "IP Adapter的基础模型不兼容",
-                "ipAdapterNoImageSelected": "未选择IP Adapter图像",
-                "t2iAdapterIncompatibleBboxWidth": "$t(parameters.invoke.layer.t2iAdapterRequiresDimensionsToBeMultipleOf) {{multiple}}，边界框宽度为 {{width}}",
-                "t2iAdapterIncompatibleScaledBboxHeight": "$t(parameters.invoke.layer.t2iAdapterRequiresDimensionsToBeMultipleOf) {{multiple}}，缩放后的边界框高度为 {{height}}",
-                "t2iAdapterIncompatibleBboxHeight": "$t(parameters.invoke.layer.t2iAdapterRequiresDimensionsToBeMultipleOf) {{multiple}}，边界框高度为 {{height}}",
-                "t2iAdapterIncompatibleScaledBboxWidth": "$t(parameters.invoke.layer.t2iAdapterRequiresDimensionsToBeMultipleOf) {{multiple}}，缩放后的边界框宽度为 {{width}}"
-            },
            "canvasIsFiltering": "画布正在过滤",
            "fluxModelIncompatibleScaledBboxHeight": "$t(parameters.invoke.fluxRequiresDimensionsToBeMultipleOf16)，缩放后的边界框高度为 {{height}}",
            "noCLIPEmbedModelSelected": "未为FLUX生成选择CLIP嵌入模型",
--- a/invokeai/frontend/web/src/features/controlLayers/store/paramsSlice.ts
+++ b/invokeai/frontend/web/src/features/controlLayers/store/paramsSlice.ts
@@ -24,6 +24,7 @@ import type {
  ParameterSeed,
  ParameterSteps,
  ParameterStrength,
+  ParameterStructuralLoRAModel,
  ParameterT5EncoderModel,
  ParameterVAEModel,
 } from 'features/parameters/types/parameterSchemas';
@@ -75,6 +76,7 @@ export type ParamsState = {
  clipEmbedModel: ParameterCLIPEmbedModel | null;
  clipLEmbedModel: ParameterCLIPLEmbedModel | null;
  clipGEmbedModel: ParameterCLIPGEmbedModel | null;
+  structuralLora: ParameterStructuralLoRAModel | null;
 };

 const initialState: ParamsState = {
@@ -121,6 +123,7 @@ const initialState: ParamsState = {
  clipEmbedModel: null,
  clipLEmbedModel: null,
  clipGEmbedModel: null,
+  structuralLora: null,
 };

 export const paramsSlice = createSlice({
@@ -195,6 +198,9 @@ export const paramsSlice = createSlice({
    t5EncoderModelSelected: (state, action: PayloadAction<ParameterT5EncoderModel | null>) => {
      state.t5EncoderModel = action.payload;
    },
+    structuralLoRAModelSelected: (state, action: PayloadAction<ParameterStructuralLoRAModel | null>) => {
+      state.structuralLora = action.payload;
+    },
    clipEmbedModelSelected: (state, action: PayloadAction<ParameterCLIPEmbedModel | null>) => {
      state.clipEmbedModel = action.payload;
    },
--- a/invokeai/frontend/web/src/features/metadata/util/parsers.ts
+++ b/invokeai/frontend/web/src/features/metadata/util/parsers.ts
@@ -46,6 +46,7 @@ import type {
  ParameterSeed,
  ParameterSteps,
  ParameterStrength,
+  ParameterStructuralLoRAModel,
  ParameterVAEModel,
  ParameterWidth,
 } from 'features/parameters/types/parameterSchemas';
@@ -80,6 +81,7 @@ import {
  isLoRAModelConfig,
  isNonRefinerMainModelConfig,
  isRefinerMainModelModelConfig,
+  isStructuralLoRAModelConfig,
  isT2IAdapterModelConfig,
  isVAEModelConfig,
 } from 'services/api/types';
@@ -226,6 +228,14 @@ const parseVAEModel: MetadataParseFunc<ParameterVAEModel> = async (metadata) =>
  return modelIdentifier;
 };

+const parseStructuralLoRAModel: MetadataParseFunc<ParameterStructuralLoRAModel> = async (metadata) => {
+  // Read the `structural_lora` metadata entry, derive its model key, then fetch a type-guarded config.
+  const rawStructuralLora = await getProperty(metadata, 'structural_lora', undefined);
+  const modelKey = await getModelKey(rawStructuralLora, 'structural_lora');
+  const config = await fetchModelConfigWithTypeGuard(modelKey, isStructuralLoRAModelConfig);
+  return zModelIdentifierField.parse(config);
+};
+
 const parseLoRA: MetadataParseFunc<LoRA> = async (metadataItem) => {
  // Previously, the LoRA model identifier parts were stored in the LoRA metadata: `{key: ..., weight: 0.75}`
  const modelV1 = await getProperty(metadataItem, 'lora', undefined);
@@ -671,6 +681,7 @@ export const parsers = {
  mainModel: parseMainModel,
  refinerModel: parseRefinerModel,
  vaeModel: parseVAEModel,
+  structuralLora: parseStructuralLoRAModel,
  lora: parseLoRA,
  loras: parseAllLoRAs,
  controlNet: parseControlNet,
--- a/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelManagerPanel/ModelList.tsx
+++ b/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelManagerPanel/ModelList.tsx
@@ -18,6 +18,7 @@ import {
  useMainModels,
  useRefinerModels,
  useSpandrelImageToImageModels,
+  useStructuralLoRAModel,
  useT2IAdapterModels,
  useT5EncoderModels,
  useVAEModels,
@@ -92,6 +93,12 @@ const ModelList = () => {
    [t5EncoderModels, searchTerm, filteredModelType]
  );

+  const [structuralLoRAModels, { isLoading: isLoadingStructuralLoRAModels }] = useStructuralLoRAModel();
+  const filteredStructuralLoRAModels = useMemo(
+    () => modelsFilter(structuralLoRAModels, searchTerm, filteredModelType),
+    [structuralLoRAModels, searchTerm, filteredModelType]
+  );
+
  const [clipEmbedModels, { isLoading: isLoadingClipEmbedModels }] = useCLIPEmbedModels({ excludeSubmodels: true });
  const filteredClipEmbedModels = useMemo(
    () => modelsFilter(clipEmbedModels, searchTerm, filteredModelType),
@@ -118,7 +125,8 @@ const ModelList = () => {
      filteredVAEModels.length +
      filteredSpandrelImageToImageModels.length +
      t5EncoderModels.length +
-      clipEmbedModels.length
+      clipEmbedModels.length +
+      structuralLoRAModels.length
    );
  }, [
    filteredControlNetModels.length,
@@ -133,6 +141,7 @@ const ModelList = () => {
    filteredSpandrelImageToImageModels.length,
    t5EncoderModels.length,
    clipEmbedModels.length,
+    structuralLoRAModels.length,
  ]);

  return (
@@ -195,6 +204,15 @@ const ModelList = () => {
        {!isLoadingT5EncoderModels && filteredT5EncoderModels.length > 0 && (
          <ModelListWrapper title={t('modelManager.t5Encoder')} modelList={filteredT5EncoderModels} key="t5-encoder" />
        )}
+        {/* Structural Lora List */}
+        {isLoadingStructuralLoRAModels && <FetchingModelsLoader loadingMessage="Loading Structural Loras..." />}
+        {!isLoadingStructuralLoRAModels && filteredStructuralLoRAModels.length > 0 && (
+          <ModelListWrapper
+            title={t('modelManager.structuralLora')}
+            modelList={filteredStructuralLoRAModels}
+            key="structural-lora"
+          />
+        )}
        {/* Clip Embed List */}
        {isLoadingClipEmbedModels && <FetchingModelsLoader loadingMessage="Loading Clip Embed Models..." />}
        {!isLoadingClipEmbedModels && filteredClipEmbedModels.length > 0 && (
--- a/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelManagerPanel/ModelTypeFilter.tsx
+++ b/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelManagerPanel/ModelTypeFilter.tsx
@@ -24,6 +24,7 @@ export const ModelTypeFilter = memo(() => {
      ip_adapter: t('common.ipAdapter'),
      clip_vision: 'CLIP Vision',
      spandrel_image_to_image: t('modelManager.spandrelImageToImage'),
+      structural_lora: t('modelManager.structuralLora'),
    }),
    [t]
  );
--- a/invokeai/frontend/web/src/features/nodes/components/flow/nodes/Invocation/fields/InputFieldRenderer.tsx
+++ b/invokeai/frontend/web/src/features/nodes/components/flow/nodes/Invocation/fields/InputFieldRenderer.tsx
@@ -51,6 +51,8 @@ import {
  isSpandrelImageToImageModelFieldInputTemplate,
  isStringFieldInputInstance,
  isStringFieldInputTemplate,
+  isStructuralLoRAModelFieldInputInstance,
+  isStructuralLoRAModelFieldInputTemplate,
  isT2IAdapterModelFieldInputInstance,
  isT2IAdapterModelFieldInputTemplate,
  isT5EncoderModelFieldInputInstance,
@@ -81,6 +83,7 @@ import SD3MainModelFieldInputComponent from './inputs/SD3MainModelFieldInputComp
 import SDXLMainModelFieldInputComponent from './inputs/SDXLMainModelFieldInputComponent';
 import SpandrelImageToImageModelFieldInputComponent from './inputs/SpandrelImageToImageModelFieldInputComponent';
 import StringFieldInputComponent from './inputs/StringFieldInputComponent';
+import StructuralLoRAModelFieldInputComponent from './inputs/StructuralLoraModelFieldInputComponent';
 import T2IAdapterModelFieldInputComponent from './inputs/T2IAdapterModelFieldInputComponent';
 import T5EncoderModelFieldInputComponent from './inputs/T5EncoderModelFieldInputComponent';
 import VAEModelFieldInputComponent from './inputs/VAEModelFieldInputComponent';
@@ -156,6 +159,15 @@ const InputFieldRenderer = ({ nodeId, fieldName }: InputFieldProps) => {
    return <CLIPGEmbedModelFieldInputComponent nodeId={nodeId} field={fieldInstance} fieldTemplate={fieldTemplate} />;
  }

+  if (
+    isStructuralLoRAModelFieldInputInstance(fieldInstance) &&
+    isStructuralLoRAModelFieldInputTemplate(fieldTemplate)
+  ) {
+    return (
+      <StructuralLoRAModelFieldInputComponent nodeId={nodeId} field={fieldInstance} fieldTemplate={fieldTemplate} />
+    );
+  }
+
  if (isFluxVAEModelFieldInputInstance(fieldInstance) && isFluxVAEModelFieldInputTemplate(fieldTemplate)) {
    return <FluxVAEModelFieldInputComponent nodeId={nodeId} field={fieldInstance} fieldTemplate={fieldTemplate} />;
  }
--- a/invokeai/frontend/web/src/features/nodes/components/flow/nodes/Invocation/fields/inputs/StructuralLoraModelFieldInputComponent.tsx
+++ b/invokeai/frontend/web/src/features/nodes/components/flow/nodes/Invocation/fields/inputs/StructuralLoraModelFieldInputComponent.tsx
@@ -0,0 +1,65 @@
+import { Combobox, Flex, FormControl, Tooltip } from '@invoke-ai/ui-library';
+import { useAppDispatch, useAppSelector } from 'app/store/storeHooks';
+import { useGroupedModelCombobox } from 'common/hooks/useGroupedModelCombobox';
+import { fieldStructuralLoRAModelValueChanged } from 'features/nodes/store/nodesSlice';
+import type {
+  StructuralLoRAModelFieldInputInstance,
+  StructuralLoRAModelFieldInputTemplate,
+} from 'features/nodes/types/field';
+import { memo, useCallback } from 'react';
+import { useTranslation } from 'react-i18next';
+import { useStructuralLoRAModel } from 'services/api/hooks/modelsByType';
+import { isStructuralLoRAModelConfig, type StructuralLoRAModelConfig } from 'services/api/types';
+
+import type { FieldComponentProps } from './types';
+
+type Props = FieldComponentProps<StructuralLoRAModelFieldInputInstance, StructuralLoRAModelFieldInputTemplate>;
+
+const StructuralLoRAModelFieldInputComponent = ({ nodeId, field, fieldTemplate }: Props) => {
+  const { t } = useTranslation();
+  const disabledTabs = useAppSelector((s) => s.config.disabledTabs);
+  const dispatch = useAppDispatch();
+  const [modelConfigs, { isLoading }] = useStructuralLoRAModel();
+  const { required } = fieldTemplate;
+
+  // Store the picked model on this node field; an empty (null) selection is ignored.
+  const handleModelPicked = useCallback(
+    (picked: StructuralLoRAModelConfig | null) => {
+      if (picked) {
+        dispatch(fieldStructuralLoRAModelValueChanged({ nodeId, fieldName: field.name, value: picked }));
+      }
+    },
+    [dispatch, field.name, nodeId]
+  );
+
+  // Keep only structural LoRA configs before handing them to the grouped combobox.
+  const structuralLoRAConfigs = modelConfigs.filter((config) => isStructuralLoRAModelConfig(config));
+  const { options, value, onChange, placeholder, noOptionsMessage } = useGroupedModelCombobox({
+    modelConfigs: structuralLoRAConfigs,
+    onChange: handleModelPicked,
+    isLoading,
+    selectedModel: field.value,
+  });
+
+  // Tooltip text is only set when 'models' is not a disabled tab; optional fields get a prefixed placeholder.
+  const tooltipLabel = !disabledTabs.includes('models') && t('modelManager.starterModelsInModelManager');
+  const comboboxPlaceholder = required ? placeholder : `(Optional) ${placeholder}`;
+
+  return (
+    <Flex w="full" alignItems="center" gap={2}>
+      <Tooltip label={tooltipLabel}>
+        <FormControl className="nowheel nodrag" isDisabled={!options.length} isInvalid={!value && required}>
+          <Combobox
+            value={value}
+            placeholder={comboboxPlaceholder}
+            options={options}
+            onChange={onChange}
+            noOptionsMessage={noOptionsMessage}
+          />
+        </FormControl>
+      </Tooltip>
+    </Flex>
+  );
+};
+
+export default memo(StructuralLoRAModelFieldInputComponent);
--- a/invokeai/frontend/web/src/features/nodes/store/nodesSlice.ts
+++ b/invokeai/frontend/web/src/features/nodes/store/nodesSlice.ts
@@ -28,6 +28,7 @@ import type {
  SpandrelImageToImageModelFieldValue,
  StatefulFieldValue,
  StringFieldValue,
+  StructuralLoRAModelFieldValue,
  T2IAdapterModelFieldValue,
  T5EncoderModelFieldValue,
  VAEModelFieldValue,
@@ -55,6 +56,7 @@ import {
  zSpandrelImageToImageModelFieldValue,
  zStatefulFieldValue,
  zStringFieldValue,
+  zStructuralLoRAModelFieldValue,
  zT2IAdapterModelFieldValue,
  zT5EncoderModelFieldValue,
  zVAEModelFieldValue,
@@ -369,6 +371,9 @@ export const nodesSlice = createSlice({
    fieldCLIPGEmbedValueChanged: (state, action: FieldValueAction<CLIPGEmbedModelFieldValue>) => {
      fieldValueReducer(state, action, zCLIPGEmbedModelFieldValue);
    },
+    fieldStructuralLoRAModelValueChanged: (state, action: FieldValueAction<StructuralLoRAModelFieldValue>) => {
+      fieldValueReducer(state, action, zStructuralLoRAModelFieldValue);
+    },
    fieldFluxVAEModelValueChanged: (state, action: FieldValueAction<FluxVAEModelFieldValue>) => {
      fieldValueReducer(state, action, zFluxVAEModelFieldValue);
    },
@@ -438,6 +443,7 @@ export const {
  fieldCLIPEmbedValueChanged,
  fieldCLIPLEmbedValueChanged,
  fieldCLIPGEmbedValueChanged,
+  fieldStructuralLoRAModelValueChanged,
  fieldFluxVAEModelValueChanged,
  nodeEditorReset,
  nodeIsIntermediateChanged,
--- a/invokeai/frontend/web/src/features/nodes/types/common.ts
+++ b/invokeai/frontend/web/src/features/nodes/types/common.ts
@@ -69,6 +69,7 @@ const zModelType = z.enum([
  'main',
  'vae',
  'lora',
+  'structural_lora',
  'controlnet',
  't2i_adapter',
  'ip_adapter',
--- a/invokeai/frontend/web/src/features/nodes/types/field.ts
+++ b/invokeai/frontend/web/src/features/nodes/types/field.ts
@@ -178,6 +178,10 @@ const zCLIPGEmbedModelFieldType = zFieldTypeBase.extend({
  name: z.literal('CLIPGEmbedModelField'),
  originalType: zStatelessFieldType.optional(),
 });
+const zStructuralLoRAModelFieldType = zFieldTypeBase.extend({
+  name: z.literal('StructuralLoRAModelField'),
+  originalType: zStatelessFieldType.optional(),
+});
 const zFluxVAEModelFieldType = zFieldTypeBase.extend({
  name: z.literal('FluxVAEModelField'),
  originalType: zStatelessFieldType.optional(),
@@ -210,6 +214,7 @@ const zStatefulFieldType = z.union([
  zCLIPEmbedModelFieldType,
  zCLIPLEmbedModelFieldType,
  zCLIPGEmbedModelFieldType,
+  zStructuralLoRAModelFieldType,
  zFluxVAEModelFieldType,
  zColorFieldType,
  zSchedulerFieldType,
@@ -864,6 +869,29 @@ export const isCLIPGEmbedModelFieldInputTemplate = (val: unknown): val is CLIPGE

 // #endregion

+// #region StructuralLoRAModelField
+
+export const zStructuralLoRAModelFieldValue = zModelIdentifierField.optional();
+const zStructuralLoRAModelFieldInputInstance = zFieldInputInstanceBase.extend({
+  value: zStructuralLoRAModelFieldValue,
+});
+const zStructuralLoRAModelFieldInputTemplate = zFieldInputTemplateBase.extend({
+  type: zStructuralLoRAModelFieldType,
+  originalType: zFieldType.optional(),
+  default: zStructuralLoRAModelFieldValue,
+});
+
+export type StructuralLoRAModelFieldValue = z.infer<typeof zStructuralLoRAModelFieldValue>;
+
+export type StructuralLoRAModelFieldInputInstance = z.infer<typeof zStructuralLoRAModelFieldInputInstance>;
+export type StructuralLoRAModelFieldInputTemplate = z.infer<typeof zStructuralLoRAModelFieldInputTemplate>;
+export const isStructuralLoRAModelFieldInputInstance = (val: unknown): val is StructuralLoRAModelFieldInputInstance =>
+  zStructuralLoRAModelFieldInputInstance.safeParse(val).success;
+export const isStructuralLoRAModelFieldInputTemplate = (val: unknown): val is StructuralLoRAModelFieldInputTemplate =>
+  zStructuralLoRAModelFieldInputTemplate.safeParse(val).success;
+
+// #endregion
+
 // #region SchedulerField

 export const zSchedulerFieldValue = zSchedulerField.optional();
@@ -959,6 +987,7 @@ export const zStatefulFieldValue = z.union([
  zCLIPEmbedModelFieldValue,
  zCLIPLEmbedModelFieldValue,
  zCLIPGEmbedModelFieldValue,
+  zStructuralLoRAModelFieldValue,
  zColorFieldValue,
  zSchedulerFieldValue,
 ]);
@@ -1030,6 +1059,7 @@ const zStatefulFieldInputTemplate = z.union([
  zCLIPEmbedModelFieldInputTemplate,
  zCLIPLEmbedModelFieldInputTemplate,
  zCLIPGEmbedModelFieldInputTemplate,
+  zStructuralLoRAModelFieldInputTemplate,
  zColorFieldInputTemplate,
  zSchedulerFieldInputTemplate,
  zStatelessFieldInputTemplate,
--- a/invokeai/frontend/web/src/features/nodes/util/graph/generation/addIPAdapters.ts
+++ b/invokeai/frontend/web/src/features/nodes/util/graph/generation/addIPAdapters.ts
@@ -17,7 +17,9 @@ type AddIPAdaptersArg = {
 };

 export const addIPAdapters = ({ entities, g, collector, model }: AddIPAdaptersArg): AddIPAdaptersResult => {
-  const validIPAdapters = entities.filter((entity) => getGlobalReferenceImageWarnings(entity, model).length === 0);
+  const validIPAdapters = entities
+    .filter((entity) => entity.isEnabled)
+    .filter((entity) => getGlobalReferenceImageWarnings(entity, model).length === 0);

  const result: AddIPAdaptersResult = {
    addedIPAdapters: 0,
--- a/invokeai/frontend/web/src/features/nodes/util/graph/generation/addRegions.ts
+++ b/invokeai/frontend/web/src/features/nodes/util/graph/generation/addRegions.ts
@@ -63,12 +63,9 @@ export const addRegions = async ({
  const isSDXL = model.base === 'sdxl';
  const isFLUX = model.base === 'flux';

-  const validRegions = regions.filter((rg) => {
-    if (!rg.isEnabled) {
-      return false;
-    }
-    return getRegionalGuidanceWarnings(rg, model).length === 0;
-  });
+  const validRegions = regions
+    .filter((entity) => entity.isEnabled)
+    .filter((entity) => getRegionalGuidanceWarnings(entity, model).length === 0);

  const results: AddedRegionResult[] = [];

--- a/invokeai/frontend/web/src/features/nodes/util/schema/buildFieldInputInstance.ts
+++ b/invokeai/frontend/web/src/features/nodes/util/schema/buildFieldInputInstance.ts
@@ -28,6 +28,7 @@ const FIELD_VALUE_FALLBACK_MAP: Record<StatefulFieldType['name'], FieldValue> =
  CLIPEmbedModelField: undefined,
  CLIPLEmbedModelField: undefined,
  CLIPGEmbedModelField: undefined,
+  StructuralLoRAModelField: undefined,
 };

 export const buildFieldInputInstance = (id: string, template: FieldInputTemplate): FieldInputInstance => {
--- a/invokeai/frontend/web/src/features/nodes/util/schema/buildFieldInputTemplate.ts
+++ b/invokeai/frontend/web/src/features/nodes/util/schema/buildFieldInputTemplate.ts
@@ -28,6 +28,7 @@ import type {
  StatefulFieldType,
  StatelessFieldInputTemplate,
  StringFieldInputTemplate,
+  StructuralLoRAModelFieldInputTemplate,
  T2IAdapterModelFieldInputTemplate,
  T5EncoderModelFieldInputTemplate,
  VAEModelFieldInputTemplate,
@@ -300,6 +301,20 @@ const buildCLIPGEmbedModelFieldInputTemplate: FieldInputTemplateBuilder<CLIPGEmb
  return template;
 };

+const buildStructuralLoRAModelFieldInputTemplate: FieldInputTemplateBuilder<StructuralLoRAModelFieldInputTemplate> = ({
+  schemaObject,
+  baseField,
+  fieldType,
+}) => {
+  // A null or missing schema default becomes `undefined` on the template.
+  const structuralLoRATemplate: StructuralLoRAModelFieldInputTemplate = {
+    ...baseField,
+    type: fieldType,
+    default: schemaObject.default ?? undefined,
+  };
+  return structuralLoRATemplate;
+};
+
 const buildFluxVAEModelFieldInputTemplate: FieldInputTemplateBuilder<FluxVAEModelFieldInputTemplate> = ({
  schemaObject,
  baseField,
@@ -526,6 +541,7 @@ export const TEMPLATE_BUILDER_MAP: Record<StatefulFieldType['name'], FieldInputT
  CLIPLEmbedModelField: buildCLIPLEmbedModelFieldInputTemplate,
  CLIPGEmbedModelField: buildCLIPGEmbedModelFieldInputTemplate,
  FluxVAEModelField: buildFluxVAEModelFieldInputTemplate,
+  StructuralLoRAModelField: buildStructuralLoRAModelFieldInputTemplate,
 } as const;

 export const buildFieldInputTemplate = (
--- a/invokeai/frontend/web/src/features/parameters/types/parameterSchemas.ts
+++ b/invokeai/frontend/web/src/features/parameters/types/parameterSchemas.ts
@@ -113,6 +113,11 @@ export const zParameterVAEModel = zModelIdentifierField;
 export type ParameterVAEModel = z.infer<typeof zParameterVAEModel>;
 // #endregion

+// #region Structural Lora Model
+export const zParameterStructuralLoRAModel = zModelIdentifierField;
+export type ParameterStructuralLoRAModel = z.infer<typeof zParameterStructuralLoRAModel>;
+// #endregion
+
 // #region T5Encoder Model
 export const zParameterT5EncoderModel = zModelIdentifierField;
 export type ParameterT5EncoderModel = z.infer<typeof zParameterT5EncoderModel>;
--- a/invokeai/frontend/web/src/features/system/components/SettingsModal/SettingsLanguageSelect.tsx
+++ b/invokeai/frontend/web/src/features/system/components/SettingsModal/SettingsLanguageSelect.tsx
@@ -31,7 +31,7 @@ const optionsObject: Record<Language, string> = {
  sv: 'Svenska',
  tr: 'Türkçe',
  ua: 'Украї́нська',
-  vi: 'tiếng Việt',
+  vi: 'Tiếng Việt',
  zh_CN: '简体中文',
  zh_Hant: '漢語',
 };
--- a/invokeai/frontend/web/src/services/api/hooks/modelsByType.ts
+++ b/invokeai/frontend/web/src/services/api/hooks/modelsByType.ts
@@ -23,6 +23,7 @@ import {
  isSD3MainModelModelConfig,
  isSDXLMainModelModelConfig,
  isSpandrelImageToImageModelConfig,
+  isStructuralLoRAModelConfig,
  isT2IAdapterModelConfig,
  isT5EncoderModelConfig,
  isTIModelConfig,
@@ -58,6 +59,7 @@ export const useFluxModels = buildModelsHook(isFluxMainModelModelConfig);
 export const useSD3Models = buildModelsHook(isSD3MainModelModelConfig);
 export const useSDXLModels = buildModelsHook(isSDXLMainModelModelConfig);
 export const useLoRAModels = buildModelsHook(isLoRAModelConfig);
+export const useStructuralLoRAModel = buildModelsHook(isStructuralLoRAModelConfig);
 export const useControlNetAndT2IAdapterModels = buildModelsHook(isControlNetOrT2IAdapterModelConfig);
 export const useControlNetModels = buildModelsHook(isControlNetModelConfig);
 export const useT2IAdapterModels = buildModelsHook(isT2IAdapterModelConfig);
--- a/invokeai/frontend/web/src/services/api/schema.ts
+++ b/invokeai/frontend/web/src/services/api/schema.ts
--- a/invokeai/frontend/web/src/services/api/types.ts
+++ b/invokeai/frontend/web/src/services/api/types.ts
@@ -44,6 +44,7 @@ export type BaseModelType = S['BaseModelType'];

 // Model Configs

+export type StructuralLoRAModelConfig = S['StructuralLoRALyCORISConfig'];
 // TODO(MM2): Can we make key required in the pydantic model?
 export type LoRAModelConfig = S['LoRADiffusersConfig'] | S['LoRALyCORISConfig'];
 // TODO(MM2): Can we rename this from Vae -> VAE
@@ -63,6 +64,7 @@ export type CheckpointModelConfig = S['MainCheckpointConfig'];
 type CLIPVisionDiffusersConfig = S['CLIPVisionDiffusersConfig'];
 export type MainModelConfig = DiffusersModelConfig | CheckpointModelConfig;
 export type AnyModelConfig =
+  | StructuralLoRAModelConfig
  | LoRAModelConfig
  | VAEModelConfig
  | ControlNetModelConfig
@@ -114,6 +116,10 @@ export const isLoRAModelConfig = (config: AnyModelConfig): config is LoRAModelCo
  return config.type === 'lora';
 };

+export const isStructuralLoRAModelConfig = (config: AnyModelConfig): config is StructuralLoRAModelConfig => {
+  return config.type === 'structural_lora';
+};
+
 export const isVAEModelConfig = (config: AnyModelConfig, excludeSubmodels?: boolean): config is VAEModelConfig => {
  return config.type === 'vae' || (!excludeSubmodels && config.type === 'main' && checkSubmodels(['vae'], config));
 };
--- a/invokeai/version/invokeai_version.py
+++ b/invokeai/version/invokeai_version.py
@@ -1 +1 @@
-__version__ = "5.4.3rc1"
+__version__ = "5.4.3"
--- a/tests/backend/lora/conversions/lora_state_dicts/flux_control_lora_format.py
+++ b/tests/backend/lora/conversions/lora_state_dicts/flux_control_lora_format.py
--- a/tests/backend/lora/conversions/test_flux_control_lora_conversion_utils.py
+++ b/tests/backend/lora/conversions/test_flux_control_lora_conversion_utils.py
@@ -0,0 +1,70 @@
+import pytest
+import torch
+
+from invokeai.backend.lora.conversions.flux_control_lora_utils import (
+    is_state_dict_likely_flux_control,
+    lora_model_from_flux_control_state_dict,
+)
+from invokeai.backend.lora.conversions.flux_lora_constants import FLUX_LORA_TRANSFORMER_PREFIX
+from tests.backend.lora.conversions.lora_state_dicts.flux_control_lora_format import (
+    state_dict_keys as flux_control_lora_state_dict_keys,
+)
+from tests.backend.lora.conversions.lora_state_dicts.flux_lora_diffusers_format import (
+    state_dict_keys as flux_diffusers_state_dict_keys,
+)
+from tests.backend.lora.conversions.lora_state_dicts.utils import keys_to_mock_state_dict
+
+
+@pytest.mark.parametrize("sd_keys", [flux_control_lora_state_dict_keys])
+def test_is_state_dict_likely_in_flux_control_format_true(sd_keys: dict[str, list[int]]):
+    """Test that is_state_dict_likely_flux_control() can identify a state dict in the FLUX Control LoRA format."""
+    # Construct a state dict that is in the FLUX Control LoRA format.
+    state_dict = keys_to_mock_state_dict(sd_keys)
+
+    assert is_state_dict_likely_flux_control(state_dict)
+
+@pytest.mark.parametrize("sd_keys", [flux_diffusers_state_dict_keys])
+def test_is_state_dict_likely_in_flux_control_format_false(sd_keys: dict[str, list[int]]):
+    """Test that is_state_dict_likely_flux_control() returns False for a state dict that is in the Diffusers
+    FLUX LoRA format.
+    """
+    # Construct a state dict that is not in the FLUX Control LoRA format.
+    state_dict = keys_to_mock_state_dict(sd_keys)
+
+    assert not is_state_dict_likely_flux_control(state_dict)
+
+
+@pytest.mark.parametrize("sd_keys", [flux_control_lora_state_dict_keys])
+def test_lora_model_from_flux_control_state_dict(sd_keys: dict[str, list[int]]):
+    """Test that lora_model_from_flux_control_state_dict() can load a state dict in the FLUX Control LoRA format."""
+    # Construct a state dict that is in the FLUX Control LoRA format.
+    state_dict = keys_to_mock_state_dict(sd_keys)
+    # Load the state dict into a LoRAModelRaw object.
+    model = lora_model_from_flux_control_state_dict(state_dict)
+
+    # Check that the model has the correct number of LoRA layers.
+    expected_lora_layers: set[str] = set()
+    for k in sd_keys:
+        k = k.replace("lora_A.weight", "")
+        k = k.replace("lora_B.weight", "")
+        k = k.replace("lora_B.bias", "")
+        k = k.replace(".scale", "")
+        expected_lora_layers.add(k)
+    # Each key collapses to its layer prefix once the LoRA parameter suffixes are stripped, so the number of unique
+    # prefixes is the number of layers that the loaded model is expected to contain.
+    assert len(model.layers) == len(expected_lora_layers)
+    assert all(k.startswith(FLUX_LORA_TRANSFORMER_PREFIX) for k in model.layers.keys())
+
+
+def test_lora_model_from_flux_control_state_dict_extra_keys_error():
+    """Test that lora_model_from_flux_control_state_dict() raises an error if the input state_dict contains unexpected
+    keys that we don't handle.
+    """
+    # Construct a state dict that is in the FLUX Control LoRA format.
+    state_dict = keys_to_mock_state_dict(flux_control_lora_state_dict_keys)
+    # Add an unexpected key.
+    state_dict["transformer.single_transformer_blocks.0.unexpected_key.lora_A.weight"] = torch.empty(1)
+
+    # Check that an error is raised.
+    with pytest.raises(AssertionError):
+        lora_model_from_flux_control_state_dict(state_dict)
--- a/tests/backend/lora/sidecar_layers/concatenated_lora/test_concatenated_lora_linear_sidecar_layer.py
+++ b/tests/backend/lora/sidecar_layers/concatenated_lora/test_concatenated_lora_linear_sidecar_layer.py
@@ -1,49 +0,0 @@
-import copy
-
-import torch
-
-from invokeai.backend.lora.layers.concatenated_lora_layer import ConcatenatedLoRALayer
-from invokeai.backend.lora.layers.lora_layer import LoRALayer
-from invokeai.backend.lora.sidecar_layers.concatenated_lora.concatenated_lora_linear_sidecar_layer import (
-    ConcatenatedLoRALinearSidecarLayer,
-)
-from invokeai.backend.lora.sidecar_layers.lora_sidecar_module import LoRASidecarModule
-
-
-def test_concatenated_lora_linear_sidecar_layer():
-    """Test that a ConcatenatedLoRALinearSidecarLayer is equivalent to patching a linear layer with the ConcatenatedLoRA
-    layer.
-    """
-
-    # Create a linear layer.
-    in_features = 5
-    sub_layer_out_features = [5, 10, 15]
-    linear = torch.nn.Linear(in_features, sum(sub_layer_out_features))
-
-    # Create a ConcatenatedLoRA layer.
-    rank = 4
-    sub_layers: list[LoRALayer] = []
-    for out_features in sub_layer_out_features:
-        down = torch.randn(rank, in_features)
-        up = torch.randn(out_features, rank)
-        bias = torch.randn(out_features)
-        sub_layers.append(LoRALayer(up=up, mid=None, down=down, alpha=1.0, bias=bias))
-    concatenated_lora_layer = ConcatenatedLoRALayer(sub_layers, concat_axis=0)
-
-    # Patch the ConcatenatedLoRA layer into the linear layer.
-    linear_patched = copy.deepcopy(linear)
-    linear_patched.weight.data += (
-        concatenated_lora_layer.get_weight(linear_patched.weight) * concatenated_lora_layer.scale()
-    )
-    linear_patched.bias.data += concatenated_lora_layer.get_bias(linear_patched.bias) * concatenated_lora_layer.scale()
-
-    # Create a ConcatenatedLoRALinearSidecarLayer.
-    concatenated_lora_linear_sidecar_layer = ConcatenatedLoRALinearSidecarLayer(concatenated_lora_layer, weight=1.0)
-    linear_with_sidecar = LoRASidecarModule(linear, [concatenated_lora_linear_sidecar_layer])
-
-    # Run the ConcatenatedLoRA-patched linear layer and the ConcatenatedLoRALinearSidecarLayer and assert they are
-    # equal.
-    input = torch.randn(1, in_features)
-    output_patched = linear_patched(input)
-    output_sidecar = linear_with_sidecar(input)
-    assert torch.allclose(output_patched, output_sidecar, atol=1e-6)
--- a/tests/backend/lora/sidecar_layers/lora/test_lora_linear_sidecar_layer.py
+++ b/tests/backend/lora/sidecar_layers/lora/test_lora_linear_sidecar_layer.py
@@ -1,38 +0,0 @@
-import copy
-
-import torch
-
-from invokeai.backend.lora.layers.lora_layer import LoRALayer
-from invokeai.backend.lora.sidecar_layers.lora.lora_linear_sidecar_layer import LoRALinearSidecarLayer
-from invokeai.backend.lora.sidecar_layers.lora_sidecar_module import LoRASidecarModule
-
-
-@torch.no_grad()
-def test_lora_linear_sidecar_layer():
-    """Test that a LoRALinearSidecarLayer is equivalent to patching a linear layer with the LoRA layer."""
-
-    # Create a linear layer.
-    in_features = 10
-    out_features = 20
-    linear = torch.nn.Linear(in_features, out_features)
-
-    # Create a LoRA layer.
-    rank = 4
-    down = torch.randn(rank, in_features)
-    up = torch.randn(out_features, rank)
-    bias = torch.randn(out_features)
-    lora_layer = LoRALayer(up=up, mid=None, down=down, alpha=1.0, bias=bias)
-
-    # Patch the LoRA layer into the linear layer.
-    linear_patched = copy.deepcopy(linear)
-    linear_patched.weight.data += lora_layer.get_weight(linear_patched.weight) * lora_layer.scale()
-    linear_patched.bias.data += lora_layer.get_bias(linear_patched.bias) * lora_layer.scale()
-    # Create a LoRALinearSidecarLayer.
-    lora_linear_sidecar_layer = LoRALinearSidecarLayer(lora_layer, weight=1.0)
-    linear_with_sidecar = LoRASidecarModule(linear, [lora_linear_sidecar_layer])
-
-    # Run the LoRA-patched linear layer and the LoRALinearSidecarLayer and assert they are equal.
-    input = torch.randn(1, in_features)
-    output_patched = linear_patched(input)
-    output_sidecar = linear_with_sidecar(input)
-    assert torch.allclose(output_patched, output_sidecar, atol=1e-6)
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Ryan Dick	3ed6e65a6e	Enable LoRAPatcher.apply_smart_lora_patches(...) throughout the stack.	2024-12-12 22:41:50 +00:00
Ryan Dick	52c9646f84	(minor) Rename num_layers -> num_loras in unit tests.	2024-12-12 22:41:50 +00:00
Ryan Dick	7662f0522b	Add test_apply_smart_lora_patches_to_partially_loaded_model(...).	2024-12-12 22:41:50 +00:00
Ryan Dick	e50fe69839	Add LoRAPatcher.smart_apply_lora_patches()	2024-12-12 22:41:50 +00:00
Ryan Dick	5a9f884620	Refactor LoRAPatcher slightly in preparation for a 'smart' patcher.	2024-12-12 22:41:46 +00:00
Ryan Dick	edc72d1739	Fix LoRAPatcher.apply_lora_wrapper_patches(...)	2024-12-12 22:33:07 +00:00
Ryan Dick	23f521dc7c	Finish consolidating LoRA sidecar wrapper implementations.	2024-12-12 22:33:07 +00:00
Ryan Dick	3d6b93efdd	Begin to consolidate the LoRA sidecar and LoRA layer wrapper implementations.	2024-12-12 22:33:07 +00:00
Ryan Dick	3f28d3afad	Fix bias handling in LoRAModuleWrapper and add unit test that checks that all LoRA patching methods produce the same outputs.	2024-12-12 22:33:07 +00:00
Ryan Dick	9353bfbdd6	Add LoRA wrapper patching to LoRAPatcher.	2024-12-12 22:33:07 +00:00
Ryan Dick	93f2bc6118	Add LoRA wrapper layer.	2024-12-12 22:33:07 +00:00
Ryan Dick	9019026d6d	Fixes to get FLUX Control LoRA working.	2024-12-12 00:19:39 +00:00
Brandon Rising	c195b326ec	Lots of updates centered around using the lora patcher rather than changing the modules in the transformer model	2024-12-11 14:14:50 -05:00
Brandon Rising	2f460d2a45	Support bnb quantized nf4 flux models, Use controlnet vae, only support 1 structural lora per transformer. various other refractors and bugfixes	2024-12-10 03:26:29 -05:00
Brandon Rising	4473cba512	Initial setup for flux tools control loras	2024-12-09 16:01:29 -05:00
Eugene Brodsky	4c94d41fa9	(chore) ruff format	2024-12-04 17:02:08 +00:00
Eugene Brodsky	4036244ee9	(app) clarify log message when migrating old .cache	2024-12-04 17:02:08 +00:00
Eugene Brodsky	d06232d9ba	(config) ensure legacy model configs and node template are writable by the user even if the source files are read-only	2024-12-04 17:02:08 +00:00
Eugene Brodsky	bacbdfb8fc	(docker) add comments in docker-entrypoint.sh and ensure variables are not null in bash expansion	2024-12-04 17:02:08 +00:00
Eugene Brodsky	59f42f4682	(pkg) reduce max supported python version as we have not yet tested 3.12 well enough	2024-12-04 17:02:08 +00:00
Eugene Brodsky	a636ac2899	(docker) use 'uv' to manage python installation and the invoke dependencies, since Ubuntu 24.04 comes with Python 3.12 which we do not yet support	2024-12-04 17:02:08 +00:00
Richard Lyons	bd478360d9	Upgrade docker build to ubuntu 24	2024-12-04 17:02:08 +00:00
Richard Lyons	ac0db07649	Fix docker deployment	2024-12-04 17:02:08 +00:00
psychedelicious	b7132ce9e7	fix(ui): capitalization for vietnamese language	2024-12-03 14:52:28 -08:00
psychedelicious	90f30e7748	chore: bump version to v5.4.3	2024-12-03 14:50:09 -08:00
Riccardo Giovanetti	6b86a66bc7	translationBot(ui): update translation (Italian) Currently translated at 99.3% (1633 of 1643 strings) Co-authored-by: Riccardo Giovanetti <riccardo.giovanetti@gmail.com> Translate-URL: https://hosted.weblate.org/projects/invokeai/web-ui/it/ Translation: InvokeAI/Web UI	2024-12-03 13:16:12 -08:00
Linos	aa97e626e9	translationBot(ui): update translation (Vietnamese) Currently translated at 100.0% (1643 of 1643 strings) translationBot(ui): update translation (Vietnamese) Currently translated at 99.8% (1641 of 1643 strings) Co-authored-by: Linos <linos.coding@gmail.com> Translate-URL: https://hosted.weblate.org/projects/invokeai/web-ui/vi/ Translation: InvokeAI/Web UI	2024-12-03 13:13:26 -08:00
Ryan Dick	c90736093f	Revert FLUX performance improvement that fails on MacOS (#7423 ) ## Summary https://github.com/invoke-ai/InvokeAI/issues/7422 As reported in the above ticket, a recent FLUX performance improvement caused a regression on MacOS. This PR reverts the offending part of the change. ## Related Issues / Discussions - Closes #7422 - Original perf improvement: https://github.com/invoke-ai/InvokeAI/pull/7399 ## QA Instructions I don't have a Mac capable of running this test, so trusting the report in #7422 that this fixes the problem. ## Checklist - [x] _The PR has a short but descriptive title, suitable for a changelog_ - [x] _Tests added / updated (if applicable)_ - [x] _Documentation added / updated (if applicable)_ - [ ] _Updated `What's New` copy (if doing a release after this PR)_	2024-12-03 10:58:00 -05:00
Ryan Dick	0bff4ace1b	Revert performance improvement, because it caused flux inference to fail on Mac: https://github.com/invoke-ai/InvokeAI/issues/7422	2024-12-03 15:18:58 +00:00
psychedelicious	5eb382074e	tweak(ui): slightly clearer logic for skipping regional guidance	2024-12-02 23:46:21 -05:00
psychedelicious	46aa930526	fix(ui): skip disabled ref images	2024-12-02 23:46:21 -05:00
psychedelicious	3305bad0c2	fix(app): queue item id check before setting cancel flag should use `!=` instead of `is not` The `is` operator compares references, not values. Thanks to a wonderfully unintuitive quirk of python, `is` works on integers from `-5` to `256`, inclusive. Whenever integers in this range are used for a value, internally python returns a reference to a stable object in memory. When integers outside this range are used as a value, python creates a new object in memory for that integer. See `PyLong_FromLong` documentation here: https://docs.python.org/3/c-api/long.html Tying this back to our session processor, we were using `is` to compare the queue item ids for equality. Our queue item ids start at 0, and each queue item created increments this by one. So this comparison works only for the first 256 queue items on the machine. Starting with the 257th queue item, the comparison starts returning `False`, and cancelation gets weird. Easy fix - use `!=` instead of `is not`.	2024-12-02 23:22:58 -05:00
psychedelicious	13703d8f55	chore: bump version to v5.4.3rc2	2024-12-02 15:02:30 -08:00
psychedelicious	60d838d0a5	chore(ui): update whats new copy	2024-12-02 15:02:30 -08:00
Riccardo Giovanetti	2a157a44bf	translationBot(ui): update translation (Italian) Currently translated at 99.3% (1633 of 1643 strings) Co-authored-by: Riccardo Giovanetti <riccardo.giovanetti@gmail.com> Translate-URL: https://hosted.weblate.org/projects/invokeai/web-ui/it/ Translation: InvokeAI/Web UI	2024-12-02 14:52:05 -08:00
James Reynolds	d61b5833c2	Fix documentation broken links and remove whitespace at end of lines	2024-12-02 14:49:53 -08:00
Jonathan	c094838c6a	Update model_util.py	2024-12-02 14:35:02 -08:00
Hosted Weblate	2d334c8dd8	translationBot(ui): update translation files Updated by "Cleanup translation files" hook in Weblate. Co-authored-by: Hosted Weblate <hosted@weblate.org> Translate-URL: https://hosted.weblate.org/projects/invokeai/web-ui/ Translation: InvokeAI/Web UI	2024-12-02 14:05:51 -08:00
Mary Hipp	a6be26e174	fix(worker): only apply processor cancel logic if cancel event is for current queue item	2024-12-02 14:03:05 -08:00