Compare commits

..

1 Commits

Author SHA1 Message Date
Eugene Brodsky
bb066f6c33 (ci) remove python 3.10 from the test matrix; comment out GPU tests for now 2025-03-28 15:03:13 -04:00
344 changed files with 6309 additions and 11275 deletions

View File

@@ -1,11 +1,9 @@
*
!invokeai
!pyproject.toml
!uv.lock
!docker/docker-entrypoint.sh
!LICENSE
**/dist
**/node_modules
**/__pycache__
**/*.egg-info
**/*.egg-info

8
.github/CODEOWNERS vendored
View File

@@ -2,11 +2,11 @@
/.github/workflows/ @lstein @blessedcoolant @hipsterusername @ebr @jazzhaiku
# documentation
/docs/ @lstein @blessedcoolant @hipsterusername @psychedelicious
/mkdocs.yml @lstein @blessedcoolant @hipsterusername @psychedelicious
/docs/ @lstein @blessedcoolant @hipsterusername @Millu
/mkdocs.yml @lstein @blessedcoolant @hipsterusername @Millu
# nodes
/invokeai/app/ @blessedcoolant @psychedelicious @brandonrising @hipsterusername @jazzhaiku
/invokeai/app/ @Kyle0654 @blessedcoolant @psychedelicious @brandonrising @hipsterusername @jazzhaiku
# installation and configuration
/pyproject.toml @lstein @blessedcoolant @hipsterusername
@@ -22,7 +22,7 @@
/invokeai/backend @blessedcoolant @psychedelicious @lstein @maryhipp @hipsterusername
# generation, model management, postprocessing
/invokeai/backend @lstein @blessedcoolant @brandonrising @hipsterusername @jazzhaiku
/invokeai/backend @damian0815 @lstein @blessedcoolant @gregghelt2 @StAlKeR7779 @brandonrising @ryanjdick @hipsterusername @jazzhaiku
# front ends
/invokeai/frontend/CLI @lstein @hipsterusername

View File

@@ -97,8 +97,6 @@ jobs:
context: .
file: docker/Dockerfile
platforms: ${{ env.PLATFORMS }}
build-args: |
GPU_DRIVER=${{ matrix.gpu-driver }}
push: ${{ github.ref == 'refs/heads/main' || github.ref_type == 'tag' || github.event.inputs.push-to-registry }}
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}

View File

@@ -1,6 +1,6 @@
# Builds and uploads python build artifacts.
# Builds and uploads the installer and python build artifacts.
name: build wheel
name: build installer
on:
workflow_dispatch:
@@ -17,7 +17,7 @@ jobs:
- name: setup python
uses: actions/setup-python@v5
with:
python-version: '3.12'
python-version: '3.10'
cache: pip
cache-dependency-path: pyproject.toml
@@ -27,12 +27,19 @@ jobs:
- name: setup frontend
uses: ./.github/actions/install-frontend-deps
- name: build wheel
id: build_wheel
run: ./scripts/build_wheel.sh
- name: create installer
id: create_installer
run: ./create_installer.sh
working-directory: installer
- name: upload python distribution artifact
uses: actions/upload-artifact@v4
with:
name: dist
path: ${{ steps.build_wheel.outputs.DIST_PATH }}
path: ${{ steps.create_installer.outputs.DIST_PATH }}
- name: upload installer artifact
uses: actions/upload-artifact@v4
with:
name: installer
path: ${{ steps.create_installer.outputs.INSTALLER_PATH }}

View File

@@ -34,9 +34,6 @@ on:
jobs:
python-checks:
env:
# uv requires a venv by default - but for this, we can simply use the system python
UV_SYSTEM_PYTHON: 1
runs-on: ubuntu-latest
timeout-minutes: 5 # expected run time: <1 min
steps:
@@ -60,19 +57,25 @@ jobs:
- '!invokeai/frontend/web/**'
- 'tests/**'
- name: setup uv
- name: setup python
if: ${{ steps.changed-files.outputs.python_any_changed == 'true' || inputs.always_run == true }}
uses: astral-sh/setup-uv@v5
uses: actions/setup-python@v5
with:
version: '0.6.10'
enable-cache: true
python-version: '3.12'
cache: pip
cache-dependency-path: pyproject.toml
- name: install ruff
if: ${{ steps.changed-files.outputs.python_any_changed == 'true' || inputs.always_run == true }}
run: pip install ruff==0.11.2
shell: bash
- name: ruff check
if: ${{ steps.changed-files.outputs.python_any_changed == 'true' || inputs.always_run == true }}
run: uv tool run ruff@0.11.2 check --output-format=github .
run: ruff check --output-format=github .
shell: bash
- name: ruff format
if: ${{ steps.changed-files.outputs.python_any_changed == 'true' || inputs.always_run == true }}
run: uv tool run ruff@0.11.2 format --check .
run: ruff format --check .
shell: bash

View File

@@ -40,16 +40,24 @@ jobs:
matrix:
python-version:
- '3.11'
- '3.12'
platform:
# - linux-cuda-12_6
# - linux-rocm-6_2
- linux-cpu
- macos-default
- windows-cpu
include:
# - platform: linux-cuda-12_6
# os: ubuntu-24.04
# github-env: $GITHUB_ENV
# - platform: linux-rocm-6_2
# os: ubuntu-24.04
# extra-index-url: 'https://download.pytorch.org/whl/rocm6.2'
# github-env: $GITHUB_ENV
- platform: linux-cpu
os: ubuntu-24.04
extra-index-url: 'https://download.pytorch.org/whl/cpu'
github-env: $GITHUB_ENV
extra-index-url: 'https://download.pytorch.org/whl/cpu'
- platform: macos-default
os: macOS-14
github-env: $GITHUB_ENV
@@ -61,8 +69,6 @@ jobs:
timeout-minutes: 15 # expected run time: 2-6 min, depending on platform
env:
PIP_USE_PEP517: '1'
UV_SYSTEM_PYTHON: 1
steps:
- name: checkout
# https://github.com/nschloe/action-cached-lfs-checkout
@@ -85,25 +91,20 @@ jobs:
- '!invokeai/frontend/web/**'
- 'tests/**'
- name: setup uv
if: ${{ steps.changed-files.outputs.python_any_changed == 'true' || inputs.always_run == true }}
uses: astral-sh/setup-uv@v5
with:
version: '0.6.10'
enable-cache: true
python-version: ${{ matrix.python-version }}
- name: setup python
if: ${{ steps.changed-files.outputs.python_any_changed == 'true' || inputs.always_run == true }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
cache: pip
cache-dependency-path: pyproject.toml
- name: install dependencies
if: ${{ steps.changed-files.outputs.python_any_changed == 'true' || inputs.always_run == true }}
env:
UV_INDEX: ${{ matrix.extra-index-url }}
run: uv pip install --editable ".[test]"
PIP_EXTRA_INDEX_URL: ${{ matrix.extra-index-url }}
run: >
pip3 install --editable=".[test]"
- name: run pytest
if: ${{ steps.changed-files.outputs.python_any_changed == 'true' || inputs.always_run == true }}

View File

@@ -49,7 +49,7 @@ jobs:
always_run: true
build:
uses: ./.github/workflows/build-wheel.yml
uses: ./.github/workflows/build-installer.yml
publish-testpypi:
runs-on: ubuntu-latest

View File

@@ -54,25 +54,17 @@ jobs:
- 'pyproject.toml'
- 'invokeai/**'
- name: setup uv
if: ${{ steps.changed-files.outputs.src_any_changed == 'true' || inputs.always_run == true }}
uses: astral-sh/setup-uv@v5
with:
version: '0.6.10'
enable-cache: true
python-version: '3.11'
- name: setup python
if: ${{ steps.changed-files.outputs.src_any_changed == 'true' || inputs.always_run == true }}
uses: actions/setup-python@v5
with:
python-version: '3.11'
python-version: '3.10'
cache: pip
cache-dependency-path: pyproject.toml
- name: install dependencies
- name: install python dependencies
if: ${{ steps.changed-files.outputs.src_any_changed == 'true' || inputs.always_run == true }}
env:
UV_INDEX: ${{ matrix.extra-index-url }}
run: uv pip install --editable .
run: pip3 install --use-pep517 --editable="."
- name: install frontend dependencies
if: ${{ steps.changed-files.outputs.src_any_changed == 'true' || inputs.always_run == true }}
@@ -85,7 +77,7 @@ jobs:
- name: generate schema
if: ${{ steps.changed-files.outputs.src_any_changed == 'true' || inputs.always_run == true }}
run: cd invokeai/frontend/web && uv run ../../../scripts/generate_openapi_schema.py | pnpm typegen
run: make frontend-typegen
shell: bash
- name: compare files

View File

@@ -1,68 +0,0 @@
# Check the `uv` lockfile for consistency with `pyproject.toml`.
#
# If this check fails, you should run `uv lock` to update the lockfile.
name: 'uv lock checks'
on:
push:
branches:
- 'main'
pull_request:
types:
- 'ready_for_review'
- 'opened'
- 'synchronize'
merge_group:
workflow_dispatch:
inputs:
always_run:
description: 'Always run the checks'
required: true
type: boolean
default: true
workflow_call:
inputs:
always_run:
description: 'Always run the checks'
required: true
type: boolean
default: true
jobs:
uv-lock-checks:
env:
# uv requires a venv by default - but for this, we can simply use the system python
UV_SYSTEM_PYTHON: 1
runs-on: ubuntu-latest
timeout-minutes: 5 # expected run time: <1 min
steps:
- name: checkout
uses: actions/checkout@v4
- name: check for changed python files
if: ${{ inputs.always_run != true }}
id: changed-files
# Pinned to the _hash_ for v45.0.9 to prevent supply-chain attacks.
# See:
# - CVE-2025-30066
# - https://www.stepsecurity.io/blog/harden-runner-detection-tj-actions-changed-files-action-is-compromised
# - https://github.com/tj-actions/changed-files/issues/2463
uses: tj-actions/changed-files@a284dc1814e3fd07f2e34267fc8f81227ed29fb8
with:
files_yaml: |
uvlock-pyprojecttoml:
- 'pyproject.toml'
- 'uv.lock'
- name: setup uv
if: ${{ steps.changed-files.outputs.uvlock-pyprojecttoml_any_changed == 'true' || inputs.always_run == true }}
uses: astral-sh/setup-uv@v5
with:
version: '0.6.10'
enable-cache: true
- name: check lockfile
if: ${{ steps.changed-files.outputs.uvlock-pyprojecttoml_any_changed == 'true' || inputs.always_run == true }}
run: uv lock --locked # this will exit with 1 if the lockfile is not consistent with pyproject.toml
shell: bash

2
.nvmrc
View File

@@ -1 +1 @@
v22.14.0
v22.12.0

View File

@@ -4,29 +4,21 @@ repos:
hooks:
- id: black
name: black
stages: [pre-commit]
stages: [commit]
language: system
entry: black
types: [python]
- id: flake8
name: flake8
stages: [pre-commit]
stages: [commit]
language: system
entry: flake8
types: [python]
- id: isort
name: isort
stages: [pre-commit]
stages: [commit]
language: system
entry: isort
types: [python]
- id: uvlock
name: uv lock
stages: [pre-commit]
language: system
entry: uv lock
files: ^pyproject\.toml$
pass_filenames: false
types: [python]

View File

@@ -16,7 +16,7 @@ help:
@echo "frontend-build Build the frontend in order to run on localhost:9090"
@echo "frontend-dev Run the frontend in developer mode on localhost:5173"
@echo "frontend-typegen Generate types for the frontend from the OpenAPI schema"
@echo "wheel Build the wheel for the current version"
@echo "installer-zip Build the installer .zip file for the current version"
@echo "tag-release Tag the GitHub repository with the current version (use at release time only!)"
@echo "openapi Generate the OpenAPI schema for the app, outputting to stdout"
@echo "docs Serve the mkdocs site with live reload"
@@ -64,13 +64,13 @@ frontend-dev:
frontend-typegen:
cd invokeai/frontend/web && python ../../../scripts/generate_openapi_schema.py | pnpm typegen
# Tag the release
wheel:
cd scripts && ./build_wheel.sh
# Installer zip file
installer-zip:
cd installer && ./create_installer.sh
# Tag the release
tag-release:
cd scripts && ./tag_release.sh
cd installer && ./tag_release.sh
# Generate the OpenAPI Schema for the app
openapi:

View File

@@ -1,6 +1,77 @@
# syntax=docker/dockerfile:1.4
#### Web UI ------------------------------------
## Builder stage
FROM library/ubuntu:24.04 AS builder
ARG DEBIAN_FRONTEND=noninteractive
RUN rm -f /etc/apt/apt.conf.d/docker-clean; echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
--mount=type=cache,target=/var/lib/apt,sharing=locked \
apt update && apt-get install -y \
build-essential \
git
# Install `uv` for package management
COPY --from=ghcr.io/astral-sh/uv:0.6.0 /uv /uvx /bin/
ENV VIRTUAL_ENV=/opt/venv
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
ENV INVOKEAI_SRC=/opt/invokeai
ENV PYTHON_VERSION=3.11
ENV UV_PYTHON=3.11
ENV UV_COMPILE_BYTECODE=1
ENV UV_LINK_MODE=copy
ENV UV_PROJECT_ENVIRONMENT="$VIRTUAL_ENV"
ENV UV_INDEX="https://download.pytorch.org/whl/cu124"
ARG GPU_DRIVER=cuda
# unused but available
ARG BUILDPLATFORM
# Switch to the `ubuntu` user to work around dependency issues with uv-installed python
RUN mkdir -p ${VIRTUAL_ENV} && \
mkdir -p ${INVOKEAI_SRC} && \
chmod -R a+w /opt && \
mkdir ~ubuntu/.cache && chown ubuntu: ~ubuntu/.cache
USER ubuntu
# Install python
RUN --mount=type=cache,target=/home/ubuntu/.cache/uv,uid=1000,gid=1000 \
uv python install ${PYTHON_VERSION}
WORKDIR ${INVOKEAI_SRC}
# Install project's dependencies as a separate layer so they aren't rebuilt every commit.
# bind-mount instead of copy to defer adding sources to the image until next layer.
#
# NOTE: there are no pytorch builds for arm64 + cuda, only cpu
# x86_64/CUDA is the default
RUN --mount=type=cache,target=/home/ubuntu/.cache/uv,uid=1000,gid=1000 \
--mount=type=bind,source=pyproject.toml,target=pyproject.toml \
--mount=type=bind,source=invokeai/version,target=invokeai/version \
if [ "$TARGETPLATFORM" = "linux/arm64" ] || [ "$GPU_DRIVER" = "cpu" ]; then \
UV_INDEX="https://download.pytorch.org/whl/cpu"; \
elif [ "$GPU_DRIVER" = "rocm" ]; then \
UV_INDEX="https://download.pytorch.org/whl/rocm6.1"; \
fi && \
uv sync --no-install-project
# Now that the bulk of the dependencies have been installed, copy in the project files that change more frequently.
COPY invokeai invokeai
COPY pyproject.toml .
RUN --mount=type=cache,target=/home/ubuntu/.cache/uv,uid=1000,gid=1000 \
--mount=type=bind,source=pyproject.toml,target=pyproject.toml \
if [ "$TARGETPLATFORM" = "linux/arm64" ] || [ "$GPU_DRIVER" = "cpu" ]; then \
UV_INDEX="https://download.pytorch.org/whl/cpu"; \
elif [ "$GPU_DRIVER" = "rocm" ]; then \
UV_INDEX="https://download.pytorch.org/whl/rocm6.1"; \
fi && \
uv sync
#### Build the Web UI ------------------------------------
FROM docker.io/node:22-slim AS web-builder
ENV PNPM_HOME="/pnpm"
@@ -14,100 +85,69 @@ RUN --mount=type=cache,target=/pnpm/store \
pnpm install --frozen-lockfile
RUN npx vite build
## Backend ---------------------------------------
#### Runtime stage ---------------------------------------
FROM library/ubuntu:24.04
FROM library/ubuntu:24.04 AS runtime
ARG DEBIAN_FRONTEND=noninteractive
RUN rm -f /etc/apt/apt.conf.d/docker-clean; echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache
RUN --mount=type=cache,target=/var/cache/apt \
--mount=type=cache,target=/var/lib/apt \
apt update && apt install -y --no-install-recommends \
ca-certificates \
git \
gosu \
libglib2.0-0 \
libgl1 \
libglx-mesa0 \
build-essential \
libopencv-dev \
libstdc++-10-dev
ENV PYTHONUNBUFFERED=1
ENV PYTHONDONTWRITEBYTECODE=1
ENV \
PYTHONUNBUFFERED=1 \
PYTHONDONTWRITEBYTECODE=1 \
VIRTUAL_ENV=/opt/venv \
INVOKEAI_SRC=/opt/invokeai \
PYTHON_VERSION=3.12 \
UV_PYTHON=3.12 \
UV_COMPILE_BYTECODE=1 \
UV_MANAGED_PYTHON=1 \
UV_LINK_MODE=copy \
UV_PROJECT_ENVIRONMENT=/opt/venv \
UV_INDEX="https://download.pytorch.org/whl/cu124" \
INVOKEAI_ROOT=/invokeai \
INVOKEAI_HOST=0.0.0.0 \
INVOKEAI_PORT=9090 \
PATH="/opt/venv/bin:$PATH" \
CONTAINER_UID=${CONTAINER_UID:-1000} \
CONTAINER_GID=${CONTAINER_GID:-1000}
RUN apt update && apt install -y --no-install-recommends \
git \
curl \
vim \
tmux \
ncdu \
iotop \
bzip2 \
gosu \
magic-wormhole \
libglib2.0-0 \
libgl1 \
libglx-mesa0 \
build-essential \
libopencv-dev \
libstdc++-10-dev &&\
apt-get clean && apt-get autoclean
ARG GPU_DRIVER=cuda
ENV INVOKEAI_SRC=/opt/invokeai
ENV VIRTUAL_ENV=/opt/venv
ENV UV_PROJECT_ENVIRONMENT="$VIRTUAL_ENV"
ENV PYTHON_VERSION=3.11
ENV INVOKEAI_ROOT=/invokeai
ENV INVOKEAI_HOST=0.0.0.0
ENV INVOKEAI_PORT=9090
ENV PATH="$VIRTUAL_ENV/bin:$INVOKEAI_SRC:$PATH"
ENV CONTAINER_UID=${CONTAINER_UID:-1000}
ENV CONTAINER_GID=${CONTAINER_GID:-1000}
# Install `uv` for package management
COPY --from=ghcr.io/astral-sh/uv:0.6.9 /uv /uvx /bin/
# and install python for the ubuntu user (expected to exist on ubuntu >=24.x)
# this is too tiny to optimize with multi-stage builds, but maybe we'll come back to it
COPY --from=ghcr.io/astral-sh/uv:0.6.0 /uv /uvx /bin/
USER ubuntu
RUN uv python install ${PYTHON_VERSION}
USER root
# Install python & allow non-root user to use it by traversing the /root dir without read permissions
RUN --mount=type=cache,target=/root/.cache/uv \
uv python install ${PYTHON_VERSION} && \
# chmod --recursive a+rX /root/.local/share/uv/python
chmod 711 /root
WORKDIR ${INVOKEAI_SRC}
# Install project's dependencies as a separate layer so they aren't rebuilt every commit.
# bind-mount instead of copy to defer adding sources to the image until next layer.
#
# NOTE: there are no pytorch builds for arm64 + cuda, only cpu
# x86_64/CUDA is the default
RUN --mount=type=cache,target=/root/.cache/uv \
--mount=type=bind,source=pyproject.toml,target=pyproject.toml \
--mount=type=bind,source=uv.lock,target=uv.lock \
# this is just to get the package manager to recognize that the project exists, without making changes to the docker layer
--mount=type=bind,source=invokeai/version,target=invokeai/version \
if [ "$TARGETPLATFORM" = "linux/arm64" ] || [ "$GPU_DRIVER" = "cpu" ]; then UV_INDEX="https://download.pytorch.org/whl/cpu"; \
elif [ "$GPU_DRIVER" = "rocm" ]; then UV_INDEX="https://download.pytorch.org/whl/rocm6.2"; \
fi && \
uv sync --frozen
# build patchmatch
RUN cd /usr/lib/$(uname -p)-linux-gnu/pkgconfig/ && ln -sf opencv4.pc opencv.pc
RUN python -c "from patchmatch import patch_match"
# --link requires buldkit w/ dockerfile syntax 1.4
COPY --link --from=builder ${INVOKEAI_SRC} ${INVOKEAI_SRC}
COPY --link --from=builder ${VIRTUAL_ENV} ${VIRTUAL_ENV}
COPY --link --from=web-builder /build/dist ${INVOKEAI_SRC}/invokeai/frontend/web/dist
# Link amdgpu.ids for ROCm builds
# contributed by https://github.com/Rubonnek
RUN mkdir -p "/opt/amdgpu/share/libdrm" &&\
ln -s "/usr/share/libdrm/amdgpu.ids" "/opt/amdgpu/share/libdrm/amdgpu.ids"
ln -s "/usr/share/libdrm/amdgpu.ids" "/opt/amdgpu/share/libdrm/amdgpu.ids"
WORKDIR ${INVOKEAI_SRC}
# build patchmatch
RUN cd /usr/lib/$(uname -p)-linux-gnu/pkgconfig/ && ln -sf opencv4.pc opencv.pc
RUN python -c "from patchmatch import patch_match"
RUN mkdir -p ${INVOKEAI_ROOT} && chown -R ${CONTAINER_UID}:${CONTAINER_GID} ${INVOKEAI_ROOT}
COPY docker/docker-entrypoint.sh ./
ENTRYPOINT ["/opt/invokeai/docker-entrypoint.sh"]
CMD ["invokeai-web"]
# --link requires buldkit w/ dockerfile syntax 1.4, does not work with podman
COPY --link --from=web-builder /build/dist ${INVOKEAI_SRC}/invokeai/frontend/web/dist
# add sources last to minimize image changes on code changes
COPY invokeai ${INVOKEAI_SRC}/invokeai
# this should not increase image size because we've already installed dependencies
# in a previous layer
RUN --mount=type=cache,target=/root/.cache/uv \
--mount=type=bind,source=pyproject.toml,target=pyproject.toml \
--mount=type=bind,source=uv.lock,target=uv.lock \
if [ "$TARGETPLATFORM" = "linux/arm64" ] || [ "$GPU_DRIVER" = "cpu" ]; then UV_INDEX="https://download.pytorch.org/whl/cpu"; \
elif [ "$GPU_DRIVER" = "rocm" ]; then UV_INDEX="https://download.pytorch.org/whl/rocm6.2"; \
fi && \
uv pip install -e .

View File

@@ -60,11 +60,16 @@ Next, these jobs run and must pass. They are the same jobs that are run for ever
- **`frontend-checks`**: runs `prettier` (format), `eslint` (lint), `dpdm` (circular refs), `tsc` (static type check) and `knip` (unused imports)
- **`typegen-checks`**: ensures the frontend and backend types are synced
#### `build-wheel` Job
#### `build-installer` Job
This sets up both python and frontend dependencies and builds the python package. Internally, this runs `./scripts/build_wheel.sh` and uploads `dist.zip`, which contains the wheel and unarchived build.
This sets up both python and frontend dependencies and builds the python package. Internally, this runs `installer/create_installer.sh` and uploads two artifacts:
You don't need to download or test these artifacts.
- **`dist`**: the python distribution, to be published on PyPI
- **`InvokeAI-installer-${VERSION}.zip`**: the legacy install scripts
You don't need to download either of these files.
> The legacy install scripts are no longer used, but we haven't updated the workflow to skip building them.
#### Sanity Check & Smoke Test
@@ -74,7 +79,7 @@ It's possible to test the python package before it gets published to PyPI. We've
But, if you want to be extra-super careful, here's how to test it:
- Download the `dist.zip` build artifact from the `build-wheel` job
- Download the `dist.zip` build artifact from the `build-installer` job
- Unzip it and find the wheel file
- Create a fresh Invoke install by following the [manual install guide](https://invoke-ai.github.io/InvokeAI/installation/manual/) - but instead of installing from PyPI, install from the wheel
- Test the app

View File

@@ -41,7 +41,7 @@ If you just want to use Invoke, you should use the [launcher][launcher link].
With the modifications made, the install command should look something like this:
```sh
uv pip install -e ".[dev,test,docs,xformers]" --python 3.12 --python-preference only-managed --index=https://download.pytorch.org/whl/cu126 --reinstall
uv pip install -e ".[dev,test,docs,xformers]" --python 3.11 --python-preference only-managed --index=https://download.pytorch.org/whl/cu124 --reinstall
```
6. At this point, you should have Invoke installed, a venv set up and activated, and the server running. But you will see a warning in the terminal that no UI was found. If you go to the URL for the server, you won't get a UI.

View File

@@ -0,0 +1,121 @@
# Legacy Scripts
!!! warning "Legacy Scripts"
We recommend using the Invoke Launcher to install and update Invoke. It's a desktop application for Windows, macOS and Linux. It takes care of a lot of nitty gritty details for you.
Follow the [quick start guide](./quick_start.md) to get started.
!!! tip "Use the installer to update"
Using the installer for updates will not erase any of your data (images, models, boards, etc). It only updates the core libraries used to run Invoke.
Simply use the same path you installed to originally to update your existing installation.
Both release and pre-release versions can be installed using the installer. It also supports install through a wheel if needed.
Be sure to review the [installation requirements] and ensure your system has everything it needs to install Invoke.
## Getting the Latest Installer
Download the `InvokeAI-installer-vX.Y.Z.zip` file from the [latest release] page. It is at the bottom of the page, under **Assets**.
After unzipping the installer, you should have a `InvokeAI-Installer` folder with some files inside, including `install.bat` and `install.sh`.
## Running the Installer
!!! tip
Windows users should first double-click the `WinLongPathsEnabled.reg` file to prevent a failed installation due to long file paths.
Double-click the install script:
=== "Windows"
```sh
install.bat
```
=== "Linux/macOS"
```sh
install.sh
```
!!! info "Running the Installer from the commandline"
You can also run the install script from cmd/powershell (Windows) or terminal (Linux/macOS).
!!! warning "Untrusted Publisher (Windows)"
You may get a popup saying the file comes from an `Untrusted Publisher`. Click `More Info` and `Run Anyway` to get past this.
The installation process is simple, with a few prompts:
- Select the version to install. Unless you have a specific reason to install a specific version, select the default (the latest version).
- Select location for the install. Be sure you have enough space in this folder for the base application, as described in the [installation requirements].
- Select a GPU device.
!!! info "Slow Installation"
The installer needs to download several GB of data and install it all. It may appear to get stuck at 99.9% when installing `pytorch` or during a step labeled "Installing collected packages".
If it is stuck for over 10 minutes, something has probably gone wrong and you should close the window and restart.
## Running the Application
Find the install location you selected earlier. Double-click the launcher script to run the app:
=== "Windows"
```sh
invoke.bat
```
=== "Linux/macOS"
```sh
invoke.sh
```
Choose the first option to run the UI. After a series of startup messages, you'll see something like this:
```sh
Uvicorn running on http://127.0.0.1:9090 (Press CTRL+C to quit)
```
Copy the URL into your browser and you should see the UI.
## Improved Outpainting with PatchMatch
PatchMatch is an extra add-on that can improve outpainting. Windows users are in luck - it works out of the box.
On macOS and Linux, a few extra steps are needed to set it up. See the [PatchMatch installation guide](./patchmatch.md).
## First-time Setup
You will need to [install some models] before you can generate.
Check the [configuration docs] for details on configuring the application.
## Updating
Updating is exactly the same as installing - download the latest installer, choose the latest version, enter your existing installation path, and the app will update. None of your data (images, models, boards, etc) will be erased.
!!! info "Dependency Resolution Issues"
We've found that pip's dependency resolution can cause issues when upgrading packages. One very common problem was pip "downgrading" torch from CUDA to CPU, but things broke in other novel ways.
The installer doesn't have this kind of problem, so we use it for updating as well.
## Installation Issues
If you have installation issues, please review the [FAQ]. You can also [create an issue] or ask for help on [discord].
[installation requirements]: ./requirements.md
[FAQ]: ../faq.md
[install some models]: ./models.md
[configuration docs]: ../configuration.md
[latest release]: https://github.com/invoke-ai/InvokeAI/releases/latest
[create an issue]: https://github.com/invoke-ai/InvokeAI/issues
[discord]: https://discord.gg/ZmtBAhwWhy

View File

@@ -43,10 +43,10 @@ The following commands vary depending on the version of Invoke being installed a
3. Create a virtual environment in that directory:
```sh
uv venv --relocatable --prompt invoke --python 3.12 --python-preference only-managed .venv
uv venv --relocatable --prompt invoke --python 3.11 --python-preference only-managed .venv
```
This command creates a portable virtual environment at `.venv` complete with a portable python 3.12. It doesn't matter if your system has no python installed, or has a different version - `uv` will handle everything.
This command creates a portable virtual environment at `.venv` complete with a portable python 3.11. It doesn't matter if your system has no python installed, or has a different version - `uv` will handle everything.
4. Activate the virtual environment:
@@ -64,21 +64,14 @@ The following commands vary depending on the version of Invoke being installed a
5. Choose a version to install. Review the [GitHub releases page](https://github.com/invoke-ai/InvokeAI/releases).
6. Determine the package specifier to use when installing. This is a performance optimization.
6. Determine the package package specifier to use when installing. This is a performance optimization.
- If you have an Nvidia 20xx series GPU or older, use `invokeai[xformers]`.
- If you have an Nvidia 30xx series GPU or newer, or do not have an Nvidia GPU, use `invokeai`.
7. Determine the `PyPI` index URL to use for installation, if any. This is necessary to get the right version of torch installed.
=== "Invoke v5.10.0 and later"
- If you are on Windows or Linux with an Nvidia GPU, use `https://download.pytorch.org/whl/cu126`.
- If you are on Linux with no GPU, use `https://download.pytorch.org/whl/cpu`.
- If you are on Linux with an AMD GPU, use `https://download.pytorch.org/whl/rocm6.2.4`.
- **In all other cases, do not use an index.**
=== "Invoke v5.0.0 to v5.9.1"
=== "Invoke v5 or later"
- If you are on Windows with an Nvidia GPU, use `https://download.pytorch.org/whl/cu124`.
- If you are on Linux with no GPU, use `https://download.pytorch.org/whl/cpu`.
@@ -95,13 +88,13 @@ The following commands vary depending on the version of Invoke being installed a
8. Install the `invokeai` package. Substitute the package specifier and version.
```sh
uv pip install <PACKAGE_SPECIFIER>==<VERSION> --python 3.12 --python-preference only-managed --force-reinstall
uv pip install <PACKAGE_SPECIFIER>==<VERSION> --python 3.11 --python-preference only-managed --force-reinstall
```
If you determined you needed to use a `PyPI` index URL in the previous step, you'll need to add `--index=<INDEX_URL>` like this:
```sh
uv pip install <PACKAGE_SPECIFIER>==<VERSION> --python 3.12 --python-preference only-managed --index=<INDEX_URL> --force-reinstall
uv pip install <PACKAGE_SPECIFIER>==<VERSION> --python 3.11 --python-preference only-managed --index=<INDEX_URL> --force-reinstall
```
9. Deactivate and reactivate your venv so that the invokeai-specific commands become available in the environment:

View File

@@ -49,9 +49,9 @@ If you have an existing Invoke installation, you can select it and let the launc
!!! warning "Problem running the launcher on macOS"
macOS may not allow you to run the launcher. We are working to resolve this by signing the launcher executable. Until that is done, you can manually flag the launcher as safe:
macOS may not allow you to run the launcher. We are working to resolve this by signing the launcher executable. Until that is done, you can either use the [legacy scripts](./legacy_scripts.md) to install, or manually flag the launcher as safe:
- Open the **Invoke Community Edition.dmg** file.
- Open the **Invoke-Installer-mac-arm64.dmg** file.
- Drag the launcher to **Applications**.
- Open a terminal.
- Run `xattr -d 'com.apple.quarantine' /Applications/Invoke\ Community\ Edition.app`.
@@ -117,6 +117,7 @@ If you still have problems, ask for help on the Invoke [discord](https://discord
- You can install the Invoke application as a python package. See our [manual install](./manual.md) docs.
- You can run Invoke with docker. See our [docker install](./docker.md) docs.
- You can still use our legacy scripts to install and run Invoke. See the [legacy scripts](./legacy_scripts.md) docs.
## Need Help?

View File

@@ -41,7 +41,7 @@ The requirements below are rough guidelines for best performance. GPUs with less
You don't need to do this if you are installing with the [Invoke Launcher](./quick_start.md).
Invoke requires python 3.10 through 3.12. If you don't already have one of these versions installed, we suggest installing 3.12, as it will be supported for longer.
Invoke requires python 3.10 or 3.11. If you don't already have one of these versions installed, we suggest installing 3.11, as it will be supported for longer.
Check that your system has an up-to-date Python installed by running `python3 --version` in the terminal (Linux, macOS) or cmd/powershell (Windows).
@@ -49,19 +49,19 @@ Check that your system has an up-to-date Python installed by running `python3 --
=== "Windows"
- Install python with [an official installer].
- Install python 3.11 with [an official installer].
- The installer includes an option to add python to your PATH. Be sure to enable this. If you missed it, re-run the installer, choose to modify an existing installation, and tick that checkbox.
- You may need to install [Microsoft Visual C++ Redistributable].
=== "macOS"
- Install python with [an official installer].
- Install python 3.11 with [an official installer].
- If model installs fail with a certificate error, you may need to run this command (changing the python version to match what you have installed): `/Applications/Python\ 3.10/Install\ Certificates.command`
- If you haven't already, you will need to install the XCode CLI Tools by running `xcode-select --install` in a terminal.
=== "Linux"
- Installing python varies depending on your system. We recommend [using `uv` to manage your python installation](https://docs.astral.sh/uv/concepts/python-versions/#installing-a-python-version).
- Installing python varies depending on your system. On Ubuntu, you can use the [deadsnakes PPA](https://launchpad.net/~deadsnakes/+archive/ubuntu/ppa).
- You'll need to install `libglib2.0-0` and `libgl1-mesa-glx` for OpenCV to work. For example, on a Debian system: `sudo apt update && sudo apt install -y libglib2.0-0 libgl1-mesa-glx`
## Drivers

Binary file not shown.

View File

@@ -32,6 +32,12 @@ if [[ ! -z ${CI} ]]; then
echo
echo -e "${BCYAN}CI environment detected${RESET}"
echo
else
echo
echo -e "${BYELLOW}This script must be run from the installer directory!${RESET}"
echo "The current working directory is $(pwd)"
read -p "If that looks right, press any key to proceed, or CTRL-C to exit..."
echo
fi
echo -e "${BGREEN}HEAD${RESET}:"
@@ -71,8 +77,42 @@ fi
rm -rf ../build
python3 -m build --outdir ../dist/ ../.
python3 -m build --outdir dist/ ../.
# ----------------------
echo
echo "Building installer zip files for InvokeAI ${VERSION}..."
echo
# get rid of any old ones
rm -f *.zip
rm -rf InvokeAI-Installer
# copy content
mkdir InvokeAI-Installer
for f in templates *.txt *.reg; do
cp -r ${f} InvokeAI-Installer/
done
mkdir InvokeAI-Installer/lib
cp lib/*.py InvokeAI-Installer/lib
# Install scripts
# Mac/Linux
cp install.sh.in InvokeAI-Installer/install.sh
chmod a+x InvokeAI-Installer/install.sh
# Windows
cp install.bat.in InvokeAI-Installer/install.bat
cp WinLongPathsEnabled.reg InvokeAI-Installer/
FILENAME=InvokeAI-installer-$VERSION.zip
# Zip everything up
zip -r ${FILENAME} InvokeAI-Installer
echo
echo -e "${BGREEN}Built installer: ./${FILENAME}${RESET}"
echo -e "${BGREEN}Built PyPi distribution: ./dist${RESET}"
# clean up, but only if we are not in a github action
@@ -85,7 +125,9 @@ fi
if [[ ! -z ${CI} ]]; then
echo
echo "Setting GitHub action outputs..."
echo "DIST_PATH=./dist/" >>$GITHUB_OUTPUT
echo "INSTALLER_FILENAME=${FILENAME}" >>$GITHUB_OUTPUT
echo "INSTALLER_PATH=installer/${FILENAME}" >>$GITHUB_OUTPUT
echo "DIST_PATH=installer/dist/" >>$GITHUB_OUTPUT
fi
exit 0

128
installer/install.bat.in Normal file
View File

@@ -0,0 +1,128 @@
@echo off
setlocal EnableExtensions EnableDelayedExpansion
@rem This script requires the user to install Python 3.10 or higher. All other
@rem requirements are downloaded as needed.
@rem change to the script's directory
PUSHD "%~dp0"
set "no_cache_dir=--no-cache-dir"
if "%1" == "use-cache" (
set "no_cache_dir="
)
@rem Config
@rem The version in the next line is replaced by an up to date release number
@rem when create_installer.sh is run. Change the release number there.
set INSTRUCTIONS=https://invoke-ai.github.io/InvokeAI/installation/INSTALL_AUTOMATED/
set TROUBLESHOOTING=https://invoke-ai.github.io/InvokeAI/help/FAQ/
set PYTHON_URL=https://www.python.org/downloads/windows/
set MINIMUM_PYTHON_VERSION=3.10.0
set PYTHON_URL=https://www.python.org/downloads/release/python-3109/
set err_msg=An error has occurred and the script could not continue.
@rem --------------------------- Intro -------------------------------
echo This script will install InvokeAI and its dependencies.
echo.
echo BEFORE YOU START PLEASE MAKE SURE TO DO THE FOLLOWING
echo 1. Install python 3.10 or 3.11. Python version 3.9 is no longer supported.
echo 2. Double-click on the file WinLongPathsEnabled.reg in order to
echo enable long path support on your system.
echo 3. Install the Visual C++ core libraries.
echo Please download and install the libraries from:
echo https://learn.microsoft.com/en-US/cpp/windows/latest-supported-vc-redist?view=msvc-170
echo.
echo See %INSTRUCTIONS% for more details.
echo.
echo FOR THE BEST USER EXPERIENCE WE SUGGEST MAXIMIZING THIS WINDOW NOW.
pause
@rem ---------------------------- check Python version ---------------
echo ***** Checking and Updating Python *****
call python --version >.tmp1 2>.tmp2
if %errorlevel% == 1 (
set err_msg=Please install Python 3.10-11. See %INSTRUCTIONS% for details.
goto err_exit
)
for /f "tokens=2" %%i in (.tmp1) do set python_version=%%i
if "%python_version%" == "" (
set err_msg=No python was detected on your system. Please install Python version %MINIMUM_PYTHON_VERSION% or higher. We recommend Python 3.10.12 from %PYTHON_URL%
goto err_exit
)
call :compareVersions %MINIMUM_PYTHON_VERSION% %python_version%
if %errorlevel% == 1 (
set err_msg=Your version of Python is too low. You need at least %MINIMUM_PYTHON_VERSION% but you have %python_version%. We recommend Python 3.10.12 from %PYTHON_URL%
goto err_exit
)
@rem Cleanup
del /q .tmp1 .tmp2
@rem -------------- Install and Configure ---------------
call python .\lib\main.py
pause
exit /b
@rem ------------------------ Subroutines ---------------
@rem routine to do comparison of semantic version numbers
@rem found at https://stackoverflow.com/questions/15807762/compare-version-numbers-in-batch-file
:compareVersions
::
:: Compares two version numbers and returns the result in the ERRORLEVEL
::
:: Returns 1 if version1 > version2
:: 0 if version1 = version2
:: -1 if version1 < version2
::
:: The nodes must be delimited by . or , or -
::
:: Nodes are normally strictly numeric, without a 0 prefix. A letter suffix
:: is treated as a separate node
::
setlocal enableDelayedExpansion
set "v1=%~1"
set "v2=%~2"
call :divideLetters v1
call :divideLetters v2
:loop
call :parseNode "%v1%" n1 v1
call :parseNode "%v2%" n2 v2
if %n1% gtr %n2% exit /b 1
if %n1% lss %n2% exit /b -1
if not defined v1 if not defined v2 exit /b 0
if not defined v1 exit /b -1
if not defined v2 exit /b 1
goto :loop
:parseNode version nodeVar remainderVar
for /f "tokens=1* delims=.,-" %%A in ("%~1") do (
set "%~2=%%A"
set "%~3=%%B"
)
exit /b
:divideLetters versionVar
for %%C in (a b c d e f g h i j k l m n o p q r s t u v w x y z) do set "%~1=!%~1:%%C=.%%C!"
exit /b
:err_exit
echo %err_msg%
echo The installer will exit now.
pause
exit /b
pause
:Trim
SetLocal EnableDelayedExpansion
set Params=%*
for /f "tokens=1*" %%a in ("!Params!") do EndLocal & set %1=%%b
exit /b

40
installer/install.sh.in Executable file
View File

@@ -0,0 +1,40 @@
#!/bin/bash
# make sure we are not already in a venv
# (don't need to check status)
deactivate >/dev/null 2>&1
scriptdir=$(dirname "$0")
cd $scriptdir
function version { echo "$@" | awk -F. '{ printf("%d%03d%03d%03d\n", $1,$2,$3,$4); }'; }
MINIMUM_PYTHON_VERSION=3.10.0
MAXIMUM_PYTHON_VERSION=3.11.100
PYTHON=""
for candidate in python3.11 python3.10 python3 python ; do
if ppath=`which $candidate 2>/dev/null`; then
# when using `pyenv`, the executable for an inactive Python version will exist but will not be operational
# we check that this found executable can actually run
if [ $($candidate --version &>/dev/null; echo ${PIPESTATUS}) -gt 0 ]; then continue; fi
python_version=$($ppath -V | awk '{ print $2 }')
if [ $(version $python_version) -ge $(version "$MINIMUM_PYTHON_VERSION") ]; then
if [ $(version $python_version) -le $(version "$MAXIMUM_PYTHON_VERSION") ]; then
PYTHON=$ppath
break
fi
fi
fi
done
if [ -z "$PYTHON" ]; then
echo "A suitable Python interpreter could not be found"
echo "Please install Python $MINIMUM_PYTHON_VERSION or higher (maximum $MAXIMUM_PYTHON_VERSION) before running this script. See instructions at $INSTRUCTIONS for help."
read -p "Press any key to exit"
exit -1
fi
echo "For the best user experience we suggest enlarging or maximizing this window now."
exec $PYTHON ./lib/main.py ${@}
read -p "Press any key to exit"

438
installer/lib/installer.py Normal file
View File

@@ -0,0 +1,438 @@
# Copyright (c) 2023 Eugene Brodsky (https://github.com/ebr)
"""
InvokeAI installer script
"""
import locale
import os
import platform
import re
import shutil
import subprocess
import sys
import venv
from pathlib import Path
from tempfile import TemporaryDirectory
from typing import Optional, Tuple
SUPPORTED_PYTHON = ">=3.10.0,<=3.11.100"
INSTALLER_REQS = ["rich", "semver", "requests", "plumbum", "prompt-toolkit"]
BOOTSTRAP_VENV_PREFIX = "invokeai-installer-tmp"
DOCS_URL = "https://invoke-ai.github.io/InvokeAI/"
DISCORD_URL = "https://discord.gg/ZmtBAhwWhy"
OS = platform.uname().system
ARCH = platform.uname().machine
VERSION = "latest"
def get_version_from_wheel_filename(wheel_filename: str) -> str:
match = re.search(r"-(\d+\.\d+\.\d+)", wheel_filename)
if match:
version = match.group(1)
return version
else:
raise ValueError(f"Could not extract version from wheel filename: {wheel_filename}")
class Installer:
"""
Deploys an InvokeAI installation into a given path
"""
reqs: list[str] = INSTALLER_REQS
def __init__(self) -> None:
if os.getenv("VIRTUAL_ENV") is not None:
print("A virtual environment is already activated. Please 'deactivate' before installation.")
sys.exit(-1)
self.bootstrap()
self.available_releases = get_github_releases()
def mktemp_venv(self) -> TemporaryDirectory[str]:
"""
Creates a temporary virtual environment for the installer itself
:return: path to the created virtual environment directory
:rtype: TemporaryDirectory
"""
# Cleaning up temporary directories on Windows results in a race condition
# and a stack trace.
# `ignore_cleanup_errors` was only added in Python 3.10
if OS == "Windows" and int(platform.python_version_tuple()[1]) >= 10:
venv_dir = TemporaryDirectory(prefix=BOOTSTRAP_VENV_PREFIX, ignore_cleanup_errors=True)
else:
venv_dir = TemporaryDirectory(prefix=BOOTSTRAP_VENV_PREFIX)
venv.create(venv_dir.name, with_pip=True)
self.venv_dir = venv_dir
set_sys_path(Path(venv_dir.name))
return venv_dir
def bootstrap(self, verbose: bool = False) -> TemporaryDirectory[str] | None:
"""
Bootstrap the installer venv with packages required at install time
"""
print("Initializing the installer. This may take a minute - please wait...")
venv_dir = self.mktemp_venv()
pip = get_pip_from_venv(Path(venv_dir.name))
cmd = [pip, "install", "--require-virtualenv", "--use-pep517"]
cmd.extend(self.reqs)
try:
# upgrade pip to the latest version to avoid a confusing message
res = upgrade_pip(Path(venv_dir.name))
if verbose:
print(res)
# run the install prerequisites installation
res = subprocess.check_output(cmd).decode()
if verbose:
print(res)
return venv_dir
except subprocess.CalledProcessError as e:
print(e)
def app_venv(self, venv_parent: Path) -> Path:
"""
Create a virtualenv for the InvokeAI installation
"""
venv_dir = venv_parent / ".venv"
# Prefer to copy python executables
# so that updates to system python don't break InvokeAI
try:
venv.create(venv_dir, with_pip=True)
# If installing over an existing environment previously created with symlinks,
# the executables will fail to copy. Keep symlinks in that case
except shutil.SameFileError:
venv.create(venv_dir, with_pip=True, symlinks=True)
return venv_dir
def install(
self,
root: str = "~/invokeai",
yes_to_all: bool = False,
find_links: Optional[str] = None,
wheel: Optional[Path] = None,
) -> None:
"""Install the InvokeAI application into the given runtime path
Args:
root: Destination path for the installation
yes_to_all: Accept defaults to all questions
find_links: A local directory to search for requirement wheels before going to remote indexes
wheel: A wheel file to install
"""
import messages
if wheel:
messages.installing_from_wheel(wheel.name)
version = get_version_from_wheel_filename(wheel.name)
else:
messages.welcome(self.available_releases)
version = messages.choose_version(self.available_releases)
auto_dest = Path(os.environ.get("INVOKEAI_ROOT", root)).expanduser().resolve()
destination = auto_dest if yes_to_all else messages.dest_path(root)
if destination is None:
print("Could not find or create the destination directory. Installation cancelled.")
sys.exit(0)
# create the venv for the app
self.venv = self.app_venv(venv_parent=destination)
self.instance = InvokeAiInstance(runtime=destination, venv=self.venv, version=version)
# install dependencies and the InvokeAI application
(extra_index_url, optional_modules) = get_torch_source() if not yes_to_all else (None, None)
self.instance.install(extra_index_url, optional_modules, find_links, wheel)
# install the launch/update scripts into the runtime directory
self.instance.install_user_scripts()
message = f"""
*** Installation Successful ***
To start the application, run:
{destination}/invoke.{"bat" if sys.platform == "win32" else "sh"}
For more information, troubleshooting and support, visit our docs at:
{DOCS_URL}
Join the community on Discord:
{DISCORD_URL}
"""
print(message)
class InvokeAiInstance:
"""
Manages an installed instance of InvokeAI, comprising a virtual environment and a runtime directory.
The virtual environment *may* reside within the runtime directory.
A single runtime directory *may* be shared by multiple virtual environments, though this isn't currently tested or supported.
"""
def __init__(self, runtime: Path, venv: Path, version: str = "stable") -> None:
self.runtime = runtime
self.venv = venv
self.pip = get_pip_from_venv(venv)
self.version = version
set_sys_path(venv)
os.environ["INVOKEAI_ROOT"] = str(self.runtime.expanduser().resolve())
os.environ["VIRTUAL_ENV"] = str(self.venv.expanduser().resolve())
upgrade_pip(venv)
def get(self) -> tuple[Path, Path]:
"""
Get the location of the virtualenv directory for this installation
:return: Paths of the runtime and the venv directory
:rtype: tuple[Path, Path]
"""
return (self.runtime, self.venv)
def install(
self,
extra_index_url: Optional[str] = None,
optional_modules: Optional[str] = None,
find_links: Optional[str] = None,
wheel: Optional[Path] = None,
):
"""Install the package from PyPi or a wheel, if provided.
Args:
extra_index_url: the "--extra-index-url ..." line for pip to look in extra indexes.
optional_modules: optional modules to install using "[module1,module2]" format.
find_links: path to a directory containing wheels to be searched prior to going to the internet
wheel: a wheel file to install
"""
import messages
# not currently used, but may be useful for "install most recent version" option
if self.version == "prerelease":
version = None
pre_flag = "--pre"
elif self.version == "stable":
version = None
pre_flag = None
else:
version = self.version
pre_flag = None
src = "invokeai"
if optional_modules:
src += optional_modules
if version:
src += f"=={version}"
messages.simple_banner("Installing the InvokeAI Application :art:")
from plumbum import FG, ProcessExecutionError, local
pip = local[self.pip]
# Uninstall xformers if it is present; the correct version of it will be reinstalled if needed
_ = pip["uninstall", "-yqq", "xformers"] & FG
pipeline = pip[
"install",
"--require-virtualenv",
"--force-reinstall",
"--use-pep517",
str(src) if not wheel else str(wheel),
"--find-links" if find_links is not None else None,
find_links,
"--extra-index-url" if extra_index_url is not None else None,
extra_index_url,
pre_flag if not wheel else None, # Ignore the flag if we are installing a wheel
]
try:
_ = pipeline & FG
except ProcessExecutionError as e:
print(f"Error: {e}")
print(
"Could not install InvokeAI. Please try downloading the latest version of the installer and install again."
)
sys.exit(1)
def install_user_scripts(self):
"""
Copy the launch and update scripts to the runtime dir
"""
ext = "bat" if OS == "Windows" else "sh"
scripts = ["invoke"]
for script in scripts:
src = Path(__file__).parent / ".." / "templates" / f"{script}.{ext}.in"
dest = self.runtime / f"{script}.{ext}"
shutil.copy(src, dest)
os.chmod(dest, 0o0755)
### Utility functions ###
def get_pip_from_venv(venv_path: Path) -> str:
"""
Given a path to a virtual environment, get the absolute path to the `pip` executable
in a cross-platform fashion. Does not validate that the pip executable
actually exists in the virtualenv.
:param venv_path: Path to the virtual environment
:type venv_path: Path
:return: Absolute path to the pip executable
:rtype: str
"""
pip = "Scripts\\pip.exe" if OS == "Windows" else "bin/pip"
return str(venv_path.expanduser().resolve() / pip)
def upgrade_pip(venv_path: Path) -> str | None:
"""
Upgrade the pip executable in the given virtual environment
"""
python = "Scripts\\python.exe" if OS == "Windows" else "bin/python"
python = str(venv_path.expanduser().resolve() / python)
try:
result = subprocess.check_output([python, "-m", "pip", "install", "--upgrade", "pip"]).decode(
encoding=locale.getpreferredencoding()
)
except subprocess.CalledProcessError as e:
print(e)
result = None
return result
def set_sys_path(venv_path: Path) -> None:
"""
Given a path to a virtual environment, set the sys.path, in a cross-platform fashion,
such that packages from the given venv may be imported in the current process.
Ensure that the packages from system environment are not visible (emulate
the virtual env 'activate' script) - this doesn't work on Windows yet.
:param venv_path: Path to the virtual environment
:type venv_path: Path
"""
# filter out any paths in sys.path that may be system- or user-wide
# but leave the temporary bootstrap virtualenv as it contains packages we
# temporarily need at install time
sys.path = list(filter(lambda p: not p.endswith("-packages") or p.find(BOOTSTRAP_VENV_PREFIX) != -1, sys.path))
# determine site-packages/lib directory location for the venv
lib = "Lib" if OS == "Windows" else f"lib/python{sys.version_info.major}.{sys.version_info.minor}"
# add the site-packages location to the venv
sys.path.append(str(Path(venv_path, lib, "site-packages").expanduser().resolve()))
def get_github_releases() -> tuple[list[str], list[str]] | None:
"""
Query Github for published (pre-)release versions.
Return a tuple where the first element is a list of stable releases and the second element is a list of pre-releases.
Return None if the query fails for any reason.
"""
import requests
## get latest releases using github api
url = "https://api.github.com/repos/invoke-ai/InvokeAI/releases"
releases: list[str] = []
pre_releases: list[str] = []
try:
res = requests.get(url)
res.raise_for_status()
tag_info = res.json()
for tag in tag_info:
if not tag["prerelease"]:
releases.append(tag["tag_name"].lstrip("v"))
else:
pre_releases.append(tag["tag_name"].lstrip("v"))
except requests.HTTPError as e:
print(f"Error: {e}")
print("Could not fetch version information from GitHub. Please check your network connection and try again.")
return
except Exception as e:
print(f"Error: {e}")
print("An unexpected error occurred while trying to fetch version information from GitHub. Please try again.")
return
releases.sort(reverse=True)
pre_releases.sort(reverse=True)
return releases, pre_releases
def get_torch_source() -> Tuple[str | None, str | None]:
"""
Determine the extra index URL for pip to use for torch installation.
This depends on the OS and the graphics accelerator in use.
This is only applicable to Windows and Linux, since PyTorch does not
offer accelerated builds for macOS.
Prefer CUDA-enabled wheels if the user wasn't sure of their GPU, as it will fallback to CPU if possible.
A NoneType return means just go to PyPi.
:return: tuple consisting of (extra index url or None, optional modules to load or None)
:rtype: list
"""
from messages import GpuType, select_gpu
# device can be one of: "cuda", "rocm", "cpu", "cuda_and_dml, autodetect"
device = select_gpu()
# The correct extra index URLs for torch are inconsistent, see https://pytorch.org/get-started/locally/#start-locally
url = None
optional_modules: str | None = None
if OS == "Linux":
if device == GpuType.ROCM:
url = "https://download.pytorch.org/whl/rocm6.1"
elif device == GpuType.CPU:
url = "https://download.pytorch.org/whl/cpu"
elif device == GpuType.CUDA:
url = "https://download.pytorch.org/whl/cu124"
optional_modules = "[onnx-cuda]"
elif device == GpuType.CUDA_WITH_XFORMERS:
url = "https://download.pytorch.org/whl/cu124"
optional_modules = "[xformers,onnx-cuda]"
elif OS == "Windows":
if device == GpuType.CUDA:
url = "https://download.pytorch.org/whl/cu124"
optional_modules = "[onnx-cuda]"
elif device == GpuType.CUDA_WITH_XFORMERS:
url = "https://download.pytorch.org/whl/cu124"
optional_modules = "[xformers,onnx-cuda]"
elif device.value == "cpu":
# CPU uses the default PyPi index, no optional modules
pass
elif OS == "Darwin":
# macOS uses the default PyPi index, no optional modules
pass
# Fall back to defaults
return (url, optional_modules)

57
installer/lib/main.py Normal file
View File

@@ -0,0 +1,57 @@
"""
InvokeAI Installer
"""
import argparse
import os
from pathlib import Path
from installer import Installer
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument(
"-r",
"--root",
dest="root",
type=str,
help="Destination path for installation",
default=os.environ.get("INVOKEAI_ROOT") or "~/invokeai",
)
parser.add_argument(
"-y",
"--yes",
"--yes-to-all",
dest="yes_to_all",
action="store_true",
help="Assume default answers to all questions",
default=False,
)
parser.add_argument(
"--find-links",
dest="find_links",
help="Specifies a directory of local wheel files to be searched prior to searching the online repositories.",
type=Path,
default=None,
)
parser.add_argument(
"--wheel",
dest="wheel",
help="Specifies a wheel for the InvokeAI package. Used for troubleshooting or testing prereleases.",
type=Path,
default=None,
)
args = parser.parse_args()
inst = Installer()
try:
inst.install(**args.__dict__)
except KeyboardInterrupt:
print("\n")
print("Ctrl-C pressed. Aborting.")
print("Come back soon!")

342
installer/lib/messages.py Normal file
View File

@@ -0,0 +1,342 @@
# Copyright (c) 2023 Eugene Brodsky (https://github.com/ebr)
"""
Installer user interaction
"""
import os
import platform
from enum import Enum
from pathlib import Path
from typing import Optional
from prompt_toolkit import prompt
from prompt_toolkit.completion import FuzzyWordCompleter, PathCompleter
from prompt_toolkit.validation import Validator
from rich import box, print
from rich.console import Console, Group, group
from rich.panel import Panel
from rich.prompt import Confirm
from rich.style import Style
from rich.syntax import Syntax
from rich.text import Text
OS = platform.uname().system
ARCH = platform.uname().machine
if OS == "Windows":
# Windows terminals look better without a background colour
console = Console(style=Style(color="grey74"))
else:
console = Console(style=Style(color="grey74", bgcolor="grey19"))
def welcome(available_releases: tuple[list[str], list[str]] | None = None) -> None:
@group()
def text():
if (platform_specific := _platform_specific_help()) is not None:
yield platform_specific
yield ""
yield Text.from_markup(
"Some of the installation steps take a long time to run. Please be patient. If the script appears to hang for more than 10 minutes, please interrupt with [i]Control-C[/] and retry.",
justify="center",
)
if available_releases is not None:
latest_stable = available_releases[0][0]
last_pre = available_releases[1][0]
yield ""
yield Text.from_markup(
f"[red3]🠶[/] Latest stable release (recommended): [b bright_white]{latest_stable}", justify="center"
)
yield Text.from_markup(
f"[red3]🠶[/] Last published pre-release version: [b bright_white]{last_pre}", justify="center"
)
console.rule()
print(
Panel(
title="[bold wheat1]Welcome to the InvokeAI Installer",
renderable=text(),
box=box.DOUBLE,
expand=True,
padding=(1, 2),
style=Style(bgcolor="grey23", color="orange1"),
subtitle=f"[bold grey39]{OS}-{ARCH}",
)
)
console.line()
def installing_from_wheel(wheel_filename: str) -> None:
"""Display a message about installing from a wheel"""
@group()
def text():
yield Text.from_markup(f"You are installing from a wheel file: [bold]{wheel_filename}\n")
yield Text.from_markup(
"[bold orange3]If you are not sure why you are doing this, you should cancel and install InvokeAI normally."
)
console.print(
Panel(
title="Installing from Wheel",
renderable=text(),
box=box.DOUBLE,
expand=True,
padding=(1, 2),
)
)
should_proceed = Confirm.ask("Do you want to proceed?")
if not should_proceed:
console.print("Installation cancelled.")
exit()
def choose_version(available_releases: tuple[list[str], list[str]] | None = None) -> str:
"""
Prompt the user to choose an Invoke version to install
"""
# short circuit if we couldn't get a version list
# still try to install the latest stable version
if available_releases is None:
return "stable"
console.print(":grey_question: [orange3]Please choose an Invoke version to install.")
choices = available_releases[0] + available_releases[1]
response = prompt(
message=f" <Enter> to install the recommended release ({choices[0]}). <Tab> or type to pick a version: ",
complete_while_typing=True,
completer=FuzzyWordCompleter(choices),
)
console.print(f" Version {choices[0] if response == '' else response} will be installed.")
console.line()
return "stable" if response == "" else response
def confirm_install(dest: Path) -> bool:
if dest.exists():
print(f":stop_sign: Directory {dest} already exists!")
print(" Is this location correct?")
default = False
else:
print(f":file_folder: InvokeAI will be installed in {dest}")
default = True
dest_confirmed = Confirm.ask(" Please confirm:", default=default)
console.line()
return dest_confirmed
def dest_path(dest: Optional[str | Path] = None) -> Path | None:
"""
Prompt the user for the destination path and create the path
:param dest: a filesystem path, defaults to None
:type dest: str, optional
:return: absolute path to the created installation directory
:rtype: Path
"""
if dest is not None:
dest = Path(dest).expanduser().resolve()
else:
dest = Path.cwd().expanduser().resolve()
prev_dest = init_path = dest
dest_confirmed = False
while not dest_confirmed:
browse_start = (dest or Path.cwd()).expanduser().resolve()
path_completer = PathCompleter(
only_directories=True,
expanduser=True,
get_paths=lambda: [str(browse_start)], # noqa: B023
# get_paths=lambda: [".."].extend(list(browse_start.iterdir()))
)
console.line()
console.print(f":grey_question: [orange3]Please select the install destination:[/] \\[{browse_start}]: ")
selected = prompt(
">>> ",
complete_in_thread=True,
completer=path_completer,
default=str(browse_start) + os.sep,
vi_mode=True,
complete_while_typing=True,
# Test that this is not needed on Windows
# complete_style=CompleteStyle.READLINE_LIKE,
)
prev_dest = dest
dest = Path(selected)
console.line()
dest_confirmed = confirm_install(dest.expanduser().resolve())
if not dest_confirmed:
dest = prev_dest
dest = dest.expanduser().resolve()
try:
dest.mkdir(exist_ok=True, parents=True)
return dest
except PermissionError:
console.print(
f"Failed to create directory {dest} due to insufficient permissions",
style=Style(color="red"),
highlight=True,
)
except OSError:
console.print_exception()
if Confirm.ask("Would you like to try again?"):
dest_path(init_path)
else:
console.rule("Goodbye!")
class GpuType(Enum):
CUDA_WITH_XFORMERS = "xformers"
CUDA = "cuda"
ROCM = "rocm"
CPU = "cpu"
def select_gpu() -> GpuType:
"""
Prompt the user to select the GPU driver
"""
if ARCH == "arm64" and OS != "Darwin":
print(f"Only CPU acceleration is available on {ARCH} architecture. Proceeding with that.")
return GpuType.CPU
nvidia = (
"an [gold1 b]NVIDIA[/] RTX 3060 or newer GPU using CUDA",
GpuType.CUDA,
)
vintage_nvidia = (
"an [gold1 b]NVIDIA[/] RTX 20xx or older GPU using CUDA+xFormers",
GpuType.CUDA_WITH_XFORMERS,
)
amd = (
"an [gold1 b]AMD[/] GPU using ROCm",
GpuType.ROCM,
)
cpu = (
"Do not install any GPU support, use CPU for generation (slow)",
GpuType.CPU,
)
options = []
if OS == "Windows":
options = [nvidia, vintage_nvidia, cpu]
if OS == "Linux":
options = [nvidia, vintage_nvidia, amd, cpu]
elif OS == "Darwin":
options = [cpu]
if len(options) == 1:
return options[0][1]
options = {str(i): opt for i, opt in enumerate(options, 1)}
console.rule(":space_invader: GPU (Graphics Card) selection :space_invader:")
console.print(
Panel(
Group(
"\n".join(
[
f"Detected the [gold1]{OS}-{ARCH}[/] platform",
"",
"See [deep_sky_blue1]https://invoke-ai.github.io/InvokeAI/installation/requirements/[/] to ensure your system meets the minimum requirements.",
"",
"[red3]🠶[/] [b]Your GPU drivers must be correctly installed before using InvokeAI![/] [red3]🠴[/]",
]
),
"",
"Please select the type of GPU installed in your computer.",
Panel(
"\n".join([f"[dark_goldenrod b i]{i}[/] [dark_red]🢒[/]{opt[0]}" for (i, opt) in options.items()]),
box=box.MINIMAL,
),
),
box=box.MINIMAL,
padding=(1, 1),
)
)
choice = prompt(
"Please make your selection: ",
validator=Validator.from_callable(
lambda n: n in options.keys(), error_message="Please select one the above options"
),
)
return options[choice][1]
def simple_banner(message: str) -> None:
"""
A simple banner with a message, defined here for styling consistency
:param message: The message to display
:type message: str
"""
console.rule(message)
# TODO this does not yet work correctly
def windows_long_paths_registry() -> None:
"""
Display a message about applying the Windows long paths registry fix
"""
with open(str(Path(__file__).parent / "WinLongPathsEnabled.reg"), "r", encoding="utf-16le") as code:
syntax = Syntax(code.read(), line_numbers=True, lexer="regedit")
console.print(
Panel(
Group(
"\n".join(
[
"We will now apply a registry fix to enable long paths on Windows. InvokeAI needs this to function correctly. We are asking your permission to modify the Windows Registry on your behalf.",
"",
"This is the change that will be applied:",
str(syntax),
]
)
),
title="Windows Long Paths registry fix",
box=box.HORIZONTALS,
padding=(1, 1),
)
)
def _platform_specific_help() -> Text | None:
if OS == "Darwin":
text = Text.from_markup(
"""[b wheat1]macOS Users![/]\n\nPlease be sure you have the [b wheat1]Xcode command-line tools[/] installed before continuing.\nIf not, cancel with [i]Control-C[/] and follow the Xcode install instructions at [deep_sky_blue1]https://www.freecodecamp.org/news/install-xcode-command-line-tools/[/]."""
)
elif OS == "Windows":
text = Text.from_markup(
"""[b wheat1]Windows Users![/]\n\nBefore you start, please do the following:
1. Double-click on the file [b wheat1]WinLongPathsEnabled.reg[/] in order to
enable long path support on your system.
2. Make sure you have the [b wheat1]Visual C++ core libraries[/] installed. If not, install from
[deep_sky_blue1]https://learn.microsoft.com/en-US/cpp/windows/latest-supported-vc-redist?view=msvc-170[/]"""
)
else:
return
return text

52
installer/readme.txt Normal file
View File

@@ -0,0 +1,52 @@
InvokeAI
Project homepage: https://github.com/invoke-ai/InvokeAI
Preparations:
You will need to install Python 3.10 or higher for this installer
to work. Instructions are given here:
https://invoke-ai.github.io/InvokeAI/installation/INSTALL_AUTOMATED/
Before you start the installer, please open up your system's command
line window (Terminal or Command) and type the commands:
python --version
If all is well, it will print "Python 3.X.X", where the version number
is at least 3.10.*, and not higher than 3.11.*.
If this works, check the version of the Python package manager, pip:
pip --version
You should get a message that indicates that the pip package
installer was derived from Python 3.10 or 3.11. For example:
"pip 22.0.1 from /usr/bin/pip (python 3.10)"
Long Paths on Windows:
If you are on Windows, you will need to enable Windows Long Paths to
run InvokeAI successfully. If you're not sure what this is, you
almost certainly need to do this.
Simply double-click the "WinLongPathsEnabled.reg" file located in
this directory, and approve the Windows warnings. Note that you will
need to have admin privileges in order to do this.
Launching the installer:
Windows: double-click the 'install.bat' file (while keeping it inside
the InvokeAI-Installer folder).
Linux and Mac: Please open the terminal application and run
'./install.sh' (while keeping it inside the InvokeAI-Installer
folder).
The installer will create a directory of your choice and install the
InvokeAI application within it. This directory contains everything you need to run
invokeai. Once InvokeAI is up and running, you may delete the
InvokeAI-Installer folder at your convenience.
For more information, please see
https://invoke-ai.github.io/InvokeAI/installation/INSTALL_AUTOMATED/

View File

@@ -0,0 +1,54 @@
@echo off
PUSHD "%~dp0"
setlocal
call .venv\Scripts\activate.bat
set INVOKEAI_ROOT=.
:start
echo Desired action:
echo 1. Generate images with the browser-based interface
echo 2. Open the developer console
echo 3. Command-line help
echo Q - Quit
echo.
echo To update, download and run the installer from https://github.com/invoke-ai/InvokeAI/releases/latest
echo.
set /P choice="Please enter 1-4, Q: [1] "
if not defined choice set choice=1
IF /I "%choice%" == "1" (
echo Starting the InvokeAI browser-based UI..
python .venv\Scripts\invokeai-web.exe %*
) ELSE IF /I "%choice%" == "2" (
echo Developer Console
echo Python command is:
where python
echo Python version is:
python --version
echo *************************
echo You are now in the system shell, with the local InvokeAI Python virtual environment activated,
echo so that you can troubleshoot this InvokeAI installation as necessary.
echo *************************
echo *** Type `exit` to quit this shell and deactivate the Python virtual environment ***
call cmd /k
) ELSE IF /I "%choice%" == "3" (
echo Displaying command line help...
python .venv\Scripts\invokeai-web.exe --help %*
pause
exit /b
) ELSE IF /I "%choice%" == "q" (
echo Goodbye!
goto ending
) ELSE (
echo Invalid selection
pause
exit /b
)
goto start
endlocal
pause
:ending
exit /b

View File

@@ -0,0 +1,87 @@
#!/bin/bash
# MIT License
# Coauthored by Lincoln Stein, Eugene Brodsky and Joshua Kimsey
# Copyright 2023, The InvokeAI Development Team
####
# This launch script assumes that:
# 1. it is located in the runtime directory,
# 2. the .venv is also located in the runtime directory and is named exactly that
#
# If both of the above are not true, this script will likely not work as intended.
# Activate the virtual environment and run `invoke.py` directly.
####
set -eu
# Ensure we're in the correct folder in case user's CWD is somewhere else
scriptdir=$(dirname $(readlink -f "$0"))
cd "$scriptdir"
. .venv/bin/activate
export INVOKEAI_ROOT="$scriptdir"
# Stash the CLI args - when we prompt for user input, `$@` is overwritten
PARAMS=$@
# This setting allows torch to fall back to CPU for operations that are not supported by MPS on macOS.
if [ "$(uname -s)" == "Darwin" ]; then
export PYTORCH_ENABLE_MPS_FALLBACK=1
fi
# Primary function for the case statement to determine user input
do_choice() {
case $1 in
1)
clear
printf "Generate images with a browser-based interface\n"
invokeai-web $PARAMS
;;
2)
clear
printf "Open the developer console\n"
file_name=$(basename "${BASH_SOURCE[0]}")
bash --init-file "$file_name"
;;
3)
clear
printf "Command-line help\n"
invokeai-web --help
;;
*)
clear
printf "Exiting...\n"
exit
;;
esac
clear
}
# Command-line interface for launching Invoke functions
do_line_input() {
clear
printf "What would you like to do?\n"
printf "1: Generate images using the browser-based interface\n"
printf "2: Open the developer console\n"
printf "3: Command-line help\n"
printf "Q: Quit\n\n"
printf "To update, download and run the installer from https://github.com/invoke-ai/InvokeAI/releases/latest\n\n"
read -p "Please enter 1-4, Q: [1] " yn
choice=${yn:='1'}
do_choice $choice
clear
}
# Main IF statement for launching Invoke, and for checking if the user is in the developer console
if [ "$0" != "bash" ]; then
while true; do
do_line_input
done
else # in developer console
python --version
printf "Press ^D to exit\n"
export PS1="(InvokeAI) \u@\h \w> "
fi

View File

@@ -37,14 +37,7 @@ from invokeai.app.services.style_preset_records.style_preset_records_sqlite impo
from invokeai.app.services.urls.urls_default import LocalUrlService
from invokeai.app.services.workflow_records.workflow_records_sqlite import SqliteWorkflowRecordsStorage
from invokeai.app.services.workflow_thumbnails.workflow_thumbnails_disk import WorkflowThumbnailFileStorageDisk
from invokeai.backend.stable_diffusion.diffusion.conditioning_data import (
BasicConditioningInfo,
CogView4ConditioningInfo,
ConditioningFieldData,
FLUXConditioningInfo,
SD3ConditioningInfo,
SDXLConditioningInfo,
)
from invokeai.backend.stable_diffusion.diffusion.conditioning_data import ConditioningFieldData
from invokeai.backend.util.logging import InvokeAILogger
from invokeai.version.invokeai_version import __version__
@@ -108,25 +101,10 @@ class ApiDependencies:
images = ImageService()
invocation_cache = MemoryInvocationCache(max_cache_size=config.node_cache_size)
tensors = ObjectSerializerForwardCache(
ObjectSerializerDisk[torch.Tensor](
output_folder / "tensors",
safe_globals=[torch.Tensor],
ephemeral=True,
),
ObjectSerializerDisk[torch.Tensor](output_folder / "tensors", ephemeral=True)
)
conditioning = ObjectSerializerForwardCache(
ObjectSerializerDisk[ConditioningFieldData](
output_folder / "conditioning",
safe_globals=[
ConditioningFieldData,
BasicConditioningInfo,
SDXLConditioningInfo,
FLUXConditioningInfo,
SD3ConditioningInfo,
CogView4ConditioningInfo,
],
ephemeral=True,
),
ObjectSerializerDisk[ConditioningFieldData](output_folder / "conditioning", ephemeral=True)
)
download_queue_service = DownloadQueueService(app_config=configuration, event_bus=events)
model_images_service = ModelImageFileStorageDisk(model_images_folder / "model_images")

View File

@@ -85,7 +85,6 @@ example_model_config = {
"config_path": "string",
"key": "string",
"hash": "string",
"file_size": 1,
"description": "string",
"source": "string",
"converted_at": 0,

View File

@@ -2,7 +2,7 @@ from typing import Optional
from fastapi import Body, Path, Query
from fastapi.routing import APIRouter
from pydantic import BaseModel, Field
from pydantic import BaseModel
from invokeai.app.api.dependencies import ApiDependencies
from invokeai.app.services.session_processor.session_processor_common import SessionProcessorStatus
@@ -15,7 +15,6 @@ from invokeai.app.services.session_queue.session_queue_common import (
CancelByDestinationResult,
ClearResult,
EnqueueBatchResult,
FieldIdentifier,
PruneResult,
RetryItemsResult,
SessionQueueCountsByDestination,
@@ -35,12 +34,6 @@ class SessionQueueAndProcessorStatus(BaseModel):
processor: SessionProcessorStatus
class ValidationRunData(BaseModel):
workflow_id: str = Field(description="The id of the workflow being published.")
input_fields: list[FieldIdentifier] = Body(description="The input fields for the published workflow")
output_fields: list[FieldIdentifier] = Body(description="The output fields for the published workflow")
@session_queue_router.post(
"/{queue_id}/enqueue_batch",
operation_id="enqueue_batch",
@@ -52,10 +45,6 @@ async def enqueue_batch(
queue_id: str = Path(description="The queue id to perform this operation on"),
batch: Batch = Body(description="Batch to process"),
prepend: bool = Body(default=False, description="Whether or not to prepend this batch in the queue"),
validation_run_data: Optional[ValidationRunData] = Body(
default=None,
description="The validation run data to use for this batch. This is only used if this is a validation run.",
),
) -> EnqueueBatchResult:
"""Processes a batch and enqueues the output graphs for execution."""

View File

@@ -106,7 +106,6 @@ async def list_workflows(
tags: Optional[list[str]] = Query(default=None, description="The tags of workflow to get"),
query: Optional[str] = Query(default=None, description="The text to query by (matches name and description)"),
has_been_opened: Optional[bool] = Query(default=None, description="Whether to include/exclude recent workflows"),
is_published: Optional[bool] = Query(default=None, description="Whether to include/exclude published workflows"),
) -> PaginatedResults[WorkflowRecordListItemWithThumbnailDTO]:
"""Gets a page of workflows"""
workflows_with_thumbnails: list[WorkflowRecordListItemWithThumbnailDTO] = []
@@ -119,7 +118,6 @@ async def list_workflows(
categories=categories,
tags=tags,
has_been_opened=has_been_opened,
is_published=is_published,
)
for workflow in workflows.items:
workflows_with_thumbnails.append(

View File

@@ -8,7 +8,6 @@ import sys
import warnings
from abc import ABC, abstractmethod
from enum import Enum
from functools import lru_cache
from inspect import signature
from typing import (
TYPE_CHECKING,
@@ -28,6 +27,7 @@ import semver
from pydantic import BaseModel, ConfigDict, Field, TypeAdapter, create_model
from pydantic.fields import FieldInfo
from pydantic_core import PydanticUndefined
from typing_extensions import TypeAliasType
from invokeai.app.invocations.fields import (
FieldKind,
@@ -100,6 +100,37 @@ class BaseInvocationOutput(BaseModel):
All invocation outputs must use the `@invocation_output` decorator to provide their unique type.
"""
_output_classes: ClassVar[set[BaseInvocationOutput]] = set()
_typeadapter: ClassVar[Optional[TypeAdapter[Any]]] = None
_typeadapter_needs_update: ClassVar[bool] = False
@classmethod
def register_output(cls, output: BaseInvocationOutput) -> None:
"""Registers an invocation output."""
cls._output_classes.add(output)
cls._typeadapter_needs_update = True
@classmethod
def get_outputs(cls) -> Iterable[BaseInvocationOutput]:
"""Gets all invocation outputs."""
return cls._output_classes
@classmethod
def get_typeadapter(cls) -> TypeAdapter[Any]:
"""Gets a pydantc TypeAdapter for the union of all invocation output types."""
if not cls._typeadapter or cls._typeadapter_needs_update:
AnyInvocationOutput = TypeAliasType(
"AnyInvocationOutput", Annotated[Union[tuple(cls._output_classes)], Field(discriminator="type")]
)
cls._typeadapter = TypeAdapter(AnyInvocationOutput)
cls._typeadapter_needs_update = False
return cls._typeadapter
@classmethod
def get_output_types(cls) -> Iterable[str]:
"""Gets all invocation output types."""
return (i.get_type() for i in BaseInvocationOutput.get_outputs())
@staticmethod
def json_schema_extra(schema: dict[str, Any], model_class: Type[BaseInvocationOutput]) -> None:
"""Adds various UI-facing attributes to the invocation output's OpenAPI schema."""
@@ -142,16 +173,76 @@ class BaseInvocation(ABC, BaseModel):
All invocations must use the `@invocation` decorator to provide their unique type.
"""
_invocation_classes: ClassVar[set[BaseInvocation]] = set()
_typeadapter: ClassVar[Optional[TypeAdapter[Any]]] = None
_typeadapter_needs_update: ClassVar[bool] = False
@classmethod
def get_type(cls) -> str:
"""Gets the invocation's type, as provided by the `@invocation` decorator."""
return cls.model_fields["type"].default
@classmethod
def register_invocation(cls, invocation: BaseInvocation) -> None:
"""Registers an invocation."""
cls._invocation_classes.add(invocation)
cls._typeadapter_needs_update = True
@classmethod
def get_typeadapter(cls) -> TypeAdapter[Any]:
"""Gets a pydantc TypeAdapter for the union of all invocation types."""
if not cls._typeadapter or cls._typeadapter_needs_update:
AnyInvocation = TypeAliasType(
"AnyInvocation", Annotated[Union[tuple(cls.get_invocations())], Field(discriminator="type")]
)
cls._typeadapter = TypeAdapter(AnyInvocation)
cls._typeadapter_needs_update = False
return cls._typeadapter
@classmethod
def invalidate_typeadapter(cls) -> None:
"""Invalidates the typeadapter, forcing it to be rebuilt on next access. If the invocation allowlist or
denylist is changed, this should be called to ensure the typeadapter is updated and validation respects
the updated allowlist and denylist."""
cls._typeadapter_needs_update = True
@classmethod
def get_invocations(cls) -> Iterable[BaseInvocation]:
"""Gets all invocations, respecting the allowlist and denylist."""
app_config = get_config()
allowed_invocations: set[BaseInvocation] = set()
for sc in cls._invocation_classes:
invocation_type = sc.get_type()
is_in_allowlist = (
invocation_type in app_config.allow_nodes if isinstance(app_config.allow_nodes, list) else True
)
is_in_denylist = (
invocation_type in app_config.deny_nodes if isinstance(app_config.deny_nodes, list) else False
)
if is_in_allowlist and not is_in_denylist:
allowed_invocations.add(sc)
return allowed_invocations
@classmethod
def get_invocations_map(cls) -> dict[str, BaseInvocation]:
"""Gets a map of all invocation types to their invocation classes."""
return {i.get_type(): i for i in BaseInvocation.get_invocations()}
@classmethod
def get_invocation_types(cls) -> Iterable[str]:
"""Gets all invocation types."""
return (i.get_type() for i in BaseInvocation.get_invocations())
@classmethod
def get_output_annotation(cls) -> BaseInvocationOutput:
"""Gets the invocation's output annotation (i.e. the return annotation of its `invoke()` method)."""
return signature(cls.invoke).return_annotation
@classmethod
def get_invocation_for_type(cls, invocation_type: str) -> BaseInvocation | None:
"""Gets the invocation class for a given invocation type."""
return cls.get_invocations_map().get(invocation_type)
@staticmethod
def json_schema_extra(schema: dict[str, Any], model_class: Type[BaseInvocation]) -> None:
"""Adds various UI-facing attributes to the invocation's OpenAPI schema."""
@@ -249,105 +340,6 @@ class BaseInvocation(ABC, BaseModel):
TBaseInvocation = TypeVar("TBaseInvocation", bound=BaseInvocation)
class InvocationRegistry:
_invocation_classes: ClassVar[set[type[BaseInvocation]]] = set()
_output_classes: ClassVar[set[type[BaseInvocationOutput]]] = set()
@classmethod
def register_invocation(cls, invocation: type[BaseInvocation]) -> None:
"""Registers an invocation."""
cls._invocation_classes.add(invocation)
cls.invalidate_invocation_typeadapter()
@classmethod
@lru_cache(maxsize=1)
def get_invocation_typeadapter(cls) -> TypeAdapter[Any]:
"""Gets a pydantic TypeAdapter for the union of all invocation types.
This is used to parse serialized invocations into the correct invocation class.
This method is cached to avoid rebuilding the TypeAdapter on every access. If the invocation allowlist or
denylist is changed, the cache should be cleared to ensure the TypeAdapter is updated and validation respects
the updated allowlist and denylist.
@see https://docs.pydantic.dev/latest/concepts/type_adapter/
"""
return TypeAdapter(Annotated[Union[tuple(cls.get_invocation_classes())], Field(discriminator="type")])
@classmethod
def invalidate_invocation_typeadapter(cls) -> None:
"""Invalidates the cached invocation type adapter."""
cls.get_invocation_typeadapter.cache_clear()
@classmethod
def get_invocation_classes(cls) -> Iterable[type[BaseInvocation]]:
"""Gets all invocations, respecting the allowlist and denylist."""
app_config = get_config()
allowed_invocations: set[type[BaseInvocation]] = set()
for sc in cls._invocation_classes:
invocation_type = sc.get_type()
is_in_allowlist = (
invocation_type in app_config.allow_nodes if isinstance(app_config.allow_nodes, list) else True
)
is_in_denylist = (
invocation_type in app_config.deny_nodes if isinstance(app_config.deny_nodes, list) else False
)
if is_in_allowlist and not is_in_denylist:
allowed_invocations.add(sc)
return allowed_invocations
@classmethod
def get_invocations_map(cls) -> dict[str, type[BaseInvocation]]:
"""Gets a map of all invocation types to their invocation classes."""
return {i.get_type(): i for i in cls.get_invocation_classes()}
@classmethod
def get_invocation_types(cls) -> Iterable[str]:
"""Gets all invocation types."""
return (i.get_type() for i in cls.get_invocation_classes())
@classmethod
def get_invocation_for_type(cls, invocation_type: str) -> type[BaseInvocation] | None:
"""Gets the invocation class for a given invocation type."""
return cls.get_invocations_map().get(invocation_type)
@classmethod
def register_output(cls, output: "type[TBaseInvocationOutput]") -> None:
"""Registers an invocation output."""
cls._output_classes.add(output)
cls.invalidate_output_typeadapter()
@classmethod
def get_output_classes(cls) -> Iterable[type[BaseInvocationOutput]]:
"""Gets all invocation outputs."""
return cls._output_classes
@classmethod
@lru_cache(maxsize=1)
def get_output_typeadapter(cls) -> TypeAdapter[Any]:
"""Gets a pydantic TypeAdapter for the union of all invocation output types.
This is used to parse serialized invocation outputs into the correct invocation output class.
This method is cached to avoid rebuilding the TypeAdapter on every access. If the invocation allowlist or
denylist is changed, the cache should be cleared to ensure the TypeAdapter is updated and validation respects
the updated allowlist and denylist.
@see https://docs.pydantic.dev/latest/concepts/type_adapter/
"""
return TypeAdapter(Annotated[Union[tuple(cls._output_classes)], Field(discriminator="type")])
@classmethod
def invalidate_output_typeadapter(cls) -> None:
"""Invalidates the cached invocation output type adapter."""
cls.get_output_typeadapter.cache_clear()
@classmethod
def get_output_types(cls) -> Iterable[str]:
"""Gets all invocation output types."""
return (i.get_type() for i in cls.get_output_classes())
RESERVED_NODE_ATTRIBUTE_FIELD_NAMES = {
"id",
"is_intermediate",
@@ -461,8 +453,8 @@ def invocation(
node_pack = cls.__module__.split(".")[0]
# Handle the case where an existing node is being clobbered by the one we are registering
if invocation_type in InvocationRegistry.get_invocation_types():
clobbered_invocation = InvocationRegistry.get_invocation_for_type(invocation_type)
if invocation_type in BaseInvocation.get_invocation_types():
clobbered_invocation = BaseInvocation.get_invocation_for_type(invocation_type)
# This should always be true - we just checked if the invocation type was in the set
assert clobbered_invocation is not None
@@ -547,7 +539,8 @@ def invocation(
)
cls.__doc__ = docstring
InvocationRegistry.register_invocation(cls)
# TODO: how to type this correctly? it's typed as ModelMetaclass, a private class in pydantic
BaseInvocation.register_invocation(cls) # type: ignore
return cls
@@ -572,7 +565,7 @@ def invocation_output(
if re.compile(r"^\S+$").match(output_type) is None:
raise ValueError(f'"output_type" must consist of non-whitespace characters, got "{output_type}"')
if output_type in InvocationRegistry.get_output_types():
if output_type in BaseInvocationOutput.get_output_types():
raise ValueError(f'Invocation type "{output_type}" already exists')
validate_fields(cls.model_fields, output_type)
@@ -593,7 +586,7 @@ def invocation_output(
)
cls.__doc__ = docstring
InvocationRegistry.register_output(cls)
BaseInvocationOutput.register_output(cls) # type: ignore # TODO: how to type this correctly?
return cls

View File

@@ -1,363 +0,0 @@
from typing import Callable, Optional
import torch
import torchvision.transforms as tv_transforms
from diffusers.models.transformers.transformer_cogview4 import CogView4Transformer2DModel
from torchvision.transforms.functional import resize as tv_resize
from tqdm import tqdm
from invokeai.app.invocations.baseinvocation import BaseInvocation, Classification, invocation
from invokeai.app.invocations.constants import LATENT_SCALE_FACTOR
from invokeai.app.invocations.fields import (
CogView4ConditioningField,
DenoiseMaskField,
FieldDescriptions,
Input,
InputField,
LatentsField,
WithBoard,
WithMetadata,
)
from invokeai.app.invocations.model import TransformerField
from invokeai.app.invocations.primitives import LatentsOutput
from invokeai.app.services.shared.invocation_context import InvocationContext
from invokeai.backend.flux.sampling_utils import clip_timestep_schedule_fractional
from invokeai.backend.model_manager.config import BaseModelType
from invokeai.backend.rectified_flow.rectified_flow_inpaint_extension import RectifiedFlowInpaintExtension
from invokeai.backend.stable_diffusion.diffusers_pipeline import PipelineIntermediateState
from invokeai.backend.stable_diffusion.diffusion.conditioning_data import CogView4ConditioningInfo
from invokeai.backend.util.devices import TorchDevice
@invocation(
"cogview4_denoise",
title="Denoise - CogView4",
tags=["image", "cogview4"],
category="image",
version="1.0.0",
classification=Classification.Prototype,
)
class CogView4DenoiseInvocation(BaseInvocation, WithMetadata, WithBoard):
"""Run the denoising process with a CogView4 model."""
# If latents is provided, this means we are doing image-to-image.
latents: Optional[LatentsField] = InputField(
default=None, description=FieldDescriptions.latents, input=Input.Connection
)
# denoise_mask is used for image-to-image inpainting. Only the masked region is modified.
denoise_mask: Optional[DenoiseMaskField] = InputField(
default=None, description=FieldDescriptions.denoise_mask, input=Input.Connection
)
denoising_start: float = InputField(default=0.0, ge=0, le=1, description=FieldDescriptions.denoising_start)
denoising_end: float = InputField(default=1.0, ge=0, le=1, description=FieldDescriptions.denoising_end)
transformer: TransformerField = InputField(
description=FieldDescriptions.cogview4_model, input=Input.Connection, title="Transformer"
)
positive_conditioning: CogView4ConditioningField = InputField(
description=FieldDescriptions.positive_cond, input=Input.Connection
)
negative_conditioning: CogView4ConditioningField = InputField(
description=FieldDescriptions.negative_cond, input=Input.Connection
)
cfg_scale: float | list[float] = InputField(default=3.5, description=FieldDescriptions.cfg_scale, title="CFG Scale")
width: int = InputField(default=1024, multiple_of=32, description="Width of the generated image.")
height: int = InputField(default=1024, multiple_of=32, description="Height of the generated image.")
steps: int = InputField(default=25, gt=0, description=FieldDescriptions.steps)
seed: int = InputField(default=0, description="Randomness seed for reproducibility.")
@torch.no_grad()
def invoke(self, context: InvocationContext) -> LatentsOutput:
latents = self._run_diffusion(context)
latents = latents.detach().to("cpu")
name = context.tensors.save(tensor=latents)
return LatentsOutput.build(latents_name=name, latents=latents, seed=None)
def _prep_inpaint_mask(self, context: InvocationContext, latents: torch.Tensor) -> torch.Tensor | None:
"""Prepare the inpaint mask.
- Loads the mask
- Resizes if necessary
- Casts to same device/dtype as latents
Args:
context (InvocationContext): The invocation context, for loading the inpaint mask.
latents (torch.Tensor): A latent image tensor. Used to determine the target shape, device, and dtype for the
inpaint mask.
Returns:
torch.Tensor | None: Inpaint mask. Values of 0.0 represent the regions to be fully denoised, and 1.0
represent the regions to be preserved.
"""
if self.denoise_mask is None:
return None
mask = context.tensors.load(self.denoise_mask.mask_name)
# The input denoise_mask contains values in [0, 1], where 0.0 represents the regions to be fully denoised, and
# 1.0 represents the regions to be preserved.
# We invert the mask so that the regions to be preserved are 0.0 and the regions to be denoised are 1.0.
mask = 1.0 - mask
_, _, latent_height, latent_width = latents.shape
mask = tv_resize(
img=mask,
size=[latent_height, latent_width],
interpolation=tv_transforms.InterpolationMode.BILINEAR,
antialias=False,
)
mask = mask.to(device=latents.device, dtype=latents.dtype)
return mask
def _load_text_conditioning(
self,
context: InvocationContext,
conditioning_name: str,
dtype: torch.dtype,
device: torch.device,
) -> torch.Tensor:
# Load the conditioning data.
cond_data = context.conditioning.load(conditioning_name)
assert len(cond_data.conditionings) == 1
cogview4_conditioning = cond_data.conditionings[0]
assert isinstance(cogview4_conditioning, CogView4ConditioningInfo)
cogview4_conditioning = cogview4_conditioning.to(dtype=dtype, device=device)
return cogview4_conditioning.glm_embeds
def _get_noise(
self,
batch_size: int,
num_channels_latents: int,
height: int,
width: int,
dtype: torch.dtype,
device: torch.device,
seed: int,
) -> torch.Tensor:
# We always generate noise on the same device and dtype then cast to ensure consistency across devices/dtypes.
rand_device = "cpu"
rand_dtype = torch.float16
return torch.randn(
batch_size,
num_channels_latents,
int(height) // LATENT_SCALE_FACTOR,
int(width) // LATENT_SCALE_FACTOR,
device=rand_device,
dtype=rand_dtype,
generator=torch.Generator(device=rand_device).manual_seed(seed),
).to(device=device, dtype=dtype)
def _prepare_cfg_scale(self, num_timesteps: int) -> list[float]:
"""Prepare the CFG scale list.
Args:
num_timesteps (int): The number of timesteps in the scheduler. Could be different from num_steps depending
on the scheduler used (e.g. higher order schedulers).
Returns:
list[float]: _description_
"""
if isinstance(self.cfg_scale, float):
cfg_scale = [self.cfg_scale] * num_timesteps
elif isinstance(self.cfg_scale, list):
assert len(self.cfg_scale) == num_timesteps
cfg_scale = self.cfg_scale
else:
raise ValueError(f"Invalid CFG scale type: {type(self.cfg_scale)}")
return cfg_scale
def _convert_timesteps_to_sigmas(self, image_seq_len: int, timesteps: torch.Tensor) -> list[float]:
# The logic to prepare the timestep / sigma schedule is based on:
# https://github.com/huggingface/diffusers/blob/b38450d5d2e5b87d5ff7088ee5798c85587b9635/src/diffusers/pipelines/cogview4/pipeline_cogview4.py#L575-L595
# The default FlowMatchEulerDiscreteScheduler configs are based on:
# https://huggingface.co/THUDM/CogView4-6B/blob/fb6f57289c73ac6d139e8d81bd5a4602d1877847/scheduler/scheduler_config.json
# This implementation differs slightly from the original for the sake of simplicity (differs in terminal value
# handling, not quantizing timesteps to integers, etc.).
def calculate_timestep_shift(
image_seq_len: int, base_seq_len: int = 256, base_shift: float = 0.25, max_shift: float = 0.75
) -> float:
m = (image_seq_len / base_seq_len) ** 0.5
mu = m * max_shift + base_shift
return mu
def time_shift_linear(mu: float, sigma: float, t: torch.Tensor) -> torch.Tensor:
return mu / (mu + (1 / t - 1) ** sigma)
mu = calculate_timestep_shift(image_seq_len)
sigmas = time_shift_linear(mu, 1.0, timesteps)
return sigmas.tolist()
def _run_diffusion(
self,
context: InvocationContext,
):
inference_dtype = torch.bfloat16
device = TorchDevice.choose_torch_device()
transformer_info = context.models.load(self.transformer.transformer)
assert isinstance(transformer_info.model, CogView4Transformer2DModel)
# Load/process the conditioning data.
# TODO(ryand): Make CFG optional.
do_classifier_free_guidance = True
pos_prompt_embeds = self._load_text_conditioning(
context=context,
conditioning_name=self.positive_conditioning.conditioning_name,
dtype=inference_dtype,
device=device,
)
neg_prompt_embeds = self._load_text_conditioning(
context=context,
conditioning_name=self.negative_conditioning.conditioning_name,
dtype=inference_dtype,
device=device,
)
# Prepare misc. conditioning variables.
# TODO(ryand): We could expose these as params (like with SDXL). But, we should experiment to see if they are
# useful first.
original_size = torch.tensor([(self.height, self.width)], dtype=pos_prompt_embeds.dtype, device=device)
target_size = torch.tensor([(self.height, self.width)], dtype=pos_prompt_embeds.dtype, device=device)
crops_coords_top_left = torch.tensor([(0, 0)], dtype=pos_prompt_embeds.dtype, device=device)
# Prepare the timestep / sigma schedule.
patch_size = transformer_info.model.config.patch_size # type: ignore
assert isinstance(patch_size, int)
image_seq_len = ((self.height // LATENT_SCALE_FACTOR) * (self.width // LATENT_SCALE_FACTOR)) // (patch_size**2)
# We add an extra step to the end to account for the final timestep of 0.0.
timesteps: list[float] = torch.linspace(1, 0, self.steps + 1).tolist()
# Clip the timesteps schedule based on denoising_start and denoising_end.
timesteps = clip_timestep_schedule_fractional(timesteps, self.denoising_start, self.denoising_end)
sigmas = self._convert_timesteps_to_sigmas(image_seq_len, torch.tensor(timesteps))
total_steps = len(timesteps) - 1
# Prepare the CFG scale list.
cfg_scale = self._prepare_cfg_scale(total_steps)
# Load the input latents, if provided.
init_latents = context.tensors.load(self.latents.latents_name) if self.latents else None
if init_latents is not None:
init_latents = init_latents.to(device=device, dtype=inference_dtype)
# Generate initial latent noise.
num_channels_latents = transformer_info.model.config.in_channels # type: ignore
assert isinstance(num_channels_latents, int)
noise = self._get_noise(
batch_size=1,
num_channels_latents=num_channels_latents,
height=self.height,
width=self.width,
dtype=inference_dtype,
device=device,
seed=self.seed,
)
# Prepare input latent image.
if init_latents is not None:
# Noise the init_latents by the appropriate amount for the first timestep.
s_0 = sigmas[0]
latents = s_0 * noise + (1.0 - s_0) * init_latents
else:
# init_latents are not provided, so we are not doing image-to-image (i.e. we are starting from pure noise).
if self.denoising_start > 1e-5:
raise ValueError("denoising_start should be 0 when initial latents are not provided.")
latents = noise
# If len(timesteps) == 1, then short-circuit. We are just noising the input latents, but not taking any
# denoising steps.
if len(timesteps) <= 1:
return latents
# Prepare inpaint extension.
inpaint_mask = self._prep_inpaint_mask(context, latents)
inpaint_extension: RectifiedFlowInpaintExtension | None = None
if inpaint_mask is not None:
assert init_latents is not None
inpaint_extension = RectifiedFlowInpaintExtension(
init_latents=init_latents,
inpaint_mask=inpaint_mask,
noise=noise,
)
step_callback = self._build_step_callback(context)
step_callback(
PipelineIntermediateState(
step=0,
order=1,
total_steps=total_steps,
timestep=int(timesteps[0]),
latents=latents,
),
)
with transformer_info.model_on_device() as (_, transformer):
assert isinstance(transformer, CogView4Transformer2DModel)
# Denoising loop
for step_idx in tqdm(range(total_steps)):
t_curr = timesteps[step_idx]
sigma_curr = sigmas[step_idx]
sigma_prev = sigmas[step_idx + 1]
# Expand the timestep to match the latent model input.
# Multiply by 1000 to match the default FlowMatchEulerDiscreteScheduler num_train_timesteps.
timestep = torch.tensor([t_curr * 1000], device=device).expand(latents.shape[0])
# TODO(ryand): Support both sequential and batched CFG inference.
noise_pred_cond = transformer(
hidden_states=latents,
encoder_hidden_states=pos_prompt_embeds,
timestep=timestep,
original_size=original_size,
target_size=target_size,
crop_coords=crops_coords_top_left,
return_dict=False,
)[0]
# Apply CFG.
if do_classifier_free_guidance:
noise_pred_uncond = transformer(
hidden_states=latents,
encoder_hidden_states=neg_prompt_embeds,
timestep=timestep,
original_size=original_size,
target_size=target_size,
crop_coords=crops_coords_top_left,
return_dict=False,
)[0]
noise_pred = noise_pred_uncond + cfg_scale[step_idx] * (noise_pred_cond - noise_pred_uncond)
else:
noise_pred = noise_pred_cond
# Compute the previous noisy sample x_t -> x_t-1.
latents_dtype = latents.dtype
# TODO(ryand): Is casting to float32 necessary for precision/stability? I copied this from SD3.
latents = latents.to(dtype=torch.float32)
latents = latents + (sigma_prev - sigma_curr) * noise_pred
latents = latents.to(dtype=latents_dtype)
if inpaint_extension is not None:
latents = inpaint_extension.merge_intermediate_latents_with_init_latents(latents, sigma_prev)
step_callback(
PipelineIntermediateState(
step=step_idx + 1,
order=1,
total_steps=total_steps,
timestep=int(t_curr),
latents=latents,
),
)
return latents
def _build_step_callback(self, context: InvocationContext) -> Callable[[PipelineIntermediateState], None]:
def step_callback(state: PipelineIntermediateState) -> None:
context.util.sd_step_callback(state, BaseModelType.CogView4)
return step_callback

View File

@@ -1,69 +0,0 @@
import einops
import torch
from diffusers.models.autoencoders.autoencoder_kl import AutoencoderKL
from invokeai.app.invocations.baseinvocation import BaseInvocation, Classification, invocation
from invokeai.app.invocations.fields import (
FieldDescriptions,
ImageField,
Input,
InputField,
WithBoard,
WithMetadata,
)
from invokeai.app.invocations.model import VAEField
from invokeai.app.invocations.primitives import LatentsOutput
from invokeai.app.services.shared.invocation_context import InvocationContext
from invokeai.backend.model_manager.load.load_base import LoadedModel
from invokeai.backend.stable_diffusion.diffusers_pipeline import image_resized_to_grid_as_tensor
from invokeai.backend.util.devices import TorchDevice
# TODO(ryand): This is effectively a copy of SD3ImageToLatentsInvocation and a subset of ImageToLatentsInvocation. We
# should refactor to avoid this duplication.
@invocation(
"cogview4_i2l",
title="Image to Latents - CogView4",
tags=["image", "latents", "vae", "i2l", "cogview4"],
category="image",
version="1.0.0",
classification=Classification.Prototype,
)
class CogView4ImageToLatentsInvocation(BaseInvocation, WithMetadata, WithBoard):
"""Generates latents from an image."""
image: ImageField = InputField(description="The image to encode.")
vae: VAEField = InputField(description=FieldDescriptions.vae, input=Input.Connection)
@staticmethod
def vae_encode(vae_info: LoadedModel, image_tensor: torch.Tensor) -> torch.Tensor:
with vae_info as vae:
assert isinstance(vae, AutoencoderKL)
vae.disable_tiling()
image_tensor = image_tensor.to(device=TorchDevice.choose_torch_device(), dtype=vae.dtype)
with torch.inference_mode():
image_tensor_dist = vae.encode(image_tensor).latent_dist
# TODO: Use seed to make sampling reproducible.
latents: torch.Tensor = image_tensor_dist.sample().to(dtype=vae.dtype)
latents = vae.config.scaling_factor * latents
return latents
@torch.no_grad()
def invoke(self, context: InvocationContext) -> LatentsOutput:
image = context.images.get_pil(self.image.image_name)
image_tensor = image_resized_to_grid_as_tensor(image.convert("RGB"))
if image_tensor.dim() == 3:
image_tensor = einops.rearrange(image_tensor, "c h w -> 1 c h w")
vae_info = context.models.load(self.vae.vae)
latents = self.vae_encode(vae_info=vae_info, image_tensor=image_tensor)
latents = latents.to("cpu")
name = context.tensors.save(tensor=latents)
return LatentsOutput.build(latents_name=name, latents=latents, seed=None)

View File

@@ -1,86 +0,0 @@
from contextlib import nullcontext
import torch
from diffusers.models.autoencoders.autoencoder_kl import AutoencoderKL
from einops import rearrange
from PIL import Image
from invokeai.app.invocations.baseinvocation import BaseInvocation, Classification, invocation
from invokeai.app.invocations.constants import LATENT_SCALE_FACTOR
from invokeai.app.invocations.fields import (
FieldDescriptions,
Input,
InputField,
LatentsField,
WithBoard,
WithMetadata,
)
from invokeai.app.invocations.model import VAEField
from invokeai.app.invocations.primitives import ImageOutput
from invokeai.app.services.shared.invocation_context import InvocationContext
from invokeai.backend.stable_diffusion.extensions.seamless import SeamlessExt
from invokeai.backend.util.devices import TorchDevice
# TODO(ryand): This is effectively a copy of SD3LatentsToImageInvocation and a subset of LatentsToImageInvocation. We
# should refactor to avoid this duplication.
@invocation(
"cogview4_l2i",
title="Latents to Image - CogView4",
tags=["latents", "image", "vae", "l2i", "cogview4"],
category="latents",
version="1.0.0",
classification=Classification.Prototype,
)
class CogView4LatentsToImageInvocation(BaseInvocation, WithMetadata, WithBoard):
"""Generates an image from latents."""
latents: LatentsField = InputField(description=FieldDescriptions.latents, input=Input.Connection)
vae: VAEField = InputField(description=FieldDescriptions.vae, input=Input.Connection)
def _estimate_working_memory(self, latents: torch.Tensor, vae: AutoencoderKL) -> int:
"""Estimate the working memory required by the invocation in bytes."""
out_h = LATENT_SCALE_FACTOR * latents.shape[-2]
out_w = LATENT_SCALE_FACTOR * latents.shape[-1]
element_size = next(vae.parameters()).element_size()
scaling_constant = 2200 # Determined experimentally.
working_memory = out_h * out_w * element_size * scaling_constant
return int(working_memory)
@torch.no_grad()
def invoke(self, context: InvocationContext) -> ImageOutput:
latents = context.tensors.load(self.latents.latents_name)
vae_info = context.models.load(self.vae.vae)
assert isinstance(vae_info.model, (AutoencoderKL))
estimated_working_memory = self._estimate_working_memory(latents, vae_info.model)
with (
SeamlessExt.static_patch_model(vae_info.model, self.vae.seamless_axes),
vae_info.model_on_device(working_mem_bytes=estimated_working_memory) as (_, vae),
):
context.util.signal_progress("Running VAE")
assert isinstance(vae, (AutoencoderKL))
latents = latents.to(TorchDevice.choose_torch_device())
vae.disable_tiling()
tiling_context = nullcontext()
# clear memory as vae decode can request a lot
TorchDevice.empty_cache()
with torch.inference_mode(), tiling_context:
# copied from diffusers pipeline
latents = latents / vae.config.scaling_factor
img = vae.decode(latents, return_dict=False)[0]
img = img.clamp(-1, 1)
img = rearrange(img[0], "c h w -> h w c") # noqa: F821
img_pil = Image.fromarray((127.5 * (img + 1.0)).byte().cpu().numpy())
TorchDevice.empty_cache()
image_dto = context.images.save(image=img_pil)
return ImageOutput.build(image_dto)

View File

@@ -1,55 +0,0 @@
from invokeai.app.invocations.baseinvocation import (
BaseInvocation,
BaseInvocationOutput,
Classification,
invocation,
invocation_output,
)
from invokeai.app.invocations.fields import FieldDescriptions, Input, InputField, OutputField, UIType
from invokeai.app.invocations.model import (
GlmEncoderField,
ModelIdentifierField,
TransformerField,
VAEField,
)
from invokeai.app.services.shared.invocation_context import InvocationContext
from invokeai.backend.model_manager.config import SubModelType
@invocation_output("cogview4_model_loader_output")
class CogView4ModelLoaderOutput(BaseInvocationOutput):
"""CogView4 base model loader output."""
transformer: TransformerField = OutputField(description=FieldDescriptions.transformer, title="Transformer")
glm_encoder: GlmEncoderField = OutputField(description=FieldDescriptions.glm_encoder, title="GLM Encoder")
vae: VAEField = OutputField(description=FieldDescriptions.vae, title="VAE")
@invocation(
"cogview4_model_loader",
title="Main Model - CogView4",
tags=["model", "cogview4"],
category="model",
version="1.0.0",
classification=Classification.Prototype,
)
class CogView4ModelLoaderInvocation(BaseInvocation):
"""Loads a CogView4 base model, outputting its submodels."""
model: ModelIdentifierField = InputField(
description=FieldDescriptions.cogview4_model,
ui_type=UIType.CogView4MainModel,
input=Input.Direct,
)
def invoke(self, context: InvocationContext) -> CogView4ModelLoaderOutput:
transformer = self.model.model_copy(update={"submodel_type": SubModelType.Transformer})
vae = self.model.model_copy(update={"submodel_type": SubModelType.VAE})
glm_tokenizer = self.model.model_copy(update={"submodel_type": SubModelType.Tokenizer})
glm_encoder = self.model.model_copy(update={"submodel_type": SubModelType.TextEncoder})
return CogView4ModelLoaderOutput(
transformer=TransformerField(transformer=transformer, loras=[]),
glm_encoder=GlmEncoderField(tokenizer=glm_tokenizer, text_encoder=glm_encoder),
vae=VAEField(vae=vae),
)

View File

@@ -1,92 +0,0 @@
import torch
from transformers import GlmModel, PreTrainedTokenizerFast
from invokeai.app.invocations.baseinvocation import BaseInvocation, Classification, invocation
from invokeai.app.invocations.fields import FieldDescriptions, Input, InputField, UIComponent
from invokeai.app.invocations.model import GlmEncoderField
from invokeai.app.invocations.primitives import CogView4ConditioningOutput
from invokeai.app.services.shared.invocation_context import InvocationContext
from invokeai.backend.stable_diffusion.diffusion.conditioning_data import (
CogView4ConditioningInfo,
ConditioningFieldData,
)
from invokeai.backend.util.devices import TorchDevice
# The CogView4 GLM Text Encoder max sequence length set based on the default in diffusers.
COGVIEW4_GLM_MAX_SEQ_LEN = 1024
@invocation(
"cogview4_text_encoder",
title="Prompt - CogView4",
tags=["prompt", "conditioning", "cogview4"],
category="conditioning",
version="1.0.0",
classification=Classification.Prototype,
)
class CogView4TextEncoderInvocation(BaseInvocation):
"""Encodes and preps a prompt for a cogview4 image."""
prompt: str = InputField(description="Text prompt to encode.", ui_component=UIComponent.Textarea)
glm_encoder: GlmEncoderField = InputField(
title="GLM Encoder",
description=FieldDescriptions.glm_encoder,
input=Input.Connection,
)
@torch.no_grad()
def invoke(self, context: InvocationContext) -> CogView4ConditioningOutput:
glm_embeds = self._glm_encode(context, max_seq_len=COGVIEW4_GLM_MAX_SEQ_LEN)
conditioning_data = ConditioningFieldData(conditionings=[CogView4ConditioningInfo(glm_embeds=glm_embeds)])
conditioning_name = context.conditioning.save(conditioning_data)
return CogView4ConditioningOutput.build(conditioning_name)
def _glm_encode(self, context: InvocationContext, max_seq_len: int) -> torch.Tensor:
prompt = [self.prompt]
# TODO(ryand): Add model inputs to the invocation rather than hard-coding.
with (
context.models.load(self.glm_encoder.text_encoder).model_on_device() as (_, glm_text_encoder),
context.models.load(self.glm_encoder.tokenizer).model_on_device() as (_, glm_tokenizer),
):
context.util.signal_progress("Running GLM text encoder")
assert isinstance(glm_text_encoder, GlmModel)
assert isinstance(glm_tokenizer, PreTrainedTokenizerFast)
text_inputs = glm_tokenizer(
prompt,
padding="longest",
max_length=max_seq_len,
truncation=True,
add_special_tokens=True,
return_tensors="pt",
)
text_input_ids = text_inputs.input_ids
untruncated_ids = glm_tokenizer(prompt, padding="longest", return_tensors="pt").input_ids
assert isinstance(text_input_ids, torch.Tensor)
assert isinstance(untruncated_ids, torch.Tensor)
if untruncated_ids.shape[-1] >= text_input_ids.shape[-1] and not torch.equal(
text_input_ids, untruncated_ids
):
removed_text = glm_tokenizer.batch_decode(untruncated_ids[:, max_seq_len - 1 : -1])
context.logger.warning(
"The following part of your input was truncated because `max_sequence_length` is set to "
f" {max_seq_len} tokens: {removed_text}"
)
current_length = text_input_ids.shape[1]
pad_length = (16 - (current_length % 16)) % 16
if pad_length > 0:
pad_ids = torch.full(
(text_input_ids.shape[0], pad_length),
fill_value=glm_tokenizer.pad_token_id,
dtype=text_input_ids.dtype,
device=text_input_ids.device,
)
text_input_ids = torch.cat([pad_ids, text_input_ids], dim=1)
prompt_embeds = glm_text_encoder(
text_input_ids.to(TorchDevice.choose_torch_device()), output_hidden_states=True
).hidden_states[-2]
assert isinstance(prompt_embeds, torch.Tensor)
return prompt_embeds

View File

@@ -1,128 +0,0 @@
# Invocations for ControlNet image preprocessors
# initial implementation by Gregg Helt, 2023
from typing import List, Union
from pydantic import BaseModel, Field, field_validator, model_validator
from invokeai.app.invocations.baseinvocation import (
BaseInvocation,
BaseInvocationOutput,
Classification,
invocation,
invocation_output,
)
from invokeai.app.invocations.fields import (
FieldDescriptions,
ImageField,
InputField,
OutputField,
UIType,
)
from invokeai.app.invocations.model import ModelIdentifierField
from invokeai.app.invocations.primitives import ImageOutput
from invokeai.app.invocations.util import validate_begin_end_step, validate_weights
from invokeai.app.services.shared.invocation_context import InvocationContext
from invokeai.app.util.controlnet_utils import CONTROLNET_MODE_VALUES, CONTROLNET_RESIZE_VALUES, heuristic_resize
from invokeai.backend.image_util.util import np_to_pil, pil_to_np
class ControlField(BaseModel):
image: ImageField = Field(description="The control image")
control_model: ModelIdentifierField = Field(description="The ControlNet model to use")
control_weight: Union[float, List[float]] = Field(default=1, description="The weight given to the ControlNet")
begin_step_percent: float = Field(
default=0, ge=0, le=1, description="When the ControlNet is first applied (% of total steps)"
)
end_step_percent: float = Field(
default=1, ge=0, le=1, description="When the ControlNet is last applied (% of total steps)"
)
control_mode: CONTROLNET_MODE_VALUES = Field(default="balanced", description="The control mode to use")
resize_mode: CONTROLNET_RESIZE_VALUES = Field(default="just_resize", description="The resize mode to use")
@field_validator("control_weight")
@classmethod
def validate_control_weight(cls, v):
validate_weights(v)
return v
@model_validator(mode="after")
def validate_begin_end_step_percent(self):
validate_begin_end_step(self.begin_step_percent, self.end_step_percent)
return self
@invocation_output("control_output")
class ControlOutput(BaseInvocationOutput):
"""node output for ControlNet info"""
# Outputs
control: ControlField = OutputField(description=FieldDescriptions.control)
@invocation("controlnet", title="ControlNet - SD1.5, SDXL", tags=["controlnet"], category="controlnet", version="1.1.3")
class ControlNetInvocation(BaseInvocation):
"""Collects ControlNet info to pass to other nodes"""
image: ImageField = InputField(description="The control image")
control_model: ModelIdentifierField = InputField(
description=FieldDescriptions.controlnet_model, ui_type=UIType.ControlNetModel
)
control_weight: Union[float, List[float]] = InputField(
default=1.0, ge=-1, le=2, description="The weight given to the ControlNet"
)
begin_step_percent: float = InputField(
default=0, ge=0, le=1, description="When the ControlNet is first applied (% of total steps)"
)
end_step_percent: float = InputField(
default=1, ge=0, le=1, description="When the ControlNet is last applied (% of total steps)"
)
control_mode: CONTROLNET_MODE_VALUES = InputField(default="balanced", description="The control mode used")
resize_mode: CONTROLNET_RESIZE_VALUES = InputField(default="just_resize", description="The resize mode used")
@field_validator("control_weight")
@classmethod
def validate_control_weight(cls, v):
validate_weights(v)
return v
@model_validator(mode="after")
def validate_begin_end_step_percent(self) -> "ControlNetInvocation":
validate_begin_end_step(self.begin_step_percent, self.end_step_percent)
return self
def invoke(self, context: InvocationContext) -> ControlOutput:
return ControlOutput(
control=ControlField(
image=self.image,
control_model=self.control_model,
control_weight=self.control_weight,
begin_step_percent=self.begin_step_percent,
end_step_percent=self.end_step_percent,
control_mode=self.control_mode,
resize_mode=self.resize_mode,
),
)
@invocation(
"heuristic_resize",
title="Heuristic Resize",
tags=["image, controlnet"],
category="image",
version="1.0.1",
classification=Classification.Prototype,
)
class HeuristicResizeInvocation(BaseInvocation):
"""Resize an image using a heuristic method. Preserves edge maps."""
image: ImageField = InputField(description="The image to resize")
width: int = InputField(default=512, ge=1, description="The width to resize to (px)")
height: int = InputField(default=512, ge=1, description="The height to resize to (px)")
def invoke(self, context: InvocationContext) -> ImageOutput:
image = context.images.get_pil(self.image.image_name, "RGB")
np_img = pil_to_np(image)
np_resized = heuristic_resize(np_img, (self.width, self.height))
resized = np_to_pil(np_resized)
image_dto = context.images.save(image=resized)
return ImageOutput.build(image_dto)

View File

@@ -0,0 +1,716 @@
# Invocations for ControlNet image preprocessors
# initial implementation by Gregg Helt, 2023
# heavily leverages controlnet_aux package: https://github.com/patrickvonplaten/controlnet_aux
from builtins import bool, float
from pathlib import Path
from typing import Dict, List, Literal, Union
import cv2
import numpy as np
from controlnet_aux import (
ContentShuffleDetector,
LeresDetector,
MediapipeFaceDetector,
MidasDetector,
MLSDdetector,
NormalBaeDetector,
PidiNetDetector,
SamDetector,
ZoeDetector,
)
from controlnet_aux.util import HWC3, ade_palette
from PIL import Image
from pydantic import BaseModel, Field, field_validator, model_validator
from transformers import pipeline
from transformers.pipelines import DepthEstimationPipeline
from invokeai.app.invocations.baseinvocation import (
BaseInvocation,
BaseInvocationOutput,
Classification,
invocation,
invocation_output,
)
from invokeai.app.invocations.fields import (
FieldDescriptions,
ImageField,
InputField,
OutputField,
UIType,
WithBoard,
WithMetadata,
)
from invokeai.app.invocations.model import ModelIdentifierField
from invokeai.app.invocations.primitives import ImageOutput
from invokeai.app.invocations.util import validate_begin_end_step, validate_weights
from invokeai.app.services.shared.invocation_context import InvocationContext
from invokeai.app.util.controlnet_utils import CONTROLNET_MODE_VALUES, CONTROLNET_RESIZE_VALUES, heuristic_resize
from invokeai.backend.image_util.canny import get_canny_edges
from invokeai.backend.image_util.depth_anything.depth_anything_pipeline import DepthAnythingPipeline
from invokeai.backend.image_util.dw_openpose import DWPOSE_MODELS, DWOpenposeDetector
from invokeai.backend.image_util.hed import HEDProcessor
from invokeai.backend.image_util.lineart import LineartProcessor
from invokeai.backend.image_util.lineart_anime import LineartAnimeProcessor
from invokeai.backend.image_util.util import np_to_pil, pil_to_np
class ControlField(BaseModel):
image: ImageField = Field(description="The control image")
control_model: ModelIdentifierField = Field(description="The ControlNet model to use")
control_weight: Union[float, List[float]] = Field(default=1, description="The weight given to the ControlNet")
begin_step_percent: float = Field(
default=0, ge=0, le=1, description="When the ControlNet is first applied (% of total steps)"
)
end_step_percent: float = Field(
default=1, ge=0, le=1, description="When the ControlNet is last applied (% of total steps)"
)
control_mode: CONTROLNET_MODE_VALUES = Field(default="balanced", description="The control mode to use")
resize_mode: CONTROLNET_RESIZE_VALUES = Field(default="just_resize", description="The resize mode to use")
@field_validator("control_weight")
@classmethod
def validate_control_weight(cls, v):
validate_weights(v)
return v
@model_validator(mode="after")
def validate_begin_end_step_percent(self):
validate_begin_end_step(self.begin_step_percent, self.end_step_percent)
return self
@invocation_output("control_output")
class ControlOutput(BaseInvocationOutput):
"""node output for ControlNet info"""
# Outputs
control: ControlField = OutputField(description=FieldDescriptions.control)
@invocation("controlnet", title="ControlNet - SD1.5, SDXL", tags=["controlnet"], category="controlnet", version="1.1.3")
class ControlNetInvocation(BaseInvocation):
"""Collects ControlNet info to pass to other nodes"""
image: ImageField = InputField(description="The control image")
control_model: ModelIdentifierField = InputField(
description=FieldDescriptions.controlnet_model, ui_type=UIType.ControlNetModel
)
control_weight: Union[float, List[float]] = InputField(
default=1.0, ge=-1, le=2, description="The weight given to the ControlNet"
)
begin_step_percent: float = InputField(
default=0, ge=0, le=1, description="When the ControlNet is first applied (% of total steps)"
)
end_step_percent: float = InputField(
default=1, ge=0, le=1, description="When the ControlNet is last applied (% of total steps)"
)
control_mode: CONTROLNET_MODE_VALUES = InputField(default="balanced", description="The control mode used")
resize_mode: CONTROLNET_RESIZE_VALUES = InputField(default="just_resize", description="The resize mode used")
@field_validator("control_weight")
@classmethod
def validate_control_weight(cls, v):
validate_weights(v)
return v
@model_validator(mode="after")
def validate_begin_end_step_percent(self) -> "ControlNetInvocation":
validate_begin_end_step(self.begin_step_percent, self.end_step_percent)
return self
def invoke(self, context: InvocationContext) -> ControlOutput:
return ControlOutput(
control=ControlField(
image=self.image,
control_model=self.control_model,
control_weight=self.control_weight,
begin_step_percent=self.begin_step_percent,
end_step_percent=self.end_step_percent,
control_mode=self.control_mode,
resize_mode=self.resize_mode,
),
)
# This invocation exists for other invocations to subclass it - do not register with @invocation!
class ImageProcessorInvocation(BaseInvocation, WithMetadata, WithBoard):
"""Base class for invocations that preprocess images for ControlNet"""
image: ImageField = InputField(description="The image to process")
def run_processor(self, image: Image.Image) -> Image.Image:
# superclass just passes through image without processing
return image
def load_image(self, context: InvocationContext) -> Image.Image:
# allows override for any special formatting specific to the preprocessor
return context.images.get_pil(self.image.image_name, "RGB")
def invoke(self, context: InvocationContext) -> ImageOutput:
self._context = context
raw_image = self.load_image(context)
# image type should be PIL.PngImagePlugin.PngImageFile ?
processed_image = self.run_processor(raw_image)
# currently can't see processed image in node UI without a showImage node,
# so for now setting image_type to RESULT instead of INTERMEDIATE so will get saved in gallery
image_dto = context.images.save(image=processed_image)
"""Builds an ImageOutput and its ImageField"""
processed_image_field = ImageField(image_name=image_dto.image_name)
return ImageOutput(
image=processed_image_field,
# width=processed_image.width,
width=image_dto.width,
# height=processed_image.height,
height=image_dto.height,
# mode=processed_image.mode,
)
@invocation(
"canny_image_processor",
title="Canny Processor",
tags=["controlnet", "canny"],
category="controlnet",
version="1.3.3",
classification=Classification.Deprecated,
)
class CannyImageProcessorInvocation(ImageProcessorInvocation):
"""Canny edge detection for ControlNet"""
detect_resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.detect_res)
image_resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.image_res)
low_threshold: int = InputField(
default=100, ge=0, le=255, description="The low threshold of the Canny pixel gradient (0-255)"
)
high_threshold: int = InputField(
default=200, ge=0, le=255, description="The high threshold of the Canny pixel gradient (0-255)"
)
def load_image(self, context: InvocationContext) -> Image.Image:
# Keep alpha channel for Canny processing to detect edges of transparent areas
return context.images.get_pil(self.image.image_name, "RGBA")
def run_processor(self, image: Image.Image) -> Image.Image:
processed_image = get_canny_edges(
image,
self.low_threshold,
self.high_threshold,
detect_resolution=self.detect_resolution,
image_resolution=self.image_resolution,
)
return processed_image
@invocation(
"hed_image_processor",
title="HED (softedge) Processor",
tags=["controlnet", "hed", "softedge"],
category="controlnet",
version="1.2.3",
classification=Classification.Deprecated,
)
class HedImageProcessorInvocation(ImageProcessorInvocation):
"""Applies HED edge detection to image"""
detect_resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.detect_res)
image_resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.image_res)
# safe not supported in controlnet_aux v0.0.3
# safe: bool = InputField(default=False, description=FieldDescriptions.safe_mode)
scribble: bool = InputField(default=False, description=FieldDescriptions.scribble_mode)
def run_processor(self, image: Image.Image) -> Image.Image:
hed_processor = HEDProcessor()
processed_image = hed_processor.run(
image,
detect_resolution=self.detect_resolution,
image_resolution=self.image_resolution,
# safe not supported in controlnet_aux v0.0.3
# safe=self.safe,
scribble=self.scribble,
)
return processed_image
@invocation(
"lineart_image_processor",
title="Lineart Processor",
tags=["controlnet", "lineart"],
category="controlnet",
version="1.2.3",
classification=Classification.Deprecated,
)
class LineartImageProcessorInvocation(ImageProcessorInvocation):
"""Applies line art processing to image"""
detect_resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.detect_res)
image_resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.image_res)
coarse: bool = InputField(default=False, description="Whether to use coarse mode")
def run_processor(self, image: Image.Image) -> Image.Image:
lineart_processor = LineartProcessor()
processed_image = lineart_processor.run(
image, detect_resolution=self.detect_resolution, image_resolution=self.image_resolution, coarse=self.coarse
)
return processed_image
@invocation(
"lineart_anime_image_processor",
title="Lineart Anime Processor",
tags=["controlnet", "lineart", "anime"],
category="controlnet",
version="1.2.3",
classification=Classification.Deprecated,
)
class LineartAnimeImageProcessorInvocation(ImageProcessorInvocation):
"""Applies line art anime processing to image"""
detect_resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.detect_res)
image_resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.image_res)
def run_processor(self, image: Image.Image) -> Image.Image:
processor = LineartAnimeProcessor()
processed_image = processor.run(
image,
detect_resolution=self.detect_resolution,
image_resolution=self.image_resolution,
)
return processed_image
@invocation(
"midas_depth_image_processor",
title="Midas Depth Processor",
tags=["controlnet", "midas"],
category="controlnet",
version="1.2.4",
classification=Classification.Deprecated,
)
class MidasDepthImageProcessorInvocation(ImageProcessorInvocation):
"""Applies Midas depth processing to image"""
a_mult: float = InputField(default=2.0, ge=0, description="Midas parameter `a_mult` (a = a_mult * PI)")
bg_th: float = InputField(default=0.1, ge=0, description="Midas parameter `bg_th`")
detect_resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.detect_res)
image_resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.image_res)
# depth_and_normal not supported in controlnet_aux v0.0.3
# depth_and_normal: bool = InputField(default=False, description="whether to use depth and normal mode")
def run_processor(self, image: Image.Image) -> Image.Image:
# TODO: replace from_pretrained() calls with context.models.download_and_cache() (or similar)
midas_processor = MidasDetector.from_pretrained("lllyasviel/Annotators")
processed_image = midas_processor(
image,
a=np.pi * self.a_mult,
bg_th=self.bg_th,
image_resolution=self.image_resolution,
detect_resolution=self.detect_resolution,
# dept_and_normal not supported in controlnet_aux v0.0.3
# depth_and_normal=self.depth_and_normal,
)
return processed_image
@invocation(
"normalbae_image_processor",
title="Normal BAE Processor",
tags=["controlnet"],
category="controlnet",
version="1.2.3",
classification=Classification.Deprecated,
)
class NormalbaeImageProcessorInvocation(ImageProcessorInvocation):
"""Applies NormalBae processing to image"""
detect_resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.detect_res)
image_resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.image_res)
def run_processor(self, image: Image.Image) -> Image.Image:
normalbae_processor = NormalBaeDetector.from_pretrained("lllyasviel/Annotators")
processed_image = normalbae_processor(
image, detect_resolution=self.detect_resolution, image_resolution=self.image_resolution
)
return processed_image
@invocation(
"mlsd_image_processor",
title="MLSD Processor",
tags=["controlnet", "mlsd"],
category="controlnet",
version="1.2.3",
classification=Classification.Deprecated,
)
class MlsdImageProcessorInvocation(ImageProcessorInvocation):
"""Applies MLSD processing to image"""
detect_resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.detect_res)
image_resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.image_res)
thr_v: float = InputField(default=0.1, ge=0, description="MLSD parameter `thr_v`")
thr_d: float = InputField(default=0.1, ge=0, description="MLSD parameter `thr_d`")
def run_processor(self, image: Image.Image) -> Image.Image:
mlsd_processor = MLSDdetector.from_pretrained("lllyasviel/Annotators")
processed_image = mlsd_processor(
image,
detect_resolution=self.detect_resolution,
image_resolution=self.image_resolution,
thr_v=self.thr_v,
thr_d=self.thr_d,
)
return processed_image
@invocation(
"pidi_image_processor",
title="PIDI Processor",
tags=["controlnet", "pidi"],
category="controlnet",
version="1.2.3",
classification=Classification.Deprecated,
)
class PidiImageProcessorInvocation(ImageProcessorInvocation):
"""Applies PIDI processing to image"""
detect_resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.detect_res)
image_resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.image_res)
safe: bool = InputField(default=False, description=FieldDescriptions.safe_mode)
scribble: bool = InputField(default=False, description=FieldDescriptions.scribble_mode)
def run_processor(self, image: Image.Image) -> Image.Image:
pidi_processor = PidiNetDetector.from_pretrained("lllyasviel/Annotators")
processed_image = pidi_processor(
image,
detect_resolution=self.detect_resolution,
image_resolution=self.image_resolution,
safe=self.safe,
scribble=self.scribble,
)
return processed_image
@invocation(
"content_shuffle_image_processor",
title="Content Shuffle Processor",
tags=["controlnet", "contentshuffle"],
category="controlnet",
version="1.2.3",
classification=Classification.Deprecated,
)
class ContentShuffleImageProcessorInvocation(ImageProcessorInvocation):
"""Applies content shuffle processing to image"""
detect_resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.detect_res)
image_resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.image_res)
h: int = InputField(default=512, ge=0, description="Content shuffle `h` parameter")
w: int = InputField(default=512, ge=0, description="Content shuffle `w` parameter")
f: int = InputField(default=256, ge=0, description="Content shuffle `f` parameter")
def run_processor(self, image: Image.Image) -> Image.Image:
content_shuffle_processor = ContentShuffleDetector()
processed_image = content_shuffle_processor(
image,
detect_resolution=self.detect_resolution,
image_resolution=self.image_resolution,
h=self.h,
w=self.w,
f=self.f,
)
return processed_image
# should work with controlnet_aux >= 0.0.4 and timm <= 0.6.13
@invocation(
"zoe_depth_image_processor",
title="Zoe (Depth) Processor",
tags=["controlnet", "zoe", "depth"],
category="controlnet",
version="1.2.3",
classification=Classification.Deprecated,
)
class ZoeDepthImageProcessorInvocation(ImageProcessorInvocation):
"""Applies Zoe depth processing to image"""
def run_processor(self, image: Image.Image) -> Image.Image:
zoe_depth_processor = ZoeDetector.from_pretrained("lllyasviel/Annotators")
processed_image = zoe_depth_processor(image)
return processed_image
@invocation(
"mediapipe_face_processor",
title="Mediapipe Face Processor",
tags=["controlnet", "mediapipe", "face"],
category="controlnet",
version="1.2.4",
classification=Classification.Deprecated,
)
class MediapipeFaceProcessorInvocation(ImageProcessorInvocation):
"""Applies mediapipe face processing to image"""
max_faces: int = InputField(default=1, ge=1, description="Maximum number of faces to detect")
min_confidence: float = InputField(default=0.5, ge=0, le=1, description="Minimum confidence for face detection")
detect_resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.detect_res)
image_resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.image_res)
def run_processor(self, image: Image.Image) -> Image.Image:
mediapipe_face_processor = MediapipeFaceDetector()
processed_image = mediapipe_face_processor(
image,
max_faces=self.max_faces,
min_confidence=self.min_confidence,
image_resolution=self.image_resolution,
detect_resolution=self.detect_resolution,
)
return processed_image
@invocation(
"leres_image_processor",
title="Leres (Depth) Processor",
tags=["controlnet", "leres", "depth"],
category="controlnet",
version="1.2.3",
classification=Classification.Deprecated,
)
class LeresImageProcessorInvocation(ImageProcessorInvocation):
"""Applies leres processing to image"""
thr_a: float = InputField(default=0, description="Leres parameter `thr_a`")
thr_b: float = InputField(default=0, description="Leres parameter `thr_b`")
boost: bool = InputField(default=False, description="Whether to use boost mode")
detect_resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.detect_res)
image_resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.image_res)
def run_processor(self, image: Image.Image) -> Image.Image:
leres_processor = LeresDetector.from_pretrained("lllyasviel/Annotators")
processed_image = leres_processor(
image,
thr_a=self.thr_a,
thr_b=self.thr_b,
boost=self.boost,
detect_resolution=self.detect_resolution,
image_resolution=self.image_resolution,
)
return processed_image
@invocation(
"tile_image_processor",
title="Tile Resample Processor",
tags=["controlnet", "tile"],
category="controlnet",
version="1.2.3",
classification=Classification.Deprecated,
)
class TileResamplerProcessorInvocation(ImageProcessorInvocation):
"""Tile resampler processor"""
# res: int = InputField(default=512, ge=0, le=1024, description="The pixel resolution for each tile")
down_sampling_rate: float = InputField(default=1.0, ge=1.0, le=8.0, description="Down sampling rate")
# tile_resample copied from sd-webui-controlnet/scripts/processor.py
def tile_resample(
self,
np_img: np.ndarray,
res=512, # never used?
down_sampling_rate=1.0,
):
np_img = HWC3(np_img)
if down_sampling_rate < 1.1:
return np_img
H, W, C = np_img.shape
H = int(float(H) / float(down_sampling_rate))
W = int(float(W) / float(down_sampling_rate))
np_img = cv2.resize(np_img, (W, H), interpolation=cv2.INTER_AREA)
return np_img
def run_processor(self, image: Image.Image) -> Image.Image:
np_img = np.array(image, dtype=np.uint8)
processed_np_image = self.tile_resample(
np_img,
# res=self.tile_size,
down_sampling_rate=self.down_sampling_rate,
)
processed_image = Image.fromarray(processed_np_image)
return processed_image
@invocation(
"segment_anything_processor",
title="Segment Anything Processor",
tags=["controlnet", "segmentanything"],
category="controlnet",
version="1.2.4",
classification=Classification.Deprecated,
)
class SegmentAnythingProcessorInvocation(ImageProcessorInvocation):
"""Applies segment anything processing to image"""
detect_resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.detect_res)
image_resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.image_res)
def run_processor(self, image: Image.Image) -> Image.Image:
# segment_anything_processor = SamDetector.from_pretrained("ybelkada/segment-anything", subfolder="checkpoints")
segment_anything_processor = SamDetectorReproducibleColors.from_pretrained(
"ybelkada/segment-anything", subfolder="checkpoints"
)
np_img = np.array(image, dtype=np.uint8)
processed_image = segment_anything_processor(
np_img, image_resolution=self.image_resolution, detect_resolution=self.detect_resolution
)
return processed_image
class SamDetectorReproducibleColors(SamDetector):
# overriding SamDetector.show_anns() method to use reproducible colors for segmentation image
# base class show_anns() method randomizes colors,
# which seems to also lead to non-reproducible image generation
# so using ADE20k color palette instead
def show_anns(self, anns: List[Dict]):
if len(anns) == 0:
return
sorted_anns = sorted(anns, key=(lambda x: x["area"]), reverse=True)
h, w = anns[0]["segmentation"].shape
final_img = Image.fromarray(np.zeros((h, w, 3), dtype=np.uint8), mode="RGB")
palette = ade_palette()
for i, ann in enumerate(sorted_anns):
m = ann["segmentation"]
img = np.empty((m.shape[0], m.shape[1], 3), dtype=np.uint8)
# doing modulo just in case number of annotated regions exceeds number of colors in palette
ann_color = palette[i % len(palette)]
img[:, :] = ann_color
final_img.paste(Image.fromarray(img, mode="RGB"), (0, 0), Image.fromarray(np.uint8(m * 255)))
return np.array(final_img, dtype=np.uint8)
@invocation(
"color_map_image_processor",
title="Color Map Processor",
tags=["controlnet"],
category="controlnet",
version="1.2.3",
classification=Classification.Deprecated,
)
class ColorMapImageProcessorInvocation(ImageProcessorInvocation):
"""Generates a color map from the provided image"""
color_map_tile_size: int = InputField(default=64, ge=1, description=FieldDescriptions.tile_size)
def run_processor(self, image: Image.Image) -> Image.Image:
np_image = np.array(image, dtype=np.uint8)
height, width = np_image.shape[:2]
width_tile_size = min(self.color_map_tile_size, width)
height_tile_size = min(self.color_map_tile_size, height)
color_map = cv2.resize(
np_image,
(width // width_tile_size, height // height_tile_size),
interpolation=cv2.INTER_CUBIC,
)
color_map = cv2.resize(color_map, (width, height), interpolation=cv2.INTER_NEAREST)
color_map = Image.fromarray(color_map)
return color_map
DEPTH_ANYTHING_MODEL_SIZES = Literal["large", "base", "small", "small_v2"]
# DepthAnything V2 Small model is licensed under Apache 2.0 but not the base and large models.
DEPTH_ANYTHING_MODELS = {
"large": "LiheYoung/depth-anything-large-hf",
"base": "LiheYoung/depth-anything-base-hf",
"small": "LiheYoung/depth-anything-small-hf",
"small_v2": "depth-anything/Depth-Anything-V2-Small-hf",
}
@invocation(
"depth_anything_image_processor",
title="Depth Anything Processor",
tags=["controlnet", "depth", "depth anything"],
category="controlnet",
version="1.1.3",
classification=Classification.Deprecated,
)
class DepthAnythingImageProcessorInvocation(ImageProcessorInvocation):
"""Generates a depth map based on the Depth Anything algorithm"""
model_size: DEPTH_ANYTHING_MODEL_SIZES = InputField(
default="small_v2", description="The size of the depth model to use"
)
resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.image_res)
def run_processor(self, image: Image.Image) -> Image.Image:
def load_depth_anything(model_path: Path):
depth_anything_pipeline = pipeline(model=str(model_path), task="depth-estimation", local_files_only=True)
assert isinstance(depth_anything_pipeline, DepthEstimationPipeline)
return DepthAnythingPipeline(depth_anything_pipeline)
with self._context.models.load_remote_model(
source=DEPTH_ANYTHING_MODELS[self.model_size], loader=load_depth_anything
) as depth_anything_detector:
assert isinstance(depth_anything_detector, DepthAnythingPipeline)
depth_map = depth_anything_detector.generate_depth(image)
# Resizing to user target specified size
new_height = int(image.size[1] * (self.resolution / image.size[0]))
depth_map = depth_map.resize((self.resolution, new_height))
return depth_map
@invocation(
"dw_openpose_image_processor",
title="DW Openpose Image Processor",
tags=["controlnet", "dwpose", "openpose"],
category="controlnet",
version="1.1.1",
classification=Classification.Deprecated,
)
class DWOpenposeImageProcessorInvocation(ImageProcessorInvocation):
"""Generates an openpose pose from an image using DWPose"""
draw_body: bool = InputField(default=True)
draw_face: bool = InputField(default=False)
draw_hands: bool = InputField(default=False)
image_resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.image_res)
def run_processor(self, image: Image.Image) -> Image.Image:
onnx_det = self._context.models.download_and_cache_model(DWPOSE_MODELS["yolox_l.onnx"])
onnx_pose = self._context.models.download_and_cache_model(DWPOSE_MODELS["dw-ll_ucoco_384.onnx"])
dw_openpose = DWOpenposeDetector(onnx_det=onnx_det, onnx_pose=onnx_pose)
processed_image = dw_openpose(
image,
draw_face=self.draw_face,
draw_hands=self.draw_hands,
draw_body=self.draw_body,
resolution=self.image_resolution,
)
return processed_image
@invocation(
"heuristic_resize",
title="Heuristic Resize",
tags=["image, controlnet"],
category="image",
version="1.0.1",
classification=Classification.Prototype,
)
class HeuristicResizeInvocation(BaseInvocation):
"""Resize an image using a heuristic method. Preserves edge maps."""
image: ImageField = InputField(description="The image to resize")
width: int = InputField(default=512, ge=1, description="The width to resize to (px)")
height: int = InputField(default=512, ge=1, description="The height to resize to (px)")
def invoke(self, context: InvocationContext) -> ImageOutput:
image = context.images.get_pil(self.image.image_name, "RGB")
np_img = pil_to_np(image)
np_resized = heuristic_resize(np_img, (self.width, self.height))
resized = np_to_pil(np_resized)
image_dto = context.images.save(image=resized)
return ImageOutput.build(image_dto)

View File

@@ -22,7 +22,7 @@ from transformers import CLIPVisionModelWithProjection
from invokeai.app.invocations.baseinvocation import BaseInvocation, invocation
from invokeai.app.invocations.constants import LATENT_SCALE_FACTOR
from invokeai.app.invocations.controlnet import ControlField
from invokeai.app.invocations.controlnet_image_processors import ControlField
from invokeai.app.invocations.fields import (
ConditioningField,
DenoiseMaskField,

View File

@@ -4,7 +4,7 @@ from invokeai.app.invocations.baseinvocation import BaseInvocation, invocation
from invokeai.app.invocations.fields import ImageField, InputField, WithBoard, WithMetadata
from invokeai.app.invocations.primitives import ImageOutput
from invokeai.app.services.shared.invocation_context import InvocationContext
from invokeai.backend.image_util.dw_openpose import DWOpenposeDetector
from invokeai.backend.image_util.dw_openpose import DWOpenposeDetector2
@invocation(
@@ -25,20 +25,20 @@ class DWOpenposeDetectionInvocation(BaseInvocation, WithMetadata, WithBoard):
def invoke(self, context: InvocationContext) -> ImageOutput:
image = context.images.get_pil(self.image.image_name, "RGB")
onnx_det_path = context.models.download_and_cache_model(DWOpenposeDetector.get_model_url_det())
onnx_pose_path = context.models.download_and_cache_model(DWOpenposeDetector.get_model_url_pose())
onnx_det_path = context.models.download_and_cache_model(DWOpenposeDetector2.get_model_url_det())
onnx_pose_path = context.models.download_and_cache_model(DWOpenposeDetector2.get_model_url_pose())
loaded_session_det = context.models.load_local_model(
onnx_det_path, DWOpenposeDetector.create_onnx_inference_session
onnx_det_path, DWOpenposeDetector2.create_onnx_inference_session
)
loaded_session_pose = context.models.load_local_model(
onnx_pose_path, DWOpenposeDetector.create_onnx_inference_session
onnx_pose_path, DWOpenposeDetector2.create_onnx_inference_session
)
with loaded_session_det as session_det, loaded_session_pose as session_pose:
assert isinstance(session_det, ort.InferenceSession)
assert isinstance(session_pose, ort.InferenceSession)
detector = DWOpenposeDetector(session_det=session_det, session_pose=session_pose)
detector = DWOpenposeDetector2(session_det=session_det, session_pose=session_pose)
detected_image = detector.run(
image,
draw_face=self.draw_face,

View File

@@ -40,7 +40,6 @@ class UIType(str, Enum, metaclass=MetaEnum):
# region Model Field Types
MainModel = "MainModelField"
CogView4MainModel = "CogView4MainModelField"
FluxMainModel = "FluxMainModelField"
SD3MainModel = "SD3MainModelField"
SDXLMainModel = "SDXLMainModelField"
@@ -138,7 +137,6 @@ class FieldDescriptions:
noise = "Noise tensor"
clip = "CLIP (tokenizer, text encoder, LoRAs) and skipped layer count"
t5_encoder = "T5 tokenizer and text encoder"
glm_encoder = "GLM (THUDM) tokenizer and text encoder"
clip_embed_model = "CLIP Embed loader"
clip_g_model = "CLIP-G Embed loader"
unet = "UNet (scheduler, LoRAs)"
@@ -153,7 +151,6 @@ class FieldDescriptions:
main_model = "Main model (UNet, VAE, CLIP) to load"
flux_model = "Flux model (Transformer) to load"
sd3_model = "SD3 model (MMDiTX) to load"
cogview4_model = "CogView4 model (Transformer) to load"
sdxl_main_model = "SDXL Main model (UNet, VAE, CLIP1, CLIP2) to load"
sdxl_refiner_model = "SDXL Refiner Main Modde (UNet, VAE, CLIP2) to load"
onnx_main_model = "ONNX Main model (UNet, VAE, CLIP) to load"
@@ -293,12 +290,6 @@ class SD3ConditioningField(BaseModel):
conditioning_name: str = Field(description="The name of conditioning tensor")
class CogView4ConditioningField(BaseModel):
"""A conditioning tensor primitive value"""
conditioning_name: str = Field(description="The name of conditioning tensor")
class ConditioningField(BaseModel):
"""A conditioning tensor primitive value"""

View File

@@ -33,6 +33,7 @@ from invokeai.app.services.shared.invocation_context import InvocationContext
from invokeai.backend.flux.controlnet.instantx_controlnet_flux import InstantXControlNetFlux
from invokeai.backend.flux.controlnet.xlabs_controlnet_flux import XLabsControlNetFlux
from invokeai.backend.flux.denoise import denoise
from invokeai.backend.flux.extensions.inpaint_extension import InpaintExtension
from invokeai.backend.flux.extensions.instantx_controlnet_extension import InstantXControlNetExtension
from invokeai.backend.flux.extensions.regional_prompting_extension import RegionalPromptingExtension
from invokeai.backend.flux.extensions.xlabs_controlnet_extension import XLabsControlNetExtension
@@ -52,7 +53,6 @@ from invokeai.backend.model_manager.taxonomy import ModelFormat, ModelVariantTyp
from invokeai.backend.patches.layer_patcher import LayerPatcher
from invokeai.backend.patches.lora_conversions.flux_lora_constants import FLUX_LORA_TRANSFORMER_PREFIX
from invokeai.backend.patches.model_patch_raw import ModelPatchRaw
from invokeai.backend.rectified_flow.rectified_flow_inpaint_extension import RectifiedFlowInpaintExtension
from invokeai.backend.stable_diffusion.diffusers_pipeline import PipelineIntermediateState
from invokeai.backend.stable_diffusion.diffusion.conditioning_data import FLUXConditioningInfo
from invokeai.backend.util.devices import TorchDevice
@@ -295,10 +295,10 @@ class FluxDenoiseInvocation(BaseInvocation, WithMetadata, WithBoard):
assert packed_h * packed_w == x.shape[1]
# Prepare inpaint extension.
inpaint_extension: RectifiedFlowInpaintExtension | None = None
inpaint_extension: InpaintExtension | None = None
if inpaint_mask is not None:
assert init_latents is not None
inpaint_extension = RectifiedFlowInpaintExtension(
inpaint_extension = InpaintExtension(
init_latents=init_latents,
inpaint_mask=inpaint_mask,
noise=noise,

View File

@@ -1,9 +1,7 @@
import math
from typing import Literal, Optional
from typing import Optional
import torch
from PIL import Image
from transformers import SiglipImageProcessor, SiglipVisionModel
from invokeai.app.invocations.baseinvocation import (
BaseInvocation,
@@ -41,15 +39,12 @@ class FluxReduxOutput(BaseInvocationOutput):
)
DOWNSAMPLING_FUNCTIONS = Literal["nearest", "bilinear", "bicubic", "area", "nearest-exact"]
@invocation(
"flux_redux",
title="FLUX Redux",
tags=["ip_adapter", "control"],
category="ip_adapter",
version="2.1.0",
version="2.0.0",
classification=Classification.Beta,
)
class FluxReduxInvocation(BaseInvocation):
@@ -66,64 +61,23 @@ class FluxReduxInvocation(BaseInvocation):
title="FLUX Redux Model",
ui_type=UIType.FluxReduxModel,
)
downsampling_factor: int = InputField(
ge=1,
le=9,
default=1,
description="Redux Downsampling Factor (1-9)",
)
downsampling_function: DOWNSAMPLING_FUNCTIONS = InputField(
default="area",
description="Redux Downsampling Function",
)
weight: float = InputField(
ge=0,
le=1,
default=1.0,
description="Redux weight (0.0-1.0)",
)
def invoke(self, context: InvocationContext) -> FluxReduxOutput:
image = context.images.get_pil(self.image.image_name, "RGB")
encoded_x = self._siglip_encode(context, image)
redux_conditioning = self._flux_redux_encode(context, encoded_x)
if self.downsampling_factor > 1 or self.weight != 1.0:
redux_conditioning = self._downsample_weight(context, redux_conditioning)
tensor_name = context.tensors.save(redux_conditioning)
return FluxReduxOutput(
redux_cond=FluxReduxConditioningField(conditioning=TensorField(tensor_name=tensor_name), mask=self.mask)
)
@torch.no_grad()
def _downsample_weight(self, context: InvocationContext, redux_conditioning: torch.Tensor) -> torch.Tensor:
# Downsampling derived from https://github.com/kaibioinfo/ComfyUI_AdvancedRefluxControl
(b, t, h) = redux_conditioning.shape
m = int(math.sqrt(t))
if self.downsampling_factor > 1:
redux_conditioning = redux_conditioning.view(b, m, m, h)
redux_conditioning = torch.nn.functional.interpolate(
redux_conditioning.transpose(1, -1),
size=(m // self.downsampling_factor, m // self.downsampling_factor),
mode=self.downsampling_function,
)
redux_conditioning = redux_conditioning.transpose(1, -1).reshape(b, -1, h)
if self.weight != 1.0:
redux_conditioning = redux_conditioning * self.weight * self.weight
return redux_conditioning
@torch.no_grad()
def _siglip_encode(self, context: InvocationContext, image: Image.Image) -> torch.Tensor:
siglip_model_config = self._get_siglip_model(context)
with context.models.load(siglip_model_config.key).model_on_device() as (_, model):
assert isinstance(model, SiglipVisionModel)
model_abs_path = context.models.get_absolute_path(siglip_model_config)
processor = SiglipImageProcessor.from_pretrained(model_abs_path, local_files_only=True)
assert isinstance(processor, SiglipImageProcessor)
siglip_pipeline = SigLipPipeline(processor, model)
with context.models.load(siglip_model_config.key).model_on_device() as (_, siglip_pipeline):
assert isinstance(siglip_pipeline, SigLipPipeline)
return siglip_pipeline.encode_image(
x=image, device=TorchDevice.choose_torch_device(), dtype=TorchDevice.choose_torch_dtype()
)

View File

@@ -1089,7 +1089,7 @@ class CanvasV2MaskAndCropInvocation(BaseInvocation, WithMetadata, WithBoard):
@invocation(
"expand_mask_with_fade", title="Expand Mask with Fade", tags=["image", "mask"], category="image", version="1.0.1"
"expand_mask_with_fade", title="Expand Mask with Fade", tags=["image", "mask"], category="image", version="1.0.0"
)
class ExpandMaskWithFadeInvocation(BaseInvocation, WithMetadata, WithBoard):
"""Expands a mask with a fade effect. The mask uses black to indicate areas to keep from the generated image and white for areas to discard.
@@ -1147,21 +1147,8 @@ class ExpandMaskWithFadeInvocation(BaseInvocation, WithMetadata, WithBoard):
coeffs = numpy.polyfit(x_control, y_control, 3)
poly = numpy.poly1d(coeffs)
# Evaluate the polynomial
feather = poly(d_norm)
# The polynomial fit isn't perfect. Points beyond the fade distance are likely to be slightly less than 1.0,
# even though the control points indicate that they should be exactly 1.0. This is due to the nature of the
# polynomial fit, which is a best approximation of the control points but not an exact match.
# When this occurs, the area outside the mask and fade-out will not be 100% transparent. For example, it may
# have an alpha value of 1 instead of 0. So we must force pixels at or beyond the fade distance to exactly 1.0.
# Force pixels at or beyond the fade distance to exactly 1.0
feather = numpy.where(d_norm >= 1.0, 1.0, feather)
# Clip any other values to ensure they're in the valid range [0,1]
feather = numpy.clip(feather, 0, 1)
# Evaluate and clip the smooth mapping
feather = numpy.clip(poly(d_norm), 0, 1)
# Build final image.
np_result = numpy.where(black_mask == 1, 0, (feather * 255).astype(numpy.uint8))

View File

@@ -127,16 +127,13 @@ class InfillPatchMatchInvocation(InfillImageProcessorInvocation):
return infilled
LAMA_MODEL_URL = "https://github.com/Sanster/models/releases/download/add_big_lama/big-lama.pt"
@invocation("infill_lama", title="LaMa Infill", tags=["image", "inpaint"], category="inpaint", version="1.2.2")
class LaMaInfillInvocation(InfillImageProcessorInvocation):
"""Infills transparent areas of an image using the LaMa model"""
def infill(self, image: Image.Image):
with self._context.models.load_remote_model(
source=LAMA_MODEL_URL,
source="https://github.com/Sanster/models/releases/download/add_big_lama/big-lama.pt",
loader=LaMA.load_jit_model,
) as model:
lama = LaMA(model)

View File

@@ -3,14 +3,13 @@ from typing import Any
import torch
from PIL.Image import Image
from pydantic import field_validator
from transformers import AutoProcessor, LlavaOnevisionForConditionalGeneration, LlavaOnevisionProcessor
from invokeai.app.invocations.baseinvocation import BaseInvocation, Classification, invocation
from invokeai.app.invocations.fields import FieldDescriptions, ImageField, InputField, UIComponent, UIType
from invokeai.app.invocations.model import ModelIdentifierField
from invokeai.app.invocations.primitives import StringOutput
from invokeai.app.services.shared.invocation_context import InvocationContext
from invokeai.backend.llava_onevision_pipeline import LlavaOnevisionPipeline
from invokeai.backend.llava_onevision_model import LlavaOnevisionModel
from invokeai.backend.util.devices import TorchDevice
@@ -55,17 +54,10 @@ class LlavaOnevisionVllmInvocation(BaseInvocation):
@torch.no_grad()
def invoke(self, context: InvocationContext) -> StringOutput:
images = self._get_images(context)
model_config = context.models.get_config(self.vllm_model)
with context.models.load(self.vllm_model).model_on_device() as (_, model):
assert isinstance(model, LlavaOnevisionForConditionalGeneration)
model_abs_path = context.models.get_absolute_path(model_config)
processor = AutoProcessor.from_pretrained(model_abs_path, local_files_only=True)
assert isinstance(processor, LlavaOnevisionProcessor)
model = LlavaOnevisionPipeline(model, processor)
output = model.run(
with context.models.load(self.vllm_model) as vllm_model:
assert isinstance(vllm_model, LlavaOnevisionModel)
output = vllm_model.run(
prompt=self.prompt,
images=images,
device=TorchDevice.choose_torch_device(),

View File

@@ -152,10 +152,6 @@ GENERATION_MODES = Literal[
"sd3_img2img",
"sd3_inpaint",
"sd3_outpaint",
"cogview4_txt2img",
"cogview4_img2img",
"cogview4_inpaint",
"cogview4_outpaint",
]

View File

@@ -14,7 +14,7 @@ from invokeai.app.invocations.baseinvocation import (
invocation,
invocation_output,
)
from invokeai.app.invocations.controlnet import ControlField, ControlNetInvocation
from invokeai.app.invocations.controlnet_image_processors import ControlField, ControlNetInvocation
from invokeai.app.invocations.denoise_latents import DenoiseLatentsInvocation
from invokeai.app.invocations.fields import (
FieldDescriptions,
@@ -39,17 +39,7 @@ from invokeai.app.invocations.model import (
VAEField,
VAEOutput,
)
from invokeai.app.invocations.primitives import (
BooleanCollectionOutput,
BooleanOutput,
FloatCollectionOutput,
FloatOutput,
IntegerCollectionOutput,
IntegerOutput,
LatentsOutput,
StringCollectionOutput,
StringOutput,
)
from invokeai.app.invocations.primitives import BooleanOutput, FloatOutput, IntegerOutput, LatentsOutput, StringOutput
from invokeai.app.invocations.scheduler import SchedulerOutput
from invokeai.app.invocations.t2i_adapter import T2IAdapterField, T2IAdapterInvocation
from invokeai.app.services.shared.invocation_context import InvocationContext
@@ -1172,133 +1162,3 @@ class MetadataToT2IAdaptersInvocation(BaseInvocation, WithMetadata):
adapters = append_list(T2IAdapterField, i.t2i_adapter, adapters)
return MDT2IAdapterListOutput(t2i_adapter_list=adapters)
@invocation(
"metadata_to_string_collection",
title="Metadata To String Collection",
tags=["metadata"],
category="metadata",
version="1.0.0",
classification=Classification.Beta,
)
class MetadataToStringCollectionInvocation(BaseInvocation, WithMetadata):
"""Extracts a string collection value of a label from metadata"""
label: CORE_LABELS_STRING = InputField(
default=CUSTOM_LABEL,
description=FieldDescriptions.metadata_item_label,
input=Input.Direct,
)
custom_label: Optional[str] = InputField(
default=None,
description=FieldDescriptions.metadata_item_label,
input=Input.Direct,
)
default_value: list[str] = InputField(
description="The default string collection to use if not found in the metadata"
)
_validate_custom_label = model_validator(mode="after")(validate_custom_label)
def invoke(self, context: InvocationContext) -> StringCollectionOutput:
data: Dict[str, Any] = {} if self.metadata is None else self.metadata.root
output = data.get(str(self.custom_label if self.label == CUSTOM_LABEL else self.label), self.default_value)
return StringCollectionOutput(collection=output)
@invocation(
"metadata_to_integer_collection",
title="Metadata To Integer Collection",
tags=["metadata"],
category="metadata",
version="1.0.0",
classification=Classification.Beta,
)
class MetadataToIntegerCollectionInvocation(BaseInvocation, WithMetadata):
"""Extracts an integer value Collection of a label from metadata"""
label: CORE_LABELS_INTEGER = InputField(
default=CUSTOM_LABEL,
description=FieldDescriptions.metadata_item_label,
input=Input.Direct,
)
custom_label: Optional[str] = InputField(
default=None,
description=FieldDescriptions.metadata_item_label,
input=Input.Direct,
)
default_value: list[int] = InputField(description="The default integer to use if not found in the metadata")
_validate_custom_label = model_validator(mode="after")(validate_custom_label)
def invoke(self, context: InvocationContext) -> IntegerCollectionOutput:
data: Dict[str, Any] = {} if self.metadata is None else self.metadata.root
output = data.get(str(self.custom_label if self.label == CUSTOM_LABEL else self.label), self.default_value)
return IntegerCollectionOutput(collection=output)
@invocation(
"metadata_to_float_collection",
title="Metadata To Float Collection",
tags=["metadata"],
category="metadata",
version="1.0.0",
classification=Classification.Beta,
)
class MetadataToFloatCollectionInvocation(BaseInvocation, WithMetadata):
"""Extracts a Float value Collection of a label from metadata"""
label: CORE_LABELS_FLOAT = InputField(
default=CUSTOM_LABEL,
description=FieldDescriptions.metadata_item_label,
input=Input.Direct,
)
custom_label: Optional[str] = InputField(
default=None,
description=FieldDescriptions.metadata_item_label,
input=Input.Direct,
)
default_value: list[float] = InputField(description="The default float to use if not found in the metadata")
_validate_custom_label = model_validator(mode="after")(validate_custom_label)
def invoke(self, context: InvocationContext) -> FloatCollectionOutput:
data: Dict[str, Any] = {} if self.metadata is None else self.metadata.root
output = data.get(str(self.custom_label if self.label == CUSTOM_LABEL else self.label), self.default_value)
return FloatCollectionOutput(collection=output)
@invocation(
"metadata_to_bool_collection",
title="Metadata To Bool Collection",
tags=["metadata"],
category="metadata",
version="1.0.0",
classification=Classification.Beta,
)
class MetadataToBoolCollectionInvocation(BaseInvocation, WithMetadata):
"""Extracts a Boolean value Collection of a label from metadata"""
label: CORE_LABELS_BOOL = InputField(
default=CUSTOM_LABEL,
description=FieldDescriptions.metadata_item_label,
input=Input.Direct,
)
custom_label: Optional[str] = InputField(
default=None,
description=FieldDescriptions.metadata_item_label,
input=Input.Direct,
)
default_value: list[bool] = InputField(description="The default bool to use if not found in the metadata")
_validate_custom_label = model_validator(mode="after")(validate_custom_label)
def invoke(self, context: InvocationContext) -> BooleanCollectionOutput:
data: Dict[str, Any] = {} if self.metadata is None else self.metadata.root
output = data.get(str(self.custom_label if self.label == CUSTOM_LABEL else self.label), self.default_value)
return BooleanCollectionOutput(collection=output)

View File

@@ -68,11 +68,6 @@ class T5EncoderField(BaseModel):
loras: List[LoRAField] = Field(description="LoRAs to apply on model loading")
class GlmEncoderField(BaseModel):
tokenizer: ModelIdentifierField = Field(description="Info to load tokenizer submodel")
text_encoder: ModelIdentifierField = Field(description="Info to load text_encoder submodel")
class VAEField(BaseModel):
vae: ModelIdentifierField = Field(description="Info to load vae submodel")
seamless_axes: List[str] = Field(default_factory=list, description='Axes("x" and "y") to which apply seamless')

View File

@@ -13,7 +13,6 @@ from invokeai.app.invocations.baseinvocation import (
from invokeai.app.invocations.constants import LATENT_SCALE_FACTOR
from invokeai.app.invocations.fields import (
BoundingBoxField,
CogView4ConditioningField,
ColorField,
ConditioningField,
DenoiseMaskField,
@@ -441,17 +440,6 @@ class SD3ConditioningOutput(BaseInvocationOutput):
return cls(conditioning=SD3ConditioningField(conditioning_name=conditioning_name))
@invocation_output("cogview4_conditioning_output")
class CogView4ConditioningOutput(BaseInvocationOutput):
"""Base class for nodes that output a CogView text conditioning tensor."""
conditioning: CogView4ConditioningField = OutputField(description=FieldDescriptions.cond)
@classmethod
def build(cls, conditioning_name: str) -> "CogView4ConditioningOutput":
return cls(conditioning=CogView4ConditioningField(conditioning_name=conditioning_name))
@invocation_output("conditioning_output")
class ConditioningOutput(BaseInvocationOutput):
"""Base class for nodes that output a single conditioning tensor"""

View File

@@ -24,7 +24,7 @@ from invokeai.app.invocations.sd3_text_encoder import SD3_T5_MAX_SEQ_LEN
from invokeai.app.services.shared.invocation_context import InvocationContext
from invokeai.backend.flux.sampling_utils import clip_timestep_schedule_fractional
from invokeai.backend.model_manager import BaseModelType
from invokeai.backend.rectified_flow.rectified_flow_inpaint_extension import RectifiedFlowInpaintExtension
from invokeai.backend.sd3.extensions.inpaint_extension import InpaintExtension
from invokeai.backend.stable_diffusion.diffusers_pipeline import PipelineIntermediateState
from invokeai.backend.stable_diffusion.diffusion.conditioning_data import SD3ConditioningInfo
from invokeai.backend.util.devices import TorchDevice
@@ -263,10 +263,10 @@ class SD3DenoiseInvocation(BaseInvocation, WithMetadata, WithBoard):
# Prepare inpaint extension.
inpaint_mask = self._prep_inpaint_mask(context, latents)
inpaint_extension: RectifiedFlowInpaintExtension | None = None
inpaint_extension: InpaintExtension | None = None
if inpaint_mask is not None:
assert init_latents is not None
inpaint_extension = RectifiedFlowInpaintExtension(
inpaint_extension = InpaintExtension(
init_latents=init_latents,
inpaint_mask=inpaint_mask,
noise=noise,

View File

@@ -9,7 +9,7 @@ from pydantic import field_validator
from invokeai.app.invocations.baseinvocation import BaseInvocation, invocation
from invokeai.app.invocations.constants import LATENT_SCALE_FACTOR
from invokeai.app.invocations.controlnet import ControlField
from invokeai.app.invocations.controlnet_image_processors import ControlField
from invokeai.app.invocations.denoise_latents import DenoiseLatentsInvocation, get_scheduler
from invokeai.app.invocations.fields import (
ConditioningField,

View File

@@ -31,12 +31,6 @@ def run_app() -> None:
if app_config.pytorch_cuda_alloc_conf:
configure_torch_cuda_allocator(app_config.pytorch_cuda_alloc_conf, logger)
# This import must happen after configure_torch_cuda_allocator() is called, because the module imports torch.
from invokeai.backend.util.devices import TorchDevice
torch_device_name = TorchDevice.get_torch_device_name()
logger.info(f"Using torch device: {torch_device_name}")
# Import from startup_utils here to avoid importing torch before configure_torch_cuda_allocator() is called.
from invokeai.app.util.startup_utils import (
apply_monkeypatches,

View File

@@ -38,6 +38,7 @@ from invokeai.backend.model_manager.config import (
AnyModelConfig,
CheckpointConfigBase,
InvalidModelConfigException,
ModelConfigBase,
)
from invokeai.backend.model_manager.legacy_probe import ModelProbe
from invokeai.backend.model_manager.metadata import (
@@ -646,14 +647,10 @@ class ModelInstallService(ModelInstallServiceBase):
hash_algo = self._app_config.hashing_algorithm
fields = config.model_dump()
return ModelProbe.probe(model_path=model_path, fields=fields, hash_algo=hash_algo)
# New model probe API is disabled pending resolution of issue caused by a change of the ordering of checks.
# See commit message for details.
# try:
# return ModelConfigBase.classify(model_path=model_path, hash_algo=hash_algo, **fields)
# except InvalidModelConfigException:
# return ModelProbe.probe(model_path=model_path, fields=fields, hash_algo=hash_algo) # type: ignore
try:
return ModelConfigBase.classify(model_path=model_path, hash_algo=hash_algo, **fields)
except InvalidModelConfigException:
return ModelProbe.probe(model_path=model_path, fields=fields, hash_algo=hash_algo) # type: ignore
def _register(
self, model_path: Path, config: Optional[ModelRecordChanges] = None, info: Optional[AnyModelConfig] = None

View File

@@ -80,7 +80,6 @@ class ModelRecordChanges(BaseModelExcludeNull):
type: Optional[ModelType] = Field(description="Type of model", default=None)
key: Optional[str] = Field(description="Database ID for this model", default=None)
hash: Optional[str] = Field(description="hash of model file", default=None)
file_size: Optional[int] = Field(description="Size of model file", default=None)
format: Optional[str] = Field(description="format of model file", default=None)
trigger_phrases: Optional[set[str]] = Field(description="Set of trigger phrases for this model", default=None)
default_settings: Optional[MainModelDefaultSettings | ControlAdapterDefaultSettings] = Field(

View File

@@ -302,10 +302,7 @@ class ModelRecordServiceSQL(ModelRecordServiceBase):
# We catch this error so that the app can still run if there are invalid model configs in the database.
# One reason that an invalid model config might be in the database is if someone had to rollback from a
# newer version of the app that added a new model type.
row_data = f"{row[0][:64]}..." if len(row[0]) > 64 else row[0]
self._logger.warning(
f"Found an invalid model config in the database. Ignoring this model. ({row_data})"
)
self._logger.warning(f"Found an invalid model config in the database. Ignoring this model. ({row[0]})")
else:
results.append(model_config)

View File

@@ -21,16 +21,10 @@ class ObjectSerializerDisk(ObjectSerializerBase[T]):
"""Disk-backed storage for arbitrary python objects. Serialization is handled by `torch.save` and `torch.load`.
:param output_dir: The folder where the serialized objects will be stored
:param safe_globals: A list of types to be added to the safe globals for torch serialization
:param ephemeral: If True, objects will be stored in a temporary directory inside the given output_dir and cleaned up on exit
"""
def __init__(
self,
output_dir: Path,
safe_globals: list[type],
ephemeral: bool = False,
) -> None:
def __init__(self, output_dir: Path, ephemeral: bool = False):
super().__init__()
self._ephemeral = ephemeral
self._base_output_dir = output_dir
@@ -48,8 +42,6 @@ class ObjectSerializerDisk(ObjectSerializerBase[T]):
self._output_dir = Path(self._tempdir.name) if self._tempdir else self._base_output_dir
self.__obj_class_name: Optional[str] = None
torch.serialization.add_safe_globals(safe_globals) if safe_globals else None
def load(self, name: str) -> T:
file_path = self._get_path(name)
try:

View File

@@ -201,12 +201,6 @@ def get_workflow(queue_item_dict: dict) -> Optional[WorkflowWithoutID]:
return None
class FieldIdentifier(BaseModel):
kind: Literal["input", "output"] = Field(description="The kind of field")
node_id: str = Field(description="The ID of the node")
field_name: str = Field(description="The name of the field")
class SessionQueueItemWithoutGraph(BaseModel):
"""Session queue item without the full graph. Used for serialization."""
@@ -243,20 +237,6 @@ class SessionQueueItemWithoutGraph(BaseModel):
retried_from_item_id: Optional[int] = Field(
default=None, description="The item_id of the queue item that this item was retried from"
)
is_api_validation_run: bool = Field(
default=False,
description="Whether this queue item is an API validation run.",
)
published_workflow_id: Optional[str] = Field(
default=None,
description="The ID of the published workflow associated with this queue item",
)
api_input_fields: Optional[list[FieldIdentifier]] = Field(
default=None, description="The fields that were used as input to the API"
)
api_output_fields: Optional[list[FieldIdentifier]] = Field(
default=None, description="The nodes that were used as output from the API"
)
@classmethod
def queue_item_dto_from_dict(cls, queue_item_dict: dict) -> "SessionQueueItemDTO":

View File

@@ -21,7 +21,6 @@ from invokeai.app.invocations import * # noqa: F401 F403
from invokeai.app.invocations.baseinvocation import (
BaseInvocation,
BaseInvocationOutput,
InvocationRegistry,
invocation,
invocation_output,
)
@@ -284,7 +283,7 @@ class AnyInvocation(BaseInvocation):
@classmethod
def __get_pydantic_core_schema__(cls, source_type: Any, handler: GetCoreSchemaHandler) -> core_schema.CoreSchema:
def validate_invocation(v: Any) -> "AnyInvocation":
return InvocationRegistry.get_invocation_typeadapter().validate_python(v)
return BaseInvocation.get_typeadapter().validate_python(v)
return core_schema.no_info_plain_validator_function(validate_invocation)
@@ -295,7 +294,7 @@ class AnyInvocation(BaseInvocation):
# Nodes are too powerful, we have to make our own OpenAPI schema manually
# No but really, because the schema is dynamic depending on loaded nodes, we need to generate it manually
oneOf: list[dict[str, str]] = []
names = [i.__name__ for i in InvocationRegistry.get_invocation_classes()]
names = [i.__name__ for i in BaseInvocation.get_invocations()]
for name in sorted(names):
oneOf.append({"$ref": f"#/components/schemas/{name}"})
return {"oneOf": oneOf}
@@ -305,7 +304,7 @@ class AnyInvocationOutput(BaseInvocationOutput):
@classmethod
def __get_pydantic_core_schema__(cls, source_type: Any, handler: GetCoreSchemaHandler):
def validate_invocation_output(v: Any) -> "AnyInvocationOutput":
return InvocationRegistry.get_output_typeadapter().validate_python(v)
return BaseInvocationOutput.get_typeadapter().validate_python(v)
return core_schema.no_info_plain_validator_function(validate_invocation_output)
@@ -317,7 +316,7 @@ class AnyInvocationOutput(BaseInvocationOutput):
# No but really, because the schema is dynamic depending on loaded nodes, we need to generate it manually
oneOf: list[dict[str, str]] = []
names = [i.__name__ for i in InvocationRegistry.get_output_classes()]
names = [i.__name__ for i in BaseInvocationOutput.get_outputs()]
for name in sorted(names):
oneOf.append({"$ref": f"#/components/schemas/{name}"})
return {"oneOf": oneOf}

View File

@@ -18,10 +18,9 @@ from invokeai.app.services.invocation_services import InvocationServices
from invokeai.app.services.model_records.model_records_base import UnknownModelException
from invokeai.app.services.session_processor.session_processor_common import ProgressImage
from invokeai.app.services.shared.sqlite.sqlite_common import SQLiteDirection
from invokeai.app.util.step_callback import diffusion_step_callback
from invokeai.app.util.step_callback import flux_step_callback, stable_diffusion_step_callback
from invokeai.backend.model_manager.config import (
AnyModelConfig,
ModelConfigBase,
)
from invokeai.backend.model_manager.load.load_base import LoadedModel, LoadedModelWithoutConfig
from invokeai.backend.model_manager.taxonomy import AnyModel, BaseModelType, ModelFormat, ModelType, SubModelType
@@ -544,30 +543,6 @@ class ModelsInterface(InvocationContextInterface):
self._util.signal_progress(f"Loading model {source}")
return self._services.model_manager.load.load_model_from_path(model_path=model_path, loader=loader)
def get_absolute_path(self, config_or_path: AnyModelConfig | Path | str) -> Path:
"""Gets the absolute path for a given model config or path.
For example, if the model's path is `flux/main/FLUX Dev.safetensors`, and the models path is
`/home/username/InvokeAI/models`, this method will return
`/home/username/InvokeAI/models/flux/main/FLUX Dev.safetensors`.
Args:
config_or_path: The model config or path.
Returns:
The absolute path to the model.
"""
model_path = Path(config_or_path.path) if isinstance(config_or_path, ModelConfigBase) else Path(config_or_path)
if model_path.is_absolute():
return model_path.resolve()
base_models_path = self._services.configuration.models_path
joined_path = base_models_path / model_path
resolved_path = joined_path.resolve()
return resolved_path
class ConfigInterface(InvocationContextInterface):
def get(self) -> InvokeAIAppConfig:
@@ -607,7 +582,7 @@ class UtilInterface(InvocationContextInterface):
base_model: The base model for the current denoising step.
"""
diffusion_step_callback(
stable_diffusion_step_callback(
signal_progress=self.signal_progress,
intermediate_state=intermediate_state,
base_model=base_model,
@@ -625,10 +600,9 @@ class UtilInterface(InvocationContextInterface):
intermediate_state: The intermediate state of the diffusion pipeline.
"""
diffusion_step_callback(
flux_step_callback(
signal_progress=self.signal_progress,
intermediate_state=intermediate_state,
base_model=BaseModelType.Flux,
is_canceled=self.is_canceled,
)

View File

@@ -21,7 +21,6 @@ from invokeai.app.services.shared.sqlite_migrator.migrations.migration_15 import
from invokeai.app.services.shared.sqlite_migrator.migrations.migration_16 import build_migration_16
from invokeai.app.services.shared.sqlite_migrator.migrations.migration_17 import build_migration_17
from invokeai.app.services.shared.sqlite_migrator.migrations.migration_18 import build_migration_18
from invokeai.app.services.shared.sqlite_migrator.migrations.migration_19 import build_migration_19
from invokeai.app.services.shared.sqlite_migrator.sqlite_migrator_impl import SqliteMigrator
@@ -60,7 +59,6 @@ def init_db(config: InvokeAIAppConfig, logger: Logger, image_files: ImageFileSto
migrator.register_migration(build_migration_16())
migrator.register_migration(build_migration_17())
migrator.register_migration(build_migration_18())
migrator.register_migration(build_migration_19(app_config=config))
migrator.run_migrations()
return db

View File

@@ -1,37 +0,0 @@
import sqlite3
from invokeai.app.services.config import InvokeAIAppConfig
from invokeai.app.services.shared.sqlite_migrator.sqlite_migrator_common import Migration
from invokeai.backend.model_manager.model_on_disk import ModelOnDisk
class Migration19Callback:
def __init__(self, app_config: InvokeAIAppConfig):
self.models_path = app_config.models_path
def __call__(self, cursor: sqlite3.Cursor) -> None:
self._populate_size(cursor)
self._add_size_column(cursor)
def _add_size_column(self, cursor: sqlite3.Cursor) -> None:
cursor.execute(
"ALTER TABLE models ADD COLUMN file_size INTEGER "
"GENERATED ALWAYS as (json_extract(config, '$.file_size')) VIRTUAL NOT NULL"
)
def _populate_size(self, cursor: sqlite3.Cursor) -> None:
all_models = cursor.execute("SELECT id, path FROM models;").fetchall()
for model_id, model_path in all_models:
mod = ModelOnDisk(self.models_path / model_path)
cursor.execute(
"UPDATE models SET config = json_set(config, '$.file_size', ?) WHERE id = ?", (mod.size(), model_id)
)
def build_migration_19(app_config: InvokeAIAppConfig) -> Migration:
return Migration(
from_version=18,
to_version=19,
callback=Migration19Callback(app_config),
)

View File

@@ -1,343 +0,0 @@
{
"name": "Text to Image - CogView4",
"author": "",
"description": "Generate an image from a prompt with CogView4.",
"version": "",
"contact": "",
"tags": "CogView4, Text to Image",
"notes": "",
"exposedFields": [],
"meta": { "category": "default", "version": "3.0.0" },
"id": "default_0e405a8e-ab5e-4e6c-bd99-b59deabd5591",
"form": {
"elements": {
"container-XSINSu999B": {
"id": "container-XSINSu999B",
"data": {
"layout": "column",
"children": [
"heading-N0TXlsboP5",
"text-PVw8AvXCTz",
"divider-5wmCOm9mqG",
"node-field-gPil4XSw8L",
"node-field-T2oYYNrAzH",
"node-field-SRj6Dn28lm"
]
},
"type": "container"
},
"node-field-gPil4XSw8L": {
"id": "node-field-gPil4XSw8L",
"type": "node-field",
"parentId": "container-XSINSu999B",
"data": {
"fieldIdentifier": {
"nodeId": "a4569d8b-6a43-44b9-8919-4ceec6682904",
"fieldName": "prompt"
},
"settings": {
"type": "string-field-config",
"component": "textarea"
},
"showDescription": false
}
},
"node-field-T2oYYNrAzH": {
"id": "node-field-T2oYYNrAzH",
"type": "node-field",
"parentId": "container-XSINSu999B",
"data": {
"fieldIdentifier": {
"nodeId": "acb26944-1208-4016-9929-ab8dd0860573",
"fieldName": "prompt"
},
"settings": {
"type": "string-field-config",
"component": "textarea"
},
"showDescription": false
}
},
"node-field-SRj6Dn28lm": {
"id": "node-field-SRj6Dn28lm",
"type": "node-field",
"parentId": "container-XSINSu999B",
"data": {
"fieldIdentifier": {
"nodeId": "7890507c-d346-4d13-bcb4-bc6d4850b2e3",
"fieldName": "model"
},
"showDescription": false
}
},
"heading-N0TXlsboP5": {
"id": "heading-N0TXlsboP5",
"parentId": "container-XSINSu999B",
"type": "heading",
"data": { "content": "Text to Image - CogView4" }
},
"text-PVw8AvXCTz": {
"id": "text-PVw8AvXCTz",
"parentId": "container-XSINSu999B",
"type": "text",
"data": { "content": "Generate an image from a prompt with CogView4." }
},
"divider-5wmCOm9mqG": {
"id": "divider-5wmCOm9mqG",
"parentId": "container-XSINSu999B",
"type": "divider"
}
},
"rootElementId": "container-XSINSu999B"
},
"nodes": [
{
"id": "7890507c-d346-4d13-bcb4-bc6d4850b2e3",
"type": "invocation",
"data": {
"id": "7890507c-d346-4d13-bcb4-bc6d4850b2e3",
"version": "1.0.0",
"nodePack": "invokeai",
"label": "",
"notes": "",
"type": "cogview4_model_loader",
"inputs": {
"model": {
"name": "model",
"label": ""
}
},
"isOpen": true,
"isIntermediate": true,
"useCache": true
},
"position": { "x": -52.193850056888095, "y": 282.4721422789611 }
},
{
"id": "a4569d8b-6a43-44b9-8919-4ceec6682904",
"type": "invocation",
"data": {
"id": "a4569d8b-6a43-44b9-8919-4ceec6682904",
"version": "1.0.0",
"nodePack": "invokeai",
"label": "",
"notes": "",
"type": "cogview4_text_encoder",
"inputs": {
"prompt": {
"name": "prompt",
"label": "Positive Prompt",
"description": "",
"value": "A whimsical stuffed gnome sits on a golden sandy beach, its plush fabric slightly textured and well-worn. The gnome has a round, cheerful face with a fluffy white beard, a bulbous nose, and a tall, slightly floppy red hat with a few decorative stitching details. It wears a tiny blue vest over a soft, earthy-toned tunic, and its stubby arms grasp a ripe yellow banana with a few brown speckles. The ocean waves gently roll onto the shore in the background, with turquoise water reflecting the warm glow of the late afternoon sun. A few scattered seashells and driftwood pieces are near the gnome, while a colorful beach umbrella and footprints in the sand hint at a lively beach scene. The sky is a soft pastel blend of pink, orange, and light blue, with wispy clouds stretching across the horizon.\n"
},
"glm_encoder": {
"name": "glm_encoder",
"label": "",
"description": ""
}
},
"isOpen": true,
"isIntermediate": true,
"useCache": true
},
"position": { "x": 328.9380683664592, "y": 305.11768986950995 }
},
{
"id": "acb26944-1208-4016-9929-ab8dd0860573",
"type": "invocation",
"data": {
"id": "acb26944-1208-4016-9929-ab8dd0860573",
"version": "1.0.0",
"nodePack": "invokeai",
"label": "",
"notes": "",
"type": "cogview4_text_encoder",
"inputs": {
"prompt": {
"name": "prompt",
"label": "Negative Prompt",
"description": "",
"value": ""
},
"glm_encoder": {
"name": "glm_encoder",
"label": "",
"description": ""
}
},
"isOpen": true,
"isIntermediate": true,
"useCache": true
},
"position": { "x": 334.6799782744916, "y": 496.5882067536601 }
},
{
"id": "cdd72700-463d-4e10-8d76-3e842e4c0b49",
"type": "invocation",
"data": {
"id": "cdd72700-463d-4e10-8d76-3e842e4c0b49",
"version": "1.0.0",
"nodePack": "invokeai",
"label": "",
"notes": "",
"type": "cogview4_l2i",
"inputs": {
"board": {
"name": "board",
"label": "",
"description": "",
"value": "auto"
},
"metadata": { "name": "metadata", "label": "", "description": "" },
"latents": { "name": "latents", "label": "", "description": "" },
"vae": { "name": "vae", "label": "", "description": "" }
},
"isOpen": true,
"isIntermediate": false,
"useCache": true
},
"position": { "x": 1112.027247217991, "y": 294.1351498145327 }
},
{
"id": "e75e2ced-284e-4135-81dc-cdf06c7a409d",
"type": "invocation",
"data": {
"id": "e75e2ced-284e-4135-81dc-cdf06c7a409d",
"version": "1.0.0",
"nodePack": "invokeai",
"label": "",
"notes": "",
"type": "cogview4_denoise",
"inputs": {
"board": {
"name": "board",
"label": "",
"description": "",
"value": "auto"
},
"metadata": { "name": "metadata", "label": "", "description": "" },
"latents": { "name": "latents", "label": "", "description": "" },
"denoise_mask": {
"name": "denoise_mask",
"label": "",
"description": ""
},
"denoising_start": {
"name": "denoising_start",
"label": "",
"description": "",
"value": 0
},
"denoising_end": {
"name": "denoising_end",
"label": "",
"description": "",
"value": 1
},
"transformer": {
"name": "transformer",
"label": "",
"description": ""
},
"positive_conditioning": {
"name": "positive_conditioning",
"label": "",
"description": ""
},
"negative_conditioning": {
"name": "negative_conditioning",
"label": "",
"description": ""
},
"cfg_scale": {
"name": "cfg_scale",
"label": "",
"description": "",
"value": 3.5
},
"width": {
"name": "width",
"label": "",
"description": "",
"value": 1024
},
"height": {
"name": "height",
"label": "",
"description": "",
"value": 1024
},
"steps": {
"name": "steps",
"label": "",
"description": "",
"value": 30
},
"seed": { "name": "seed", "label": "", "description": "", "value": 0 }
},
"isOpen": true,
"isIntermediate": true,
"useCache": false
},
"position": { "x": 720.8830004638692, "y": 332.66609681908415 }
}
],
"edges": [
{
"id": "reactflow__edge-7890507c-d346-4d13-bcb4-bc6d4850b2e3vae-cdd72700-463d-4e10-8d76-3e842e4c0b49vae",
"type": "default",
"source": "7890507c-d346-4d13-bcb4-bc6d4850b2e3",
"target": "cdd72700-463d-4e10-8d76-3e842e4c0b49",
"sourceHandle": "vae",
"targetHandle": "vae"
},
{
"id": "reactflow__edge-7890507c-d346-4d13-bcb4-bc6d4850b2e3glm_encoder-a4569d8b-6a43-44b9-8919-4ceec6682904glm_encoder",
"type": "default",
"source": "7890507c-d346-4d13-bcb4-bc6d4850b2e3",
"target": "a4569d8b-6a43-44b9-8919-4ceec6682904",
"sourceHandle": "glm_encoder",
"targetHandle": "glm_encoder"
},
{
"id": "reactflow__edge-7890507c-d346-4d13-bcb4-bc6d4850b2e3glm_encoder-acb26944-1208-4016-9929-ab8dd0860573glm_encoder",
"type": "default",
"source": "7890507c-d346-4d13-bcb4-bc6d4850b2e3",
"target": "acb26944-1208-4016-9929-ab8dd0860573",
"sourceHandle": "glm_encoder",
"targetHandle": "glm_encoder"
},
{
"id": "reactflow__edge-a4569d8b-6a43-44b9-8919-4ceec6682904conditioning-e75e2ced-284e-4135-81dc-cdf06c7a409dpositive_conditioning",
"type": "default",
"source": "a4569d8b-6a43-44b9-8919-4ceec6682904",
"target": "e75e2ced-284e-4135-81dc-cdf06c7a409d",
"sourceHandle": "conditioning",
"targetHandle": "positive_conditioning"
},
{
"id": "reactflow__edge-acb26944-1208-4016-9929-ab8dd0860573conditioning-e75e2ced-284e-4135-81dc-cdf06c7a409dnegative_conditioning",
"type": "default",
"source": "acb26944-1208-4016-9929-ab8dd0860573",
"target": "e75e2ced-284e-4135-81dc-cdf06c7a409d",
"sourceHandle": "conditioning",
"targetHandle": "negative_conditioning"
},
{
"id": "reactflow__edge-e75e2ced-284e-4135-81dc-cdf06c7a409dlatents-cdd72700-463d-4e10-8d76-3e842e4c0b49latents",
"type": "default",
"source": "e75e2ced-284e-4135-81dc-cdf06c7a409d",
"target": "cdd72700-463d-4e10-8d76-3e842e4c0b49",
"sourceHandle": "latents",
"targetHandle": "latents"
},
{
"id": "reactflow__edge-7890507c-d346-4d13-bcb4-bc6d4850b2e3transformer-e75e2ced-284e-4135-81dc-cdf06c7a409dtransformer",
"type": "default",
"source": "7890507c-d346-4d13-bcb4-bc6d4850b2e3",
"target": "e75e2ced-284e-4135-81dc-cdf06c7a409d",
"sourceHandle": "transformer",
"targetHandle": "transformer"
}
]
}

View File

@@ -47,7 +47,6 @@ class WorkflowRecordsStorageBase(ABC):
query: Optional[str],
tags: Optional[list[str]],
has_been_opened: Optional[bool],
is_published: Optional[bool],
) -> PaginatedResults[WorkflowRecordListItemDTO]:
"""Gets many workflows."""
pass
@@ -57,7 +56,6 @@ class WorkflowRecordsStorageBase(ABC):
self,
categories: list[WorkflowCategory],
has_been_opened: Optional[bool] = None,
is_published: Optional[bool] = None,
) -> dict[str, int]:
"""Gets a dictionary of counts for each of the provided categories."""
pass
@@ -68,7 +66,6 @@ class WorkflowRecordsStorageBase(ABC):
tags: list[str],
categories: Optional[list[WorkflowCategory]] = None,
has_been_opened: Optional[bool] = None,
is_published: Optional[bool] = None,
) -> dict[str, int]:
"""Gets a dictionary of counts for each of the provided tags."""
pass

View File

@@ -67,7 +67,6 @@ class WorkflowWithoutID(BaseModel):
# This is typed as optional to prevent errors when pulling workflows from the DB. The frontend adds a default form if
# it is None.
form: dict[str, JsonValue] | None = Field(default=None, description="The form of the workflow.")
is_published: bool | None = Field(default=None, description="Whether the workflow is published or not.")
model_config = ConfigDict(extra="ignore")
@@ -102,7 +101,6 @@ class WorkflowRecordDTOBase(BaseModel):
opened_at: Optional[Union[datetime.datetime, str]] = Field(
default=None, description="The opened timestamp of the workflow."
)
is_published: bool | None = Field(default=None, description="Whether the workflow is published or not.")
class WorkflowRecordDTO(WorkflowRecordDTOBase):

View File

@@ -119,7 +119,6 @@ class SqliteWorkflowRecordsStorage(WorkflowRecordsStorageBase):
query: Optional[str] = None,
tags: Optional[list[str]] = None,
has_been_opened: Optional[bool] = None,
is_published: Optional[bool] = None,
) -> PaginatedResults[WorkflowRecordListItemDTO]:
# sanitize!
assert order_by in WorkflowRecordOrderBy
@@ -242,7 +241,6 @@ class SqliteWorkflowRecordsStorage(WorkflowRecordsStorageBase):
tags: list[str],
categories: Optional[list[WorkflowCategory]] = None,
has_been_opened: Optional[bool] = None,
is_published: Optional[bool] = None,
) -> dict[str, int]:
if not tags:
return {}
@@ -294,7 +292,6 @@ class SqliteWorkflowRecordsStorage(WorkflowRecordsStorageBase):
self,
categories: list[WorkflowCategory],
has_been_opened: Optional[bool] = None,
is_published: Optional[bool] = None,
) -> dict[str, int]:
cursor = self._conn.cursor()
result: dict[str, int] = {}

View File

@@ -4,10 +4,7 @@ from fastapi import FastAPI
from fastapi.openapi.utils import get_openapi
from pydantic.json_schema import models_json_schema
from invokeai.app.invocations.baseinvocation import (
InvocationRegistry,
UIConfigBase,
)
from invokeai.app.invocations.baseinvocation import BaseInvocation, BaseInvocationOutput, UIConfigBase
from invokeai.app.invocations.fields import InputFieldJSONSchemaExtra, OutputFieldJSONSchemaExtra
from invokeai.app.invocations.model import ModelIdentifierField
from invokeai.app.services.events.events_common import EventBase
@@ -59,14 +56,14 @@ def get_openapi_func(
invocation_output_map_required: list[str] = []
# We need to manually add all outputs to the schema - pydantic doesn't add them because they aren't used directly.
for output in InvocationRegistry.get_output_classes():
for output in BaseInvocationOutput.get_outputs():
json_schema = output.model_json_schema(mode="serialization", ref_template="#/components/schemas/{model}")
move_defs_to_top_level(openapi_schema, json_schema)
openapi_schema["components"]["schemas"][output.__name__] = json_schema
# Technically, invocations are added to the schema by pydantic, but we still need to manually set their output
# property, so we'll just do it all manually.
for invocation in InvocationRegistry.get_invocation_classes():
for invocation in BaseInvocation.get_invocations():
json_schema = invocation.model_json_schema(
mode="serialization", ref_template="#/components/schemas/{model}"
)

View File

@@ -65,6 +65,9 @@ def apply_monkeypatches() -> None:
import invokeai.backend.util.hotfixes # noqa: F401 (monkeypatching on import)
if torch.backends.mps.is_available():
import invokeai.backend.util.mps_fixes # noqa: F401 (monkeypatching on import)
def register_mime_types() -> None:
"""Register additional mime types for windows."""

View File

@@ -8,8 +8,6 @@ from invokeai.app.services.session_processor.session_processor_common import Can
from invokeai.backend.model_manager.taxonomy import BaseModelType
from invokeai.backend.stable_diffusion.diffusers_pipeline import PipelineIntermediateState
# See scripts/generate_vae_linear_approximation.py for generating these factors.
# fast latents preview matrix for sdxl
# generated by @StAlKeR7779
SDXL_LATENT_RGB_FACTORS = [
@@ -74,32 +72,11 @@ FLUX_LATENT_RGB_FACTORS = [
[-0.1146, -0.0827, -0.0598],
]
COGVIEW4_LATENT_RGB_FACTORS = [
[0.00408832, -0.00082485, -0.00214816],
[0.00084172, 0.00132241, 0.00842067],
[-0.00466737, -0.00983181, -0.00699561],
[0.03698397, -0.04797235, 0.03585809],
[0.00234701, -0.00124326, 0.00080869],
[-0.00723903, -0.00388422, -0.00656606],
[-0.00970917, -0.00467356, -0.00971113],
[0.17292486, -0.03452463, -0.1457515],
[0.02330308, 0.02942557, 0.02704329],
[-0.00903131, -0.01499841, -0.01432564],
[0.01250298, 0.0019407, -0.02168986],
[0.01371188, 0.00498283, -0.01302135],
[0.42396525, 0.4280575, 0.42148206],
[0.00983825, 0.00613302, 0.00610316],
[0.00473307, -0.00889551, -0.00915924],
[-0.00955853, -0.00980067, -0.00977842],
]
def sample_to_lowres_estimated_image(
samples: torch.Tensor, latent_rgb_factors: torch.Tensor, smooth_matrix: Optional[torch.Tensor] = None
):
if samples.dim() == 4:
samples = samples[0]
latent_image = samples.permute(1, 2, 0) @ latent_rgb_factors
latent_image = samples[0].permute(1, 2, 0) @ latent_rgb_factors
if smooth_matrix is not None:
latent_image = latent_image.unsqueeze(0).permute(3, 0, 1, 2)
@@ -131,7 +108,7 @@ def calc_percentage(intermediate_state: PipelineIntermediateState) -> float:
SignalProgressFunc: TypeAlias = Callable[[str, float | None, Image.Image | None, tuple[int, int] | None], None]
def diffusion_step_callback(
def stable_diffusion_step_callback(
signal_progress: SignalProgressFunc,
intermediate_state: PipelineIntermediateState,
base_model: BaseModelType,
@@ -148,28 +125,39 @@ def diffusion_step_callback(
else:
sample = intermediate_state.latents
smooth_matrix: list[list[float]] | None = None
if base_model in [BaseModelType.StableDiffusion1, BaseModelType.StableDiffusion2]:
latent_rgb_factors = SD1_5_LATENT_RGB_FACTORS
elif base_model in [BaseModelType.StableDiffusionXL, BaseModelType.StableDiffusionXLRefiner]:
latent_rgb_factors = SDXL_LATENT_RGB_FACTORS
smooth_matrix = SDXL_SMOOTH_MATRIX
if base_model in [BaseModelType.StableDiffusionXL, BaseModelType.StableDiffusionXLRefiner]:
sdxl_latent_rgb_factors = torch.tensor(SDXL_LATENT_RGB_FACTORS, dtype=sample.dtype, device=sample.device)
sdxl_smooth_matrix = torch.tensor(SDXL_SMOOTH_MATRIX, dtype=sample.dtype, device=sample.device)
image = sample_to_lowres_estimated_image(sample, sdxl_latent_rgb_factors, sdxl_smooth_matrix)
elif base_model == BaseModelType.StableDiffusion3:
latent_rgb_factors = SD3_5_LATENT_RGB_FACTORS
elif base_model == BaseModelType.CogView4:
latent_rgb_factors = COGVIEW4_LATENT_RGB_FACTORS
elif base_model == BaseModelType.Flux:
latent_rgb_factors = FLUX_LATENT_RGB_FACTORS
sd3_latent_rgb_factors = torch.tensor(SD3_5_LATENT_RGB_FACTORS, dtype=sample.dtype, device=sample.device)
image = sample_to_lowres_estimated_image(sample, sd3_latent_rgb_factors)
else:
raise ValueError(f"Unsupported base model: {base_model}")
latent_rgb_factors_torch = torch.tensor(latent_rgb_factors, dtype=sample.dtype, device=sample.device)
smooth_matrix_torch = (
torch.tensor(smooth_matrix, dtype=sample.dtype, device=sample.device) if smooth_matrix else None
)
image = sample_to_lowres_estimated_image(
samples=sample, latent_rgb_factors=latent_rgb_factors_torch, smooth_matrix=smooth_matrix_torch
)
v1_5_latent_rgb_factors = torch.tensor(SD1_5_LATENT_RGB_FACTORS, dtype=sample.dtype, device=sample.device)
image = sample_to_lowres_estimated_image(sample, v1_5_latent_rgb_factors)
width = image.width * 8
height = image.height * 8
percentage = calc_percentage(intermediate_state)
signal_progress("Denoising", percentage, image, (width, height))
def flux_step_callback(
signal_progress: SignalProgressFunc,
intermediate_state: PipelineIntermediateState,
is_canceled: Callable[[], bool],
) -> None:
if is_canceled():
raise CanceledException
sample = intermediate_state.latents
latent_rgb_factors = torch.tensor(FLUX_LATENT_RGB_FACTORS, dtype=sample.dtype, device=sample.device)
latent_image_perm = sample.permute(1, 2, 0).to(dtype=sample.dtype, device=sample.device)
latent_image = latent_image_perm @ latent_rgb_factors
latents_ubyte = (
((latent_image + 1) / 2).clamp(0, 1).mul(0xFF) # change scale from -1..1 to 0..1 # to 0..255
).to(device="cpu", dtype=torch.uint8)
image = Image.fromarray(latents_ubyte.cpu().numpy())
width = image.width * 8
height = image.height * 8

View File

@@ -5,12 +5,12 @@ import torch
from tqdm import tqdm
from invokeai.backend.flux.controlnet.controlnet_flux_output import ControlNetFluxOutput, sum_controlnet_flux_outputs
from invokeai.backend.flux.extensions.inpaint_extension import InpaintExtension
from invokeai.backend.flux.extensions.instantx_controlnet_extension import InstantXControlNetExtension
from invokeai.backend.flux.extensions.regional_prompting_extension import RegionalPromptingExtension
from invokeai.backend.flux.extensions.xlabs_controlnet_extension import XLabsControlNetExtension
from invokeai.backend.flux.extensions.xlabs_ip_adapter_extension import XLabsIPAdapterExtension
from invokeai.backend.flux.model import Flux
from invokeai.backend.rectified_flow.rectified_flow_inpaint_extension import RectifiedFlowInpaintExtension
from invokeai.backend.stable_diffusion.diffusers_pipeline import PipelineIntermediateState
@@ -26,7 +26,7 @@ def denoise(
step_callback: Callable[[PipelineIntermediateState], None],
guidance: float,
cfg_scale: list[float],
inpaint_extension: RectifiedFlowInpaintExtension | None,
inpaint_extension: InpaintExtension | None,
controlnet_extensions: list[XLabsControlNetExtension | InstantXControlNetExtension],
pos_ip_adapter_extensions: list[XLabsIPAdapterExtension],
neg_ip_adapter_extensions: list[XLabsIPAdapterExtension],

View File

@@ -1,15 +1,8 @@
import torch
def assert_broadcastable(*shapes):
try:
torch.broadcast_shapes(*shapes)
except RuntimeError as e:
raise AssertionError(f"Shapes {shapes} are not broadcastable.") from e
class RectifiedFlowInpaintExtension:
"""A class for managing inpainting with rectified flow models (e.g. FLUX, SD3, CogView4)."""
class InpaintExtension:
"""A class for managing inpainting with FLUX."""
def __init__(self, init_latents: torch.Tensor, inpaint_mask: torch.Tensor, noise: torch.Tensor):
"""Initialize InpaintExtension.
@@ -21,8 +14,7 @@ class RectifiedFlowInpaintExtension:
inpainted region with the background. In 'packed' format.
noise (torch.Tensor): The noise tensor used to noise the init_latents. In 'packed' format.
"""
assert_broadcastable(init_latents.shape, inpaint_mask.shape, noise.shape)
assert init_latents.shape == inpaint_mask.shape == noise.shape
self._init_latents = init_latents
self._inpaint_mask = inpaint_mask
self._noise = noise

View File

@@ -1,23 +0,0 @@
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from invokeai.backend.model_manager.legacy_probe import CkptType
def get_flux_in_channels_from_state_dict(state_dict: "CkptType") -> int | None:
"""Gets the in channels from the state dict."""
# "Standard" FLUX models use "img_in.weight", but some community fine tunes use
# "model.diffusion_model.img_in.weight". Known models that use the latter key:
# - https://civitai.com/models/885098?modelVersionId=990775
# - https://civitai.com/models/1018060?modelVersionId=1596255
# - https://civitai.com/models/978314/ultrareal-fine-tune?modelVersionId=1413133
keys = {"img_in.weight", "model.diffusion_model.img_in.weight"}
for key in keys:
val = state_dict.get(key)
if val is not None:
return val.shape[1]
return None

View File

@@ -5,14 +5,62 @@ import huggingface_hub
import numpy as np
import onnxruntime as ort
import torch
from controlnet_aux.util import resize_image
from PIL import Image
from invokeai.backend.image_util.dw_openpose.onnxdet import inference_detector
from invokeai.backend.image_util.dw_openpose.onnxpose import inference_pose
from invokeai.backend.image_util.dw_openpose.utils import NDArrayInt, draw_bodypose, draw_facepose, draw_handpose
from invokeai.backend.image_util.dw_openpose.wholebody import Wholebody
from invokeai.backend.image_util.util import np_to_pil
from invokeai.backend.util.devices import TorchDevice
DWPOSE_MODELS = {
"yolox_l.onnx": "https://huggingface.co/yzd-v/DWPose/resolve/main/yolox_l.onnx?download=true",
"dw-ll_ucoco_384.onnx": "https://huggingface.co/yzd-v/DWPose/resolve/main/dw-ll_ucoco_384.onnx?download=true",
}
def draw_pose(
pose: Dict[str, NDArrayInt | Dict[str, NDArrayInt]],
H: int,
W: int,
draw_face: bool = True,
draw_body: bool = True,
draw_hands: bool = True,
resolution: int = 512,
) -> Image.Image:
bodies = pose["bodies"]
faces = pose["faces"]
hands = pose["hands"]
assert isinstance(bodies, dict)
candidate = bodies["candidate"]
assert isinstance(bodies, dict)
subset = bodies["subset"]
canvas = np.zeros(shape=(H, W, 3), dtype=np.uint8)
if draw_body:
canvas = draw_bodypose(canvas, candidate, subset)
if draw_hands:
assert isinstance(hands, np.ndarray)
canvas = draw_handpose(canvas, hands)
if draw_face:
assert isinstance(hands, np.ndarray)
canvas = draw_facepose(canvas, faces) # type: ignore
dwpose_image: Image.Image = resize_image(
canvas,
resolution,
)
dwpose_image = Image.fromarray(dwpose_image)
return dwpose_image
class DWOpenposeDetector:
"""
@@ -20,6 +68,62 @@ class DWOpenposeDetector:
Credits: https://github.com/IDEA-Research/DWPose
"""
def __init__(self, onnx_det: Path, onnx_pose: Path) -> None:
self.pose_estimation = Wholebody(onnx_det=onnx_det, onnx_pose=onnx_pose)
def __call__(
self,
image: Image.Image,
draw_face: bool = False,
draw_body: bool = True,
draw_hands: bool = False,
resolution: int = 512,
) -> Image.Image:
np_image = np.array(image)
H, W, C = np_image.shape
with torch.no_grad():
candidate, subset = self.pose_estimation(np_image)
nums, keys, locs = candidate.shape
candidate[..., 0] /= float(W)
candidate[..., 1] /= float(H)
body = candidate[:, :18].copy()
body = body.reshape(nums * 18, locs)
score = subset[:, :18]
for i in range(len(score)):
for j in range(len(score[i])):
if score[i][j] > 0.3:
score[i][j] = int(18 * i + j)
else:
score[i][j] = -1
un_visible = subset < 0.3
candidate[un_visible] = -1
# foot = candidate[:, 18:24]
faces = candidate[:, 24:92]
hands = candidate[:, 92:113]
hands = np.vstack([hands, candidate[:, 113:]])
bodies = {"candidate": body, "subset": score}
pose = {"bodies": bodies, "hands": hands, "faces": faces}
return draw_pose(
pose, H, W, draw_face=draw_face, draw_hands=draw_hands, draw_body=draw_body, resolution=resolution
)
class DWOpenposeDetector2:
"""
Code from the original implementation of the DW Openpose Detector.
Credits: https://github.com/IDEA-Research/DWPose
This implementation is similar to DWOpenposeDetector, with some alterations to allow the onnx models to be loaded
and managed by the model manager.
"""
hf_repo_id = "yzd-v/DWPose"
hf_filename_onnx_det = "yolox_l.onnx"
hf_filename_onnx_pose = "dw-ll_ucoco_384.onnx"
@@ -109,7 +213,7 @@ class DWOpenposeDetector:
bodies = {"candidate": body, "subset": score}
pose = {"bodies": bodies, "hands": hands, "faces": faces}
return DWOpenposeDetector.draw_pose(
return DWOpenposeDetector2.draw_pose(
pose, H, W, draw_face=draw_face, draw_hands=draw_hands, draw_body=draw_body
)

View File

@@ -3,6 +3,7 @@
import math
import cv2
import matplotlib
import numpy as np
import numpy.typing as npt
@@ -126,13 +127,11 @@ def draw_handpose(canvas: NDArrayInt, all_hand_peaks: NDArrayInt) -> NDArrayInt:
x2 = int(x2 * W)
y2 = int(y2 * H)
if x1 > eps and y1 > eps and x2 > eps and y2 > eps:
hsv_color = np.array([[[ie / float(len(edges)) * 180, 255, 255]]], dtype=np.uint8)
rgb_color = cv2.cvtColor(hsv_color, cv2.COLOR_HSV2RGB)[0, 0]
cv2.line(
canvas,
(x1, y1),
(x2, y2),
rgb_color.tolist(),
matplotlib.colors.hsv_to_rgb([ie / float(len(edges)), 1.0, 1.0]) * 255,
thickness=2,
)

View File

@@ -0,0 +1,44 @@
# Code from the original DWPose Implementation: https://github.com/IDEA-Research/DWPose
# Modified pathing to suit Invoke
from pathlib import Path
import numpy as np
import onnxruntime as ort
from invokeai.app.services.config.config_default import get_config
from invokeai.backend.image_util.dw_openpose.onnxdet import inference_detector
from invokeai.backend.image_util.dw_openpose.onnxpose import inference_pose
from invokeai.backend.util.devices import TorchDevice
config = get_config()
class Wholebody:
def __init__(self, onnx_det: Path, onnx_pose: Path):
device = TorchDevice.choose_torch_device()
providers = ["CUDAExecutionProvider"] if device.type == "cuda" else ["CPUExecutionProvider"]
self.session_det = ort.InferenceSession(path_or_bytes=onnx_det, providers=providers)
self.session_pose = ort.InferenceSession(path_or_bytes=onnx_pose, providers=providers)
def __call__(self, oriImg):
det_result = inference_detector(self.session_det, oriImg)
keypoints, scores = inference_pose(self.session_pose, det_result, oriImg)
keypoints_info = np.concatenate((keypoints, scores[..., None]), axis=-1)
# compute neck joint
neck = np.mean(keypoints_info[:, [5, 6]], axis=1)
# neck score when visualizing pred
neck[:, 2:4] = np.logical_and(keypoints_info[:, 5, 2:4] > 0.3, keypoints_info[:, 6, 2:4] > 0.3).astype(int)
new_keypoints_info = np.insert(keypoints_info, 17, neck, axis=1)
mmpose_idx = [17, 6, 8, 10, 7, 9, 12, 14, 16, 13, 15, 2, 1, 4, 3]
openpose_idx = [1, 2, 3, 4, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17]
new_keypoints_info[:, openpose_idx] = new_keypoints_info[:, mmpose_idx]
keypoints_info = new_keypoints_info
keypoints, scores = keypoints_info[..., :2], keypoints_info[..., 2]
return keypoints, scores

View File

@@ -1,15 +1,26 @@
from pathlib import Path
from typing import Optional
import torch
from PIL.Image import Image
from transformers import LlavaOnevisionForConditionalGeneration, LlavaOnevisionProcessor
from transformers import AutoProcessor, LlavaOnevisionForConditionalGeneration, LlavaOnevisionProcessor
from invokeai.backend.raw_model import RawModel
class LlavaOnevisionPipeline:
"""A wrapper for a LLaVA Onevision model + processor."""
class LlavaOnevisionModel(RawModel):
def __init__(self, vllm_model: LlavaOnevisionForConditionalGeneration, processor: LlavaOnevisionProcessor):
self._vllm_model = vllm_model
self._processor = processor
@classmethod
def load_from_path(cls, path: str | Path):
vllm_model = LlavaOnevisionForConditionalGeneration.from_pretrained(path, local_files_only=True)
assert isinstance(vllm_model, LlavaOnevisionForConditionalGeneration)
processor = AutoProcessor.from_pretrained(path, local_files_only=True)
assert isinstance(processor, LlavaOnevisionProcessor)
return cls(vllm_model, processor)
def run(self, prompt: str, images: list[Image], device: torch.device, dtype: torch.dtype) -> str:
# TODO(ryand): Tune the max number of images that are useful for the model.
if len(images) > 3:
@@ -33,3 +44,13 @@ class LlavaOnevisionPipeline:
# The output_str will include the prompt, so we extract the response.
response = output_str.split("assistant\n", 1)[1].strip()
return response
def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None) -> None:
self._vllm_model.to(device=device, dtype=dtype)
def calc_size(self) -> int:
"""Get size of the model in memory in bytes."""
# HACK(ryand): Fix this issue with circular imports.
from invokeai.backend.model_manager.load.model_util import calc_module_size
return calc_module_size(self._vllm_model)

View File

@@ -30,18 +30,19 @@ from inspect import isabstract
from pathlib import Path
from typing import ClassVar, Literal, Optional, TypeAlias, Union
import safetensors.torch
import torch
from picklescan.scanner import scan_file_path
from pydantic import BaseModel, ConfigDict, Discriminator, Field, Tag, TypeAdapter
from typing_extensions import Annotated, Any, Dict
from invokeai.app.util.misc import uuid_string
from invokeai.backend.model_hash.hash_validator import validate_hash
from invokeai.backend.model_hash.model_hash import HASHING_ALGORITHMS
from invokeai.backend.model_manager.model_on_disk import ModelOnDisk
from invokeai.backend.model_hash.model_hash import HASHING_ALGORITHMS, ModelHash
from invokeai.backend.model_manager.taxonomy import (
AnyVariant,
BaseModelType,
ClipVariantType,
FluxLoRAFormat,
ModelFormat,
ModelRepoVariant,
ModelSourceType,
@@ -50,8 +51,9 @@ from invokeai.backend.model_manager.taxonomy import (
SchedulerPredictionType,
SubModelType,
)
from invokeai.backend.model_manager.util.model_util import lora_token_vector_length
from invokeai.backend.quantization.gguf.loaders import gguf_sd_loader
from invokeai.backend.stable_diffusion.schedulers.schedulers import SCHEDULER_NAME_VALUES
from invokeai.backend.util.silence_warnings import SilenceWarnings
logger = logging.getLogger(__name__)
@@ -65,6 +67,11 @@ class InvalidModelConfigException(Exception):
DEFAULTS_PRECISION = Literal["fp16", "fp32"]
class FSLayout(Enum):
FILE = "file"
DIRECTORY = "directory"
class SubmodelDefinition(BaseModel):
path_or_prefix: str
model_type: ModelType
@@ -95,6 +102,87 @@ class ControlAdapterDefaultSettings(BaseModel):
model_config = ConfigDict(extra="forbid")
class ModelOnDisk:
"""A utility class representing a model stored on disk."""
def __init__(self, path: Path, hash_algo: HASHING_ALGORITHMS = "blake3_single"):
self.path = path
# TODO: Revisit checkpoint vs diffusers terminology
self.layout = FSLayout.DIRECTORY if path.is_dir() else FSLayout.FILE
if self.path.suffix in {".safetensors", ".bin", ".pt", ".ckpt"}:
self.name = path.stem
else:
self.name = path.name
self.hash_algo = hash_algo
self._state_dict_cache = {}
def hash(self) -> str:
return ModelHash(algorithm=self.hash_algo).hash(self.path)
def size(self) -> int:
if self.layout == FSLayout.FILE:
return self.path.stat().st_size
return sum(file.stat().st_size for file in self.path.rglob("*"))
def component_paths(self) -> set[Path]:
if self.layout == FSLayout.FILE:
return {self.path}
extensions = {".safetensors", ".pt", ".pth", ".ckpt", ".bin", ".gguf"}
return {f for f in self.path.rglob("*") if f.suffix in extensions}
def repo_variant(self) -> Optional[ModelRepoVariant]:
if self.layout == FSLayout.FILE:
return None
weight_files = list(self.path.glob("**/*.safetensors"))
weight_files.extend(list(self.path.glob("**/*.bin")))
for x in weight_files:
if ".fp16" in x.suffixes:
return ModelRepoVariant.FP16
if "openvino_model" in x.name:
return ModelRepoVariant.OpenVINO
if "flax_model" in x.name:
return ModelRepoVariant.Flax
if x.suffix == ".onnx":
return ModelRepoVariant.ONNX
return ModelRepoVariant.Default
def load_state_dict(self, path: Optional[Path] = None) -> Dict[str | int, Any]:
if path in self._state_dict_cache:
return self._state_dict_cache[path]
if not path:
components = list(self.component_paths())
match components:
case []:
raise ValueError("No weight files found for this model")
case [p]:
path = p
case ps if len(ps) >= 2:
raise ValueError(
f"Multiple weight files found for this model: {ps}. "
f"Please specify the intended file using the 'path' argument"
)
with SilenceWarnings():
if path.suffix.endswith((".ckpt", ".pt", ".pth", ".bin")):
scan_result = scan_file_path(path)
if scan_result.infected_files != 0 or scan_result.scan_err:
raise RuntimeError(f"The model {path.stem} is potentially infected by malware. Aborting import.")
checkpoint = torch.load(path, map_location="cpu")
assert isinstance(checkpoint, dict)
elif path.suffix.endswith(".gguf"):
checkpoint = gguf_sd_loader(path, compute_dtype=torch.float32)
elif path.suffix.endswith(".safetensors"):
checkpoint = safetensors.torch.load_file(path)
else:
raise ValueError(f"Unrecognized model extension: {path.suffix}")
state_dict = checkpoint.get("state_dict", checkpoint)
self._state_dict_cache[path] = state_dict
return state_dict
class MatchSpeed(int, Enum):
"""Represents the estimated runtime speed of a config's 'matches' method."""
@@ -128,7 +216,6 @@ class ModelConfigBase(ABC, BaseModel):
path: str = Field(
description="Path to the model on the filesystem. Relative paths are relative to the Invoke root directory."
)
file_size: int = Field(description="The size of the model in bytes.")
name: str = Field(description="Name of the model.")
type: ModelType = Field(description="Model type")
format: ModelFormat = Field(description="Model format")
@@ -170,7 +257,7 @@ class ModelConfigBase(ABC, BaseModel):
Created to deprecate ModelProbe.probe
"""
candidates = ModelConfigBase._USING_CLASSIFY_API
sorted_by_match_speed = sorted(candidates, key=lambda cls: (cls._MATCH_SPEED, cls.__name__))
sorted_by_match_speed = sorted(candidates, key=lambda cls: cls._MATCH_SPEED)
mod = ModelOnDisk(model_path, hash_algo)
for config_cls in sorted_by_match_speed:
@@ -221,9 +308,6 @@ class ModelConfigBase(ABC, BaseModel):
if "source_type" in overrides:
overrides["source_type"] = ModelSourceType(overrides["source_type"])
if "variant" in overrides:
overrides["variant"] = ModelVariantType(overrides["variant"])
@classmethod
def from_model_on_disk(cls, mod: ModelOnDisk, **overrides):
"""Creates an instance of this config or raises InvalidModelConfigException."""
@@ -242,7 +326,6 @@ class ModelConfigBase(ABC, BaseModel):
fields["key"] = fields.get("key") or uuid_string()
fields["description"] = fields.get("description") or f"{base.value} {type.value} model {name}"
fields["repo_variant"] = fields.get("repo_variant") or mod.repo_variant()
fields["file_size"] = fields.get("file_size") or mod.size()
return cls(**fields)
@@ -284,38 +367,6 @@ class LoRAConfigBase(ABC, BaseModel):
type: Literal[ModelType.LoRA] = ModelType.LoRA
trigger_phrases: Optional[set[str]] = Field(description="Set of trigger phrases for this model", default=None)
@classmethod
def flux_lora_format(cls, mod: ModelOnDisk):
key = "FLUX_LORA_FORMAT"
if key in mod.cache:
return mod.cache[key]
from invokeai.backend.patches.lora_conversions.formats import flux_format_from_state_dict
sd = mod.load_state_dict(mod.path)
value = flux_format_from_state_dict(sd)
mod.cache[key] = value
return value
@classmethod
def base_model(cls, mod: ModelOnDisk) -> BaseModelType:
if cls.flux_lora_format(mod):
return BaseModelType.Flux
state_dict = mod.load_state_dict()
# If we've gotten here, we assume that the model is a Stable Diffusion model
token_vector_length = lora_token_vector_length(state_dict)
if token_vector_length == 768:
return BaseModelType.StableDiffusion1
elif token_vector_length == 1024:
return BaseModelType.StableDiffusion2
elif token_vector_length == 1280:
return BaseModelType.StableDiffusionXL # recognizes format at https://civitai.com/models/224641
elif token_vector_length == 2048:
return BaseModelType.StableDiffusionXL
else:
raise InvalidModelConfigException("Unknown LoRA type")
class T5EncoderConfigBase(ABC, BaseModel):
"""Base class for diffusers-style models."""
@@ -331,40 +382,11 @@ class T5EncoderBnbQuantizedLlmInt8bConfig(T5EncoderConfigBase, LegacyProbeMixin,
format: Literal[ModelFormat.BnbQuantizedLlmInt8b] = ModelFormat.BnbQuantizedLlmInt8b
class LoRALyCORISConfig(LoRAConfigBase, ModelConfigBase):
class LoRALyCORISConfig(LoRAConfigBase, LegacyProbeMixin, ModelConfigBase):
"""Model config for LoRA/Lycoris models."""
format: Literal[ModelFormat.LyCORIS] = ModelFormat.LyCORIS
@classmethod
def matches(cls, mod: ModelOnDisk) -> bool:
if mod.path.is_dir():
return False
# Avoid false positive match against ControlLoRA and Diffusers
if cls.flux_lora_format(mod) in [FluxLoRAFormat.Control, FluxLoRAFormat.Diffusers]:
return False
state_dict = mod.load_state_dict()
for key in state_dict.keys():
if type(key) is int:
continue
if key.startswith(("lora_te_", "lora_unet_", "lora_te1_", "lora_te2_", "lora_transformer_")):
return True
# "lora_A.weight" and "lora_B.weight" are associated with models in PEFT format. We don't support all PEFT
# LoRA models, but as of the time of writing, we support Diffusers FLUX PEFT LoRA models.
if key.endswith(("to_k_lora.up.weight", "to_q_lora.down.weight", "lora_A.weight", "lora_B.weight")):
return True
return False
@classmethod
def parse(cls, mod: ModelOnDisk) -> dict[str, Any]:
return {
"base": cls.base_model(mod),
}
class ControlAdapterConfigBase(ABC, BaseModel):
default_settings: Optional[ControlAdapterDefaultSettings] = Field(
@@ -388,26 +410,11 @@ class ControlLoRADiffusersConfig(ControlAdapterConfigBase, LegacyProbeMixin, Mod
format: Literal[ModelFormat.Diffusers] = ModelFormat.Diffusers
class LoRADiffusersConfig(LoRAConfigBase, ModelConfigBase):
class LoRADiffusersConfig(LoRAConfigBase, LegacyProbeMixin, ModelConfigBase):
"""Model config for LoRA/Diffusers models."""
format: Literal[ModelFormat.Diffusers] = ModelFormat.Diffusers
@classmethod
def matches(cls, mod: ModelOnDisk) -> bool:
if mod.path.is_file():
return cls.flux_lora_format(mod) == FluxLoRAFormat.Diffusers
suffixes = ["bin", "safetensors"]
weight_files = [mod.path / f"pytorch_lora_weights.{sfx}" for sfx in suffixes]
return any(wf.exists() for wf in weight_files)
@classmethod
def parse(cls, mod: ModelOnDisk) -> dict[str, Any]:
return {
"base": cls.base_model(mod),
}
class VAECheckpointConfig(CheckpointConfigBase, LegacyProbeMixin, ModelConfigBase):
"""Model config for standalone VAE models."""
@@ -579,7 +586,7 @@ class LlavaOnevisionConfig(DiffusersConfigBase, ModelConfigBase):
@classmethod
def matches(cls, mod: ModelOnDisk) -> bool:
if mod.path.is_file():
if mod.layout == FSLayout.FILE:
return False
config_path = mod.path / "config.json"

View File

@@ -14,7 +14,6 @@ from invokeai.backend.flux.controlnet.state_dict_utils import (
is_state_dict_instantx_controlnet,
is_state_dict_xlabs_controlnet,
)
from invokeai.backend.flux.flux_state_dict_utils import get_flux_in_channels_from_state_dict
from invokeai.backend.flux.ip_adapter.state_dict_utils import is_state_dict_xlabs_ip_adapter
from invokeai.backend.flux.redux.flux_redux_state_dict_utils import is_state_dict_likely_flux_redux
from invokeai.backend.model_hash.model_hash import HASHING_ALGORITHMS, ModelHash
@@ -27,7 +26,6 @@ from invokeai.backend.model_manager.config import (
SubmodelDefinition,
)
from invokeai.backend.model_manager.load.model_loaders.generic_diffusers import ConfigLoader
from invokeai.backend.model_manager.model_on_disk import ModelOnDisk
from invokeai.backend.model_manager.taxonomy import (
AnyVariant,
BaseModelType,
@@ -146,7 +144,6 @@ class ModelProbe(object):
"CLIPTextModelWithProjection": ModelType.CLIPEmbed,
"SiglipModel": ModelType.SigLIP,
"LlavaOnevisionForConditionalGeneration": ModelType.LlavaOnevision,
"CogView4Pipeline": ModelType.Main,
}
TYPE2VARIANT: Dict[ModelType, Callable[[str], Optional[AnyVariant]]] = {ModelType.CLIPEmbed: get_clip_variant_type}
@@ -209,7 +206,6 @@ class ModelProbe(object):
)
fields["format"] = ModelFormat(fields.get("format")) if "format" in fields else probe.get_format()
fields["hash"] = fields.get("hash") or ModelHash(algorithm=hash_algo).hash(model_path)
fields["file_size"] = fields.get("file_size") or ModelOnDisk(model_path).size()
fields["default_settings"] = fields.get("default_settings")
@@ -568,14 +564,7 @@ class CheckpointProbeBase(ProbeBase):
state_dict = self.checkpoint.get("state_dict") or self.checkpoint
if base_type == BaseModelType.Flux:
in_channels = get_flux_in_channels_from_state_dict(state_dict)
if in_channels is None:
# If we cannot find the in_channels, we assume that this is a normal variant. Log a warning.
logger.warning(
f"{self.model_path} does not have img_in.weight or model.diffusion_model.img_in.weight key. Assuming normal variant."
)
return ModelVariantType.Normal
in_channels = state_dict["img_in.weight"].shape[1]
# FLUX Model variant types are distinguished by input channels:
# - Unquantized Dev and Schnell have in_channels=64
@@ -859,8 +848,6 @@ class PipelineFolderProbe(FolderProbeBase):
transformer_conf = json.load(file)
if transformer_conf["_class_name"] == "SD3Transformer2DModel":
return BaseModelType.StableDiffusion3
elif transformer_conf["_class_name"] == "CogView4Transformer2DModel":
return BaseModelType.CogView4
else:
raise InvalidModelConfigException(f"Unknown base model for {self.model_path}")

View File

@@ -1,60 +0,0 @@
from pathlib import Path
from typing import Optional
import torch
from invokeai.backend.model_manager.config import (
AnyModelConfig,
CheckpointConfigBase,
DiffusersConfigBase,
)
from invokeai.backend.model_manager.load.model_loader_registry import ModelLoaderRegistry
from invokeai.backend.model_manager.load.model_loaders.generic_diffusers import GenericDiffusersLoader
from invokeai.backend.model_manager.taxonomy import (
AnyModel,
BaseModelType,
ModelFormat,
ModelType,
SubModelType,
)
@ModelLoaderRegistry.register(base=BaseModelType.CogView4, type=ModelType.Main, format=ModelFormat.Diffusers)
class CogView4DiffusersModel(GenericDiffusersLoader):
"""Class to load CogView4 main models."""
def _load_model(
self,
config: AnyModelConfig,
submodel_type: Optional[SubModelType] = None,
) -> AnyModel:
if isinstance(config, CheckpointConfigBase):
raise NotImplementedError("CheckpointConfigBase is not implemented for CogView4 models.")
if submodel_type is None:
raise Exception("A submodel type must be provided when loading main pipelines.")
model_path = Path(config.path)
load_class = self.get_hf_load_class(model_path, submodel_type)
repo_variant = config.repo_variant if isinstance(config, DiffusersConfigBase) else None
variant = repo_variant.value if repo_variant else None
model_path = model_path / submodel_type.value
# We force bfloat16 for CogView4 models. It produces black images with float16. I haven't tracked down
# specifically which model(s) is/are responsible.
dtype = torch.bfloat16
try:
result: AnyModel = load_class.from_pretrained(
model_path,
torch_dtype=dtype,
variant=variant,
)
except OSError as e:
if variant and "no file named" in str(
e
): # try without the variant, just in case user's preferences changed
result = load_class.from_pretrained(model_path, torch_dtype=dtype)
else:
raise e
return result

View File

@@ -1,8 +1,7 @@
from pathlib import Path
from typing import Optional
from transformers import LlavaOnevisionForConditionalGeneration
from invokeai.backend.llava_onevision_model import LlavaOnevisionModel
from invokeai.backend.model_manager.config import (
AnyModelConfig,
)
@@ -24,8 +23,6 @@ class LlavaOnevisionModelLoader(ModelLoader):
raise ValueError("Unexpected submodel requested for LLaVA OneVision model.")
model_path = Path(config.path)
model = LlavaOnevisionForConditionalGeneration.from_pretrained(
model_path, local_files_only=True, torch_dtype=self._torch_dtype
)
assert isinstance(model, LlavaOnevisionForConditionalGeneration)
model = LlavaOnevisionModel.load_from_path(model_path)
model.to(dtype=self._torch_dtype)
return model

View File

@@ -1,14 +1,13 @@
from pathlib import Path
from typing import Optional
from transformers import SiglipVisionModel
from invokeai.backend.model_manager.config import (
AnyModelConfig,
)
from invokeai.backend.model_manager.load.load_default import ModelLoader
from invokeai.backend.model_manager.load.model_loader_registry import ModelLoaderRegistry
from invokeai.backend.model_manager.taxonomy import AnyModel, BaseModelType, ModelFormat, ModelType, SubModelType
from invokeai.backend.sig_lip.sig_lip_pipeline import SigLipPipeline
@ModelLoaderRegistry.register(base=BaseModelType.Any, type=ModelType.SigLIP, format=ModelFormat.Diffusers)
@@ -24,5 +23,6 @@ class SigLIPModelLoader(ModelLoader):
raise ValueError("Unexpected submodel requested for LLaVA OneVision model.")
model_path = Path(config.path)
model = SiglipVisionModel.from_pretrained(model_path, local_files_only=True, torch_dtype=self._torch_dtype)
model = SigLipPipeline.load_from_path(model_path)
model.to(dtype=self._torch_dtype)
return model

View File

@@ -6,7 +6,6 @@ import logging
from pathlib import Path
from typing import Optional
import onnxruntime as ort
import torch
from diffusers.pipelines.pipeline_utils import DiffusionPipeline
from diffusers.schedulers.scheduling_utils import SchedulerMixin
@@ -16,9 +15,11 @@ from invokeai.backend.image_util.depth_anything.depth_anything_pipeline import D
from invokeai.backend.image_util.grounding_dino.grounding_dino_pipeline import GroundingDinoPipeline
from invokeai.backend.image_util.segment_anything.segment_anything_pipeline import SegmentAnythingPipeline
from invokeai.backend.ip_adapter.ip_adapter import IPAdapter
from invokeai.backend.llava_onevision_model import LlavaOnevisionModel
from invokeai.backend.model_manager.taxonomy import AnyModel
from invokeai.backend.onnx.onnx_runtime import IAIOnnxRuntimeModel
from invokeai.backend.patches.model_patch_raw import ModelPatchRaw
from invokeai.backend.sig_lip.sig_lip_pipeline import SigLipPipeline
from invokeai.backend.spandrel_image_to_image_model import SpandrelImageToImageModel
from invokeai.backend.textual_inversion import TextualInversionModelRaw
from invokeai.backend.util.calc_tensor_size import calc_tensor_size
@@ -49,19 +50,11 @@ def calc_model_size_by_data(logger: logging.Logger, model: AnyModel) -> int:
GroundingDinoPipeline,
SegmentAnythingPipeline,
DepthAnythingPipeline,
SigLipPipeline,
LlavaOnevisionModel,
),
):
return model.calc_size()
elif isinstance(model, ort.InferenceSession):
if model._model_bytes is not None:
# If the model is already loaded, return the size of the model bytes
return len(model._model_bytes)
elif model._model_path is not None:
# If the model is not loaded, return the size of the model path
return calc_model_size_by_fs(Path(model._model_path))
else:
# If neither is available, return 0
return 0
elif isinstance(
model,
(

View File

@@ -1,96 +0,0 @@
from pathlib import Path
from typing import Any, Optional, TypeAlias
import safetensors.torch
import torch
from picklescan.scanner import scan_file_path
from invokeai.backend.model_hash.model_hash import HASHING_ALGORITHMS, ModelHash
from invokeai.backend.model_manager.taxonomy import ModelRepoVariant
from invokeai.backend.quantization.gguf.loaders import gguf_sd_loader
from invokeai.backend.util.silence_warnings import SilenceWarnings
StateDict: TypeAlias = dict[str | int, Any] # When are the keys int?
class ModelOnDisk:
"""A utility class representing a model stored on disk."""
def __init__(self, path: Path, hash_algo: HASHING_ALGORITHMS = "blake3_single"):
self.path = path
if self.path.suffix in {".safetensors", ".bin", ".pt", ".ckpt"}:
self.name = path.stem
else:
self.name = path.name
self.hash_algo = hash_algo
# Having a cache helps users of ModelOnDisk (i.e. configs) to save state
# This prevents redundant computations during matching and parsing
self.cache = {"_CACHED_STATE_DICTS": {}}
def hash(self) -> str:
return ModelHash(algorithm=self.hash_algo).hash(self.path)
def size(self) -> int:
if self.path.is_file():
return self.path.stat().st_size
return sum(file.stat().st_size for file in self.path.rglob("*"))
def component_paths(self) -> set[Path]:
if self.path.is_file():
return {self.path}
extensions = {".safetensors", ".pt", ".pth", ".ckpt", ".bin", ".gguf"}
return {f for f in self.path.rglob("*") if f.suffix in extensions}
def repo_variant(self) -> Optional[ModelRepoVariant]:
if self.path.is_file():
return None
weight_files = list(self.path.glob("**/*.safetensors"))
weight_files.extend(list(self.path.glob("**/*.bin")))
for x in weight_files:
if ".fp16" in x.suffixes:
return ModelRepoVariant.FP16
if "openvino_model" in x.name:
return ModelRepoVariant.OpenVINO
if "flax_model" in x.name:
return ModelRepoVariant.Flax
if x.suffix == ".onnx":
return ModelRepoVariant.ONNX
return ModelRepoVariant.Default
def load_state_dict(self, path: Optional[Path] = None) -> StateDict:
sd_cache = self.cache["_CACHED_STATE_DICTS"]
if path in sd_cache:
return sd_cache[path]
if not path:
components = list(self.component_paths())
match components:
case []:
raise ValueError("No weight files found for this model")
case [p]:
path = p
case ps if len(ps) >= 2:
raise ValueError(
f"Multiple weight files found for this model: {ps}. "
f"Please specify the intended file using the 'path' argument"
)
with SilenceWarnings():
if path.suffix.endswith((".ckpt", ".pt", ".pth", ".bin")):
scan_result = scan_file_path(path)
if scan_result.infected_files != 0 or scan_result.scan_err:
raise RuntimeError(f"The model {path.stem} is potentially infected by malware. Aborting import.")
checkpoint = torch.load(path, map_location="cpu")
assert isinstance(checkpoint, dict)
elif path.suffix.endswith(".gguf"):
checkpoint = gguf_sd_loader(path, compute_dtype=torch.float32)
elif path.suffix.endswith(".safetensors"):
checkpoint = safetensors.torch.load_file(path)
else:
raise ValueError(f"Unrecognized model extension: {path.suffix}")
state_dict = checkpoint.get("state_dict", checkpoint)
sd_cache[path] = state_dict
return state_dict

View File

@@ -593,16 +593,6 @@ swinir = StarterModel(
# endregion
# region CogView4
cogview4 = StarterModel(
name="CogView4",
base=BaseModelType.CogView4,
source="THUDM/CogView4-6B",
description="The base CogView4 model (~29GB).",
type=ModelType.Main,
)
# endregion
# region SigLIP
siglip = StarterModel(
name="SigLIP - google/siglip-so400m-patch14-384",
@@ -715,7 +705,6 @@ STARTER_MODELS: list[StarterModel] = [
flux_redux,
llava_onevision,
flux_fill,
cogview4,
]
sd1_bundle: list[StarterModel] = [

View File

@@ -25,7 +25,6 @@ class BaseModelType(str, Enum):
StableDiffusionXL = "sdxl"
StableDiffusionXLRefiner = "sdxl-refiner"
Flux = "flux"
CogView4 = "cogview4"
# Kandinsky2_1 = "kandinsky-2.1"
@@ -127,13 +126,4 @@ class ModelSourceType(str, Enum):
HFRepoID = "hf_repo_id"
class FluxLoRAFormat(str, Enum):
"""Flux LoRA formats."""
Diffusers = "flux.diffusers"
Kohya = "flux.kohya"
OneTrainer = "flux.onetrainer"
Control = "flux.control"
AnyVariant: TypeAlias = Union[ModelVariantType, ClipVariantType, None]

View File

@@ -1,24 +0,0 @@
from invokeai.backend.model_manager.taxonomy import FluxLoRAFormat
from invokeai.backend.patches.lora_conversions.flux_control_lora_utils import is_state_dict_likely_flux_control
from invokeai.backend.patches.lora_conversions.flux_diffusers_lora_conversion_utils import (
is_state_dict_likely_in_flux_diffusers_format,
)
from invokeai.backend.patches.lora_conversions.flux_kohya_lora_conversion_utils import (
is_state_dict_likely_in_flux_kohya_format,
)
from invokeai.backend.patches.lora_conversions.flux_onetrainer_lora_conversion_utils import (
is_state_dict_likely_in_flux_onetrainer_format,
)
def flux_format_from_state_dict(state_dict):
if is_state_dict_likely_in_flux_kohya_format(state_dict):
return FluxLoRAFormat.Kohya
elif is_state_dict_likely_in_flux_onetrainer_format(state_dict):
return FluxLoRAFormat.OneTrainer
elif is_state_dict_likely_in_flux_diffusers_format(state_dict):
return FluxLoRAFormat.Diffusers
elif is_state_dict_likely_flux_control(state_dict):
return FluxLoRAFormat.Control
else:
return None

View File

View File

@@ -0,0 +1,58 @@
import torch
class InpaintExtension:
"""A class for managing inpainting with SD3."""
def __init__(self, init_latents: torch.Tensor, inpaint_mask: torch.Tensor, noise: torch.Tensor):
"""Initialize InpaintExtension.
Args:
init_latents (torch.Tensor): The initial latents (i.e. un-noised at timestep 0).
inpaint_mask (torch.Tensor): A mask specifying which elements to inpaint. Range [0, 1]. Values of 1 will be
re-generated. Values of 0 will remain unchanged. Values between 0 and 1 can be used to blend the
inpainted region with the background.
noise (torch.Tensor): The noise tensor used to noise the init_latents.
"""
assert init_latents.dim() == inpaint_mask.dim() == noise.dim() == 4
assert init_latents.shape[-2:] == inpaint_mask.shape[-2:] == noise.shape[-2:]
self._init_latents = init_latents
self._inpaint_mask = inpaint_mask
self._noise = noise
def _apply_mask_gradient_adjustment(self, t_prev: float) -> torch.Tensor:
"""Applies inpaint mask gradient adjustment and returns the inpaint mask to be used at the current timestep."""
# As we progress through the denoising process, we promote gradient regions of the mask to have a full weight of
# 1.0. This helps to produce more coherent seams around the inpainted region. We experimented with a (small)
# number of promotion strategies (e.g. gradual promotion based on timestep), but found that a simple cutoff
# threshold worked well.
# We use a small epsilon to avoid any potential issues with floating point precision.
eps = 1e-4
mask_gradient_t_cutoff = 0.5
if t_prev > mask_gradient_t_cutoff:
# Early in the denoising process, use the inpaint mask as-is.
return self._inpaint_mask
else:
# After the cut-off, promote all non-zero mask values to 1.0.
mask = self._inpaint_mask.where(self._inpaint_mask <= (0.0 + eps), 1.0)
return mask
def merge_intermediate_latents_with_init_latents(
self, intermediate_latents: torch.Tensor, t_prev: float
) -> torch.Tensor:
"""Merge the intermediate latents with the initial latents for the current timestep using the inpaint mask. I.e.
update the intermediate latents to keep the regions that are not being inpainted on the correct noise
trajectory.
This function should be called after each denoising step.
"""
mask = self._apply_mask_gradient_adjustment(t_prev)
# Noise the init latents for the current timestep.
noised_init_latents = self._noise * t_prev + (1.0 - t_prev) * self._init_latents
# Merge the intermediate latents with the noised_init_latents using the inpaint_mask.
return intermediate_latents * mask + noised_init_latents * (1.0 - mask)

View File

@@ -1,9 +1,14 @@
from pathlib import Path
from typing import Optional
import torch
from PIL import Image
from transformers import SiglipImageProcessor, SiglipVisionModel
from invokeai.backend.raw_model import RawModel
class SigLipPipeline:
class SigLipPipeline(RawModel):
"""A wrapper for a SigLIP model + processor."""
def __init__(
@@ -14,7 +19,25 @@ class SigLipPipeline:
self._siglip_processor = siglip_processor
self._siglip_model = siglip_model
@classmethod
def load_from_path(cls, path: str | Path):
siglip_model = SiglipVisionModel.from_pretrained(path, local_files_only=True)
assert isinstance(siglip_model, SiglipVisionModel)
siglip_processor = SiglipImageProcessor.from_pretrained(path, local_files_only=True)
assert isinstance(siglip_processor, SiglipImageProcessor)
return cls(siglip_processor, siglip_model)
def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None) -> None:
self._siglip_model.to(device=device, dtype=dtype)
def encode_image(self, x: Image.Image, device: torch.device, dtype: torch.dtype) -> torch.Tensor:
imgs = self._siglip_processor.preprocess(images=[x], do_resize=True, return_tensors="pt", do_convert_rgb=True)
encoded_x = self._siglip_model(**imgs.to(device=device, dtype=dtype)).last_hidden_state
return encoded_x
def calc_size(self) -> int:
"""Get size of the model in memory in bytes."""
# HACK(ryand): Fix this issue with circular imports.
from invokeai.backend.model_manager.load.model_util import calc_module_size
return calc_module_size(self._siglip_model)

View File

@@ -67,26 +67,13 @@ class SD3ConditioningInfo:
return self
@dataclass
class CogView4ConditioningInfo:
glm_embeds: torch.Tensor
def to(self, device: torch.device | None = None, dtype: torch.dtype | None = None):
self.glm_embeds = self.glm_embeds.to(device=device, dtype=dtype)
return self
@dataclass
class ConditioningFieldData:
# If you change this class, adding more types, you _must_ update the instantiation of ObjectSerializerDisk in
# invokeai/app/api/dependencies.py, adding the types to the list of safe globals. If you do not, torch will be
# unable to deserialize the object and will raise an error.
conditionings: (
List[BasicConditioningInfo]
| List[SDXLConditioningInfo]
| List[FLUXConditioningInfo]
| List[SD3ConditioningInfo]
| List[CogView4ConditioningInfo]
)

View File

@@ -0,0 +1,245 @@
import math
import diffusers
import torch
if torch.backends.mps.is_available():
torch.empty = torch.zeros
_torch_layer_norm = torch.nn.functional.layer_norm
def new_layer_norm(input, normalized_shape, weight=None, bias=None, eps=1e-05):
if input.device.type == "mps" and input.dtype == torch.float16:
input = input.float()
if weight is not None:
weight = weight.float()
if bias is not None:
bias = bias.float()
return _torch_layer_norm(input, normalized_shape, weight, bias, eps).half()
else:
return _torch_layer_norm(input, normalized_shape, weight, bias, eps)
torch.nn.functional.layer_norm = new_layer_norm
_torch_tensor_permute = torch.Tensor.permute
def new_torch_tensor_permute(input, *dims):
result = _torch_tensor_permute(input, *dims)
if input.device == "mps" and input.dtype == torch.float16:
result = result.contiguous()
return result
torch.Tensor.permute = new_torch_tensor_permute
_torch_lerp = torch.lerp
def new_torch_lerp(input, end, weight, *, out=None):
if input.device.type == "mps" and input.dtype == torch.float16:
input = input.float()
end = end.float()
if isinstance(weight, torch.Tensor):
weight = weight.float()
if out is not None:
out_fp32 = torch.zeros_like(out, dtype=torch.float32)
else:
out_fp32 = None
result = _torch_lerp(input, end, weight, out=out_fp32)
if out is not None:
out.copy_(out_fp32.half())
del out_fp32
return result.half()
else:
return _torch_lerp(input, end, weight, out=out)
torch.lerp = new_torch_lerp
_torch_interpolate = torch.nn.functional.interpolate
def new_torch_interpolate(
input,
size=None,
scale_factor=None,
mode="nearest",
align_corners=None,
recompute_scale_factor=None,
antialias=False,
):
if input.device.type == "mps" and input.dtype == torch.float16:
return _torch_interpolate(
input.float(), size, scale_factor, mode, align_corners, recompute_scale_factor, antialias
).half()
else:
return _torch_interpolate(input, size, scale_factor, mode, align_corners, recompute_scale_factor, antialias)
torch.nn.functional.interpolate = new_torch_interpolate
# TODO: refactor it
_SlicedAttnProcessor = diffusers.models.attention_processor.SlicedAttnProcessor
class ChunkedSlicedAttnProcessor:
r"""
Processor for implementing sliced attention.
Args:
slice_size (`int`, *optional*):
The number of steps to compute attention. Uses as many slices as `attention_head_dim // slice_size`, and
`attention_head_dim` must be a multiple of the `slice_size`.
"""
def __init__(self, slice_size):
assert isinstance(slice_size, int)
slice_size = 1 # TODO: maybe implement chunking in batches too when enough memory
self.slice_size = slice_size
self._sliced_attn_processor = _SlicedAttnProcessor(slice_size)
def __call__(self, attn, hidden_states, encoder_hidden_states=None, attention_mask=None):
if self.slice_size != 1 or attn.upcast_attention:
return self._sliced_attn_processor(attn, hidden_states, encoder_hidden_states, attention_mask)
residual = hidden_states
input_ndim = hidden_states.ndim
if input_ndim == 4:
batch_size, channel, height, width = hidden_states.shape
hidden_states = hidden_states.view(batch_size, channel, height * width).transpose(1, 2)
batch_size, sequence_length, _ = (
hidden_states.shape if encoder_hidden_states is None else encoder_hidden_states.shape
)
attention_mask = attn.prepare_attention_mask(attention_mask, sequence_length, batch_size)
if attn.group_norm is not None:
hidden_states = attn.group_norm(hidden_states.transpose(1, 2)).transpose(1, 2)
query = attn.to_q(hidden_states)
dim = query.shape[-1]
query = attn.head_to_batch_dim(query)
if encoder_hidden_states is None:
encoder_hidden_states = hidden_states
elif attn.norm_cross:
encoder_hidden_states = attn.norm_encoder_hidden_states(encoder_hidden_states)
key = attn.to_k(encoder_hidden_states)
value = attn.to_v(encoder_hidden_states)
key = attn.head_to_batch_dim(key)
value = attn.head_to_batch_dim(value)
batch_size_attention, query_tokens, _ = query.shape
hidden_states = torch.zeros(
(batch_size_attention, query_tokens, dim // attn.heads), device=query.device, dtype=query.dtype
)
chunk_tmp_tensor = torch.empty(
self.slice_size, query.shape[1], key.shape[1], dtype=query.dtype, device=query.device
)
for i in range(batch_size_attention // self.slice_size):
start_idx = i * self.slice_size
end_idx = (i + 1) * self.slice_size
query_slice = query[start_idx:end_idx]
key_slice = key[start_idx:end_idx]
attn_mask_slice = attention_mask[start_idx:end_idx] if attention_mask is not None else None
self.get_attention_scores_chunked(
attn,
query_slice,
key_slice,
attn_mask_slice,
hidden_states[start_idx:end_idx],
value[start_idx:end_idx],
chunk_tmp_tensor,
)
hidden_states = attn.batch_to_head_dim(hidden_states)
# linear proj
hidden_states = attn.to_out[0](hidden_states)
# dropout
hidden_states = attn.to_out[1](hidden_states)
if input_ndim == 4:
hidden_states = hidden_states.transpose(-1, -2).reshape(batch_size, channel, height, width)
if attn.residual_connection:
hidden_states = hidden_states + residual
hidden_states = hidden_states / attn.rescale_output_factor
return hidden_states
def get_attention_scores_chunked(self, attn, query, key, attention_mask, hidden_states, value, chunk):
# batch size = 1
assert query.shape[0] == 1
assert key.shape[0] == 1
assert value.shape[0] == 1
assert hidden_states.shape[0] == 1
# dtype = query.dtype
if attn.upcast_attention:
query = query.float()
key = key.float()
# out_item_size = query.dtype.itemsize
# if attn.upcast_attention:
# out_item_size = torch.float32.itemsize
out_item_size = query.element_size()
if attn.upcast_attention:
out_item_size = 4
chunk_size = 2**29
out_size = query.shape[1] * key.shape[1] * out_item_size
chunks_count = min(query.shape[1], math.ceil((out_size - 1) / chunk_size))
chunk_step = max(1, int(query.shape[1] / chunks_count))
key = key.transpose(-1, -2)
def _get_chunk_view(tensor, start, length):
if start + length > tensor.shape[1]:
length = tensor.shape[1] - start
# print(f"view: [{tensor.shape[0]},{tensor.shape[1]},{tensor.shape[2]}] - start: {start}, length: {length}")
return tensor[:, start : start + length]
for chunk_pos in range(0, query.shape[1], chunk_step):
if attention_mask is not None:
torch.baddbmm(
_get_chunk_view(attention_mask, chunk_pos, chunk_step),
_get_chunk_view(query, chunk_pos, chunk_step),
key,
beta=1,
alpha=attn.scale,
out=chunk,
)
else:
torch.baddbmm(
torch.zeros((1, 1, 1), device=query.device, dtype=query.dtype),
_get_chunk_view(query, chunk_pos, chunk_step),
key,
beta=0,
alpha=attn.scale,
out=chunk,
)
chunk = chunk.softmax(dim=-1)
torch.bmm(chunk, value, out=_get_chunk_view(hidden_states, chunk_pos, chunk_step))
# del chunk
diffusers.models.attention_processor.SlicedAttnProcessor = ChunkedSlicedAttnProcessor

View File

@@ -62,12 +62,11 @@
"@nanostores/react": "^0.7.3",
"@reduxjs/toolkit": "2.6.1",
"@roarr/browser-log-writer": "^1.3.0",
"@xyflow/react": "^12.5.3",
"@xyflow/react": "^12.4.2",
"async-mutex": "^0.5.0",
"chakra-react-select": "^4.9.2",
"cmdk": "^1.0.0",
"compare-versions": "^6.1.1",
"filesize": "^10.1.6",
"fracturedjsonjs": "^4.0.2",
"framer-motion": "^11.10.0",
"i18next": "^23.15.1",
@@ -151,7 +150,7 @@
"prettier": "^3.3.3",
"rollup-plugin-visualizer": "^5.12.0",
"storybook": "^8.3.4",
"tsafe": "^1.8.5",
"tsafe": "^1.7.5",
"type-fest": "^4.26.1",
"typescript": "^5.6.2",
"vite": "^6.1.0",
@@ -163,6 +162,5 @@
},
"engines": {
"pnpm": "8"
},
"packageManager": "pnpm@8.15.9+sha512.499434c9d8fdd1a2794ebf4552b3b25c0a633abcee5bb15e7b5de90f32f47b513aca98cd5cfd001c31f0db454bc3804edccd578501e4ca293a6816166bbd9f81"
}
}

View File

@@ -36,8 +36,8 @@ dependencies:
specifier: ^1.3.0
version: 1.3.0
'@xyflow/react':
specifier: ^12.5.3
version: 12.5.3(@types/react@18.3.11)(react-dom@18.3.1)(react@18.3.1)
specifier: ^12.4.2
version: 12.4.2(@types/react@18.3.11)(react-dom@18.3.1)(react@18.3.1)
async-mutex:
specifier: ^0.5.0
version: 0.5.0
@@ -50,9 +50,6 @@ dependencies:
compare-versions:
specifier: ^6.1.1
version: 6.1.1
filesize:
specifier: ^10.1.6
version: 10.1.6
fracturedjsonjs:
specifier: ^4.0.2
version: 4.0.2
@@ -287,8 +284,8 @@ devDependencies:
specifier: ^8.3.4
version: 8.3.4
tsafe:
specifier: ^1.8.5
version: 1.8.5
specifier: ^1.7.5
version: 1.7.5
type-fest:
specifier: ^4.26.1
version: 4.26.1
@@ -3326,7 +3323,7 @@ packages:
/@types/d3-drag@3.0.7:
resolution: {integrity: sha512-HE3jVKlzU9AaMazNufooRJ5ZpWmLIoc90A37WU2JMmeq28w1FQqCZswHZ3xR+SuxYftzHq6WU6KJHvqxKzTxxQ==}
dependencies:
'@types/d3-selection': 3.0.11
'@types/d3-selection': 3.0.10
dev: false
/@types/d3-interpolate@3.0.4:
@@ -3335,21 +3332,21 @@ packages:
'@types/d3-color': 3.1.3
dev: false
/@types/d3-selection@3.0.11:
resolution: {integrity: sha512-bhAXu23DJWsrI45xafYpkQ4NtcKMwWnAC/vKrd2l+nxMFuvOT3XMYTIj2opv8vq8AO5Yh7Qac/nSeP/3zjTK0w==}
/@types/d3-selection@3.0.10:
resolution: {integrity: sha512-cuHoUgS/V3hLdjJOLTT691+G2QoqAjCVLmr4kJXR4ha56w1Zdu8UUQ5TxLRqudgNjwXeQxKMq4j+lyf9sWuslg==}
dev: false
/@types/d3-transition@3.0.9:
resolution: {integrity: sha512-uZS5shfxzO3rGlu0cC3bjmMFKsXv+SmZZcgp0KD22ts4uGXp5EVYGzu/0YdwZeKmddhcAccYtREJKkPfXkZuCg==}
/@types/d3-transition@3.0.8:
resolution: {integrity: sha512-ew63aJfQ/ms7QQ4X7pk5NxQ9fZH/z+i24ZfJ6tJSfqxJMrYLiK01EAs2/Rtw/JreGUsS3pLPNV644qXFGnoZNQ==}
dependencies:
'@types/d3-selection': 3.0.11
'@types/d3-selection': 3.0.10
dev: false
/@types/d3-zoom@3.0.8:
resolution: {integrity: sha512-iqMC4/YlFCSlO8+2Ii1GGGliCAY4XdeG748w5vQUbevlbDu0zSjH/+jojorQVBK/se0j6DUFNPBGSqD3YWYnDw==}
dependencies:
'@types/d3-interpolate': 3.0.4
'@types/d3-selection': 3.0.11
'@types/d3-selection': 3.0.10
dev: false
/@types/diff-match-patch@1.0.36:
@@ -3954,28 +3951,28 @@ packages:
resolution: {integrity: sha512-N8tkAACJx2ww8vFMneJmaAgmjAG1tnVBZJRLRcx061tmsLRZHSEZSLuGWnwPtunsSLvSqXQ2wfp7Mgqg1I+2dQ==}
dev: false
/@xyflow/react@12.5.3(@types/react@18.3.11)(react-dom@18.3.1)(react@18.3.1):
resolution: {integrity: sha512-saovy/aQRoW8qQoIqMFUtmC3F6oEV7n6+J1pVbhSG45NI/hOFvK0qozsIPKqX5Va6lGQnkl/o53NHLja3NiweQ==}
/@xyflow/react@12.4.2(@types/react@18.3.11)(react-dom@18.3.1)(react@18.3.1):
resolution: {integrity: sha512-AFJKVc/fCPtgSOnRst3xdYJwiEcUN9lDY7EO/YiRvFHYCJGgfzg+jpvZjkTOnBLGyrMJre9378pRxAc3fsR06A==}
peerDependencies:
react: '>=17'
react-dom: '>=17'
dependencies:
'@xyflow/system': 0.0.53
'@xyflow/system': 0.0.50
classcat: 5.0.5
react: 18.3.1
react-dom: 18.3.1(react@18.3.1)
zustand: 4.5.6(@types/react@18.3.11)(react@18.3.1)
zustand: 4.5.5(@types/react@18.3.11)(react@18.3.1)
transitivePeerDependencies:
- '@types/react'
- immer
dev: false
/@xyflow/system@0.0.53:
resolution: {integrity: sha512-QTWieiTtvNYyQAz1fxpzgtUGXNpnhfh6vvZa7dFWpWS2KOz6bEHODo/DTK3s07lDu0Bq0Db5lx/5M5mNjb9VDQ==}
/@xyflow/system@0.0.50:
resolution: {integrity: sha512-HVUZd4LlY88XAaldFh2nwVxDOcdIBxGpQ5txzwfJPf+CAjj2BfYug1fHs2p4yS7YO8H6A3EFJQovBE8YuHkAdg==}
dependencies:
'@types/d3-drag': 3.0.7
'@types/d3-selection': 3.0.11
'@types/d3-transition': 3.0.9
'@types/d3-selection': 3.0.10
'@types/d3-transition': 3.0.8
'@types/d3-zoom': 3.0.8
d3-drag: 3.0.0
d3-selection: 3.0.0
@@ -5754,11 +5751,6 @@ packages:
tslib: 2.7.0
dev: false
/filesize@10.1.6:
resolution: {integrity: sha512-sJslQKU2uM33qH5nqewAwVB2QgR6w1aMNsYUp3aN5rMRyXEwJGmZvaWzeJFNTOXWlHQyBFCWrdj3fV/fsTOX8w==}
engines: {node: '>= 10.4.0'}
dev: false
/fill-range@7.1.1:
resolution: {integrity: sha512-YsGpe3WHLK8ZYi4tWDg2Jy3ebRz2rXowDxnld4bkQB00cc/1Zw9AWnC0i9ztDJitivtQvaI9KaLyKrc+hBW0yg==}
engines: {node: '>=8'}
@@ -8799,8 +8791,8 @@ packages:
resolution: {integrity: sha512-tLJxacIQUM82IR7JO1UUkKlYuUTmoY9HBJAmNWFzheSlDS5SPMcNIepejHJa4BpPQLAcbRhRf3GDJzyj6rbKvA==}
dev: false
/tsafe@1.8.5:
resolution: {integrity: sha512-LFWTWQrW6rwSY+IBNFl2ridGfUzVsPwrZ26T4KUJww/py8rzaQ/SY+MIz6YROozpUCaRcuISqagmlwub9YT9kw==}
/tsafe@1.7.5:
resolution: {integrity: sha512-tbNyyBSbwfbilFfiuXkSOj82a6++ovgANwcoqBAcO9/REPoZMEQoE8kWPeO0dy5A2D/2Lajr8Ohue5T0ifIvLQ==}
dev: true
/tsconfck@3.1.5(typescript@5.6.2):
@@ -9131,14 +9123,6 @@ packages:
react: 18.3.1
dev: false
/use-sync-external-store@1.5.0(react@18.3.1):
resolution: {integrity: sha512-Rb46I4cGGVBmjamjphe8L/UnvJD+uPPtTkNvX5mZgqdbavhI4EbgIWJiIHXJ8bc/i9EQGPRh4DwEURJ552Do0A==}
peerDependencies:
react: ^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0
dependencies:
react: 18.3.1
dev: false
/util-deprecate@1.0.2:
resolution: {integrity: sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==}
dev: true
@@ -9583,8 +9567,8 @@ packages:
/zod@3.23.8:
resolution: {integrity: sha512-XBx9AXhXktjUqnepgTiE5flcKIYWi/rme0Eaj+5Y0lftuGBq+jyRu/md4WnuxqgP1ubdpNCsYEYPxrzVHD8d6g==}
/zustand@4.5.6(@types/react@18.3.11)(react@18.3.1):
resolution: {integrity: sha512-ibr/n1hBzLLj5Y+yUcU7dYw8p6WnIVzdJbnX+1YpaScvZVF2ziugqHs+LAmHw4lWO9c/zRj+K1ncgWDQuthEdQ==}
/zustand@4.5.5(@types/react@18.3.11)(react@18.3.1):
resolution: {integrity: sha512-+0PALYNJNgK6hldkgDq2vLrw5f6g/jCInz52n9RTpropGgeAf/ioFUCdtsjCqu4gNhW9D01rUQBROoRjdzyn2Q==}
engines: {node: '>=12.7.0'}
peerDependencies:
'@types/react': '>=16.8'
@@ -9600,5 +9584,5 @@ packages:
dependencies:
'@types/react': 18.3.11
react: 18.3.1
use-sync-external-store: 1.5.0(react@18.3.1)
use-sync-external-store: 1.2.2(react@18.3.1)
dev: false

View File

@@ -116,10 +116,7 @@
"combinatorial": "Kombinatorisch",
"saveChanges": "Änderungen speichern",
"error_withCount_one": "{{count}} Fehler",
"error_withCount_other": "{{count}} Fehler",
"value": "Wert",
"label": "Label",
"systemInformation": "Systeminformationen"
"error_withCount_other": "{{count}} Fehler"
},
"gallery": {
"galleryImageSize": "Bildgröße",
@@ -698,10 +695,7 @@
"guidance": "Führung",
"coherenceMode": "Modus",
"recallMetadata": "Metadaten abrufen",
"gaussianBlur": "Gaußsche Unschärfe",
"sendToUpscale": "An Hochskalieren senden",
"useCpuNoise": "CPU-Rauschen verwenden",
"sendToCanvas": "An Leinwand senden"
"gaussianBlur": "Gaußsche Unschärfe"
},
"settings": {
"displayInProgress": "Zwischenbilder anzeigen",
@@ -1334,8 +1328,7 @@
"loadWorkflowDesc2": "Ihr aktueller Arbeitsablauf enthält nicht gespeicherte Änderungen.",
"loadingTemplates": "Lade {{name}}",
"missingSourceOrTargetHandle": "Fehlender Quell- oder Zielgriff",
"missingSourceOrTargetNode": "Fehlender Quell- oder Zielknoten",
"showEdgeLabelsHelp": "Beschriftungen an Kanten anzeigen, um die verknüpften Knoten zu kennzeichnen"
"missingSourceOrTargetNode": "Fehlender Quell- oder Zielknoten"
},
"hrf": {
"enableHrf": "Korrektur für hohe Auflösungen",

Some files were not shown because too many files have changed in this diff Show More