Compare commits
2 Commits
v5.0.0
...
experiment
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
2634f0e43a | ||
|
|
704151e8e3 |
1
.gitattributes
vendored
@@ -2,4 +2,3 @@
|
||||
# Only affects text files and ignores other file types.
|
||||
# For more info see: https://www.aleksandrhovhannisyan.com/blog/crlf-vs-lf-normalizing-line-endings-in-git/
|
||||
* text=auto
|
||||
docker/** text eol=lf
|
||||
40
.github/CODEOWNERS
vendored
@@ -1,32 +1,34 @@
|
||||
# continuous integration
|
||||
/.github/workflows/ @lstein @blessedcoolant @hipsterusername @ebr
|
||||
/.github/workflows/ @lstein @blessedcoolant
|
||||
|
||||
# documentation
|
||||
/docs/ @lstein @blessedcoolant @hipsterusername @Millu
|
||||
/mkdocs.yml @lstein @blessedcoolant @hipsterusername @Millu
|
||||
/docs/ @lstein @blessedcoolant @hipsterusername
|
||||
/mkdocs.yml @lstein @blessedcoolant
|
||||
|
||||
# nodes
|
||||
/invokeai/app/ @Kyle0654 @blessedcoolant @psychedelicious @brandonrising @hipsterusername
|
||||
/invokeai/app/ @Kyle0654 @blessedcoolant @psychedelicious @brandonrising
|
||||
|
||||
# installation and configuration
|
||||
/pyproject.toml @lstein @blessedcoolant @hipsterusername
|
||||
/docker/ @lstein @blessedcoolant @hipsterusername @ebr
|
||||
/scripts/ @ebr @lstein @hipsterusername
|
||||
/installer/ @lstein @ebr @hipsterusername
|
||||
/invokeai/assets @lstein @ebr @hipsterusername
|
||||
/invokeai/configs @lstein @hipsterusername
|
||||
/invokeai/version @lstein @blessedcoolant @hipsterusername
|
||||
/pyproject.toml @lstein @blessedcoolant
|
||||
/docker/ @lstein @blessedcoolant
|
||||
/scripts/ @ebr @lstein
|
||||
/installer/ @lstein @ebr
|
||||
/invokeai/assets @lstein @ebr
|
||||
/invokeai/configs @lstein
|
||||
/invokeai/version @lstein @blessedcoolant
|
||||
|
||||
# web ui
|
||||
/invokeai/frontend @blessedcoolant @psychedelicious @lstein @maryhipp @hipsterusername
|
||||
/invokeai/backend @blessedcoolant @psychedelicious @lstein @maryhipp @hipsterusername
|
||||
/invokeai/frontend @blessedcoolant @psychedelicious @lstein @maryhipp
|
||||
/invokeai/backend @blessedcoolant @psychedelicious @lstein @maryhipp
|
||||
|
||||
# generation, model management, postprocessing
|
||||
/invokeai/backend @damian0815 @lstein @blessedcoolant @gregghelt2 @StAlKeR7779 @brandonrising @ryanjdick @hipsterusername
|
||||
/invokeai/backend @damian0815 @lstein @blessedcoolant @gregghelt2 @StAlKeR7779 @brandonrising
|
||||
|
||||
# front ends
|
||||
/invokeai/frontend/CLI @lstein @hipsterusername
|
||||
/invokeai/frontend/install @lstein @ebr @hipsterusername
|
||||
/invokeai/frontend/merge @lstein @blessedcoolant @hipsterusername
|
||||
/invokeai/frontend/training @lstein @blessedcoolant @hipsterusername
|
||||
/invokeai/frontend/web @psychedelicious @blessedcoolant @maryhipp @hipsterusername
|
||||
/invokeai/frontend/CLI @lstein
|
||||
/invokeai/frontend/install @lstein @ebr
|
||||
/invokeai/frontend/merge @lstein @blessedcoolant
|
||||
/invokeai/frontend/training @lstein @blessedcoolant
|
||||
/invokeai/frontend/web @psychedelicious @blessedcoolant @maryhipp
|
||||
|
||||
|
||||
|
||||
98
.github/ISSUE_TEMPLATE/BUG_REPORT.yml
vendored
@@ -6,6 +6,10 @@ title: '[bug]: '
|
||||
|
||||
labels: ['bug']
|
||||
|
||||
# assignees:
|
||||
# - moderator_bot
|
||||
# - lstein
|
||||
|
||||
body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
@@ -14,9 +18,10 @@ body:
|
||||
|
||||
- type: checkboxes
|
||||
attributes:
|
||||
label: Is there an existing issue for this problem?
|
||||
label: Is there an existing issue for this?
|
||||
description: |
|
||||
Please [search](https://github.com/invoke-ai/InvokeAI/issues) first to see if an issue already exists for the problem.
|
||||
Please use the [search function](https://github.com/invoke-ai/InvokeAI/issues?q=is%3Aissue+is%3Aopen+label%3Abug)
|
||||
irst to see if an issue already exists for the bug you encountered.
|
||||
options:
|
||||
- label: I have searched the existing issues
|
||||
required: true
|
||||
@@ -28,119 +33,80 @@ body:
|
||||
- type: dropdown
|
||||
id: os_dropdown
|
||||
attributes:
|
||||
label: Operating system
|
||||
description: Your computer's operating system.
|
||||
label: OS
|
||||
description: Which operating System did you use when the bug occured
|
||||
multiple: false
|
||||
options:
|
||||
- 'Linux'
|
||||
- 'Windows'
|
||||
- 'macOS'
|
||||
- 'other'
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: dropdown
|
||||
id: gpu_dropdown
|
||||
attributes:
|
||||
label: GPU vendor
|
||||
description: Your GPU's vendor.
|
||||
label: GPU
|
||||
description: Which kind of Graphic-Adapter is your System using
|
||||
multiple: false
|
||||
options:
|
||||
- 'Nvidia (CUDA)'
|
||||
- 'AMD (ROCm)'
|
||||
- 'Apple Silicon (MPS)'
|
||||
- 'None (CPU)'
|
||||
- 'cuda'
|
||||
- 'amd'
|
||||
- 'mps'
|
||||
- 'cpu'
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: input
|
||||
id: gpu_model
|
||||
attributes:
|
||||
label: GPU model
|
||||
description: Your GPU's model. If on Apple Silicon, this is your Mac's chip. Leave blank if on CPU.
|
||||
placeholder: ex. RTX 2080 Ti, Mac M1 Pro
|
||||
validations:
|
||||
required: false
|
||||
|
||||
- type: input
|
||||
id: vram
|
||||
attributes:
|
||||
label: GPU VRAM
|
||||
description: Your GPU's VRAM. If on Apple Silicon, this is your Mac's unified memory. Leave blank if on CPU.
|
||||
label: VRAM
|
||||
description: Size of the VRAM if known
|
||||
placeholder: 8GB
|
||||
validations:
|
||||
required: false
|
||||
|
||||
|
||||
- type: input
|
||||
id: version-number
|
||||
attributes:
|
||||
label: Version number
|
||||
label: What version did you experience this issue on?
|
||||
description: |
|
||||
The version of Invoke you have installed. If it is not the latest version, please update and try again to confirm the issue still exists. If you are testing main, please include the commit hash instead.
|
||||
placeholder: ex. 3.6.1
|
||||
Please share the version of Invoke AI that you experienced the issue on. If this is not the latest version, please update first to confirm the issue still exists. If you are testing main, please include the commit hash instead.
|
||||
placeholder: X.X.X
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: input
|
||||
id: browser-version
|
||||
attributes:
|
||||
label: Browser
|
||||
description: Your web browser and version.
|
||||
placeholder: ex. Firefox 123.0b3
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
id: python-deps
|
||||
attributes:
|
||||
label: Python dependencies
|
||||
description: |
|
||||
If the problem occurred during image generation, click the gear icon at the bottom left corner, click "About", click the copy button and then paste here.
|
||||
validations:
|
||||
required: false
|
||||
|
||||
- type: textarea
|
||||
id: what-happened
|
||||
attributes:
|
||||
label: What happened
|
||||
label: What happened?
|
||||
description: |
|
||||
Describe what happened. Include any relevant error messages, stack traces and screenshots here.
|
||||
placeholder: I clicked button X and then Y happened.
|
||||
Briefly describe what happened, what you expected to happen and how to reproduce this bug.
|
||||
placeholder: When using the webinterface and right-clicking on button X instead of the popup-menu there error Y appears
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
id: what-you-expected
|
||||
attributes:
|
||||
label: What you expected to happen
|
||||
description: Describe what you expected to happen.
|
||||
placeholder: I expected Z to happen.
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
id: how-to-repro
|
||||
attributes:
|
||||
label: How to reproduce the problem
|
||||
description: List steps to reproduce the problem.
|
||||
placeholder: Start the app, generate an image with these settings, then click button X.
|
||||
label: Screenshots
|
||||
description: If applicable, add screenshots to help explain your problem
|
||||
placeholder: this is what the result looked like <screenshot>
|
||||
validations:
|
||||
required: false
|
||||
|
||||
- type: textarea
|
||||
id: additional-context
|
||||
attributes:
|
||||
label: Additional context
|
||||
description: Any other context that might help us to understand the problem.
|
||||
description: Add any other context about the problem here
|
||||
placeholder: Only happens when there is full moon and Friday the 13th on Christmas Eve 🎅🏻
|
||||
validations:
|
||||
required: false
|
||||
|
||||
- type: input
|
||||
id: discord-username
|
||||
id: contact
|
||||
attributes:
|
||||
label: Discord username
|
||||
description: If you are on the Invoke discord and would prefer to be contacted there, please provide your username.
|
||||
placeholder: supercoolusername123
|
||||
label: Contact Details
|
||||
description: __OPTIONAL__ How can we get in touch with you if we need more info (besides this issue)?
|
||||
placeholder: ex. email@example.com, discordname, twitter, ...
|
||||
validations:
|
||||
required: false
|
||||
|
||||
17
.github/ISSUE_TEMPLATE/FEATURE_REQUEST.yml
vendored
@@ -1,5 +1,5 @@
|
||||
name: Feature Request
|
||||
description: Contribute a idea or request a new feature
|
||||
description: Commit a idea or Request a new feature
|
||||
title: '[enhancement]: '
|
||||
labels: ['enhancement']
|
||||
# assignees:
|
||||
@@ -9,14 +9,14 @@ body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
Thanks for taking the time to fill out this feature request!
|
||||
Thanks for taking the time to fill out this Feature request!
|
||||
|
||||
- type: checkboxes
|
||||
attributes:
|
||||
label: Is there an existing issue for this?
|
||||
description: |
|
||||
Please make use of the [search function](https://github.com/invoke-ai/InvokeAI/labels/enhancement)
|
||||
to see if a similar issue already exists for the feature you want to request
|
||||
to see if a simmilar issue already exists for the feature you want to request
|
||||
options:
|
||||
- label: I have searched the existing issues
|
||||
required: true
|
||||
@@ -34,9 +34,12 @@ body:
|
||||
id: whatisexpected
|
||||
attributes:
|
||||
label: What should this feature add?
|
||||
description: Explain the functionality this feature should add. Feature requests should be for single features. Please create multiple requests if you want to request multiple features.
|
||||
description: Please try to explain the functionality this feature should add
|
||||
placeholder: |
|
||||
I'd like a button that creates an image of banana sushi every time I press it. Each image should be different. There should be a toggle next to the button that enables strawberry mode, in which the images are of strawberry sushi instead.
|
||||
Instead of one huge textfield, it would be nice to have forms for bug-reports, feature-requests, ...
|
||||
Great benefits with automatic labeling, assigning and other functionalitys not available in that form
|
||||
via old-fashioned markdown-templates. I would also love to see the use of a moderator bot 🤖 like
|
||||
https://github.com/marketplace/actions/issue-moderator-with-commands to auto close old issues and other things
|
||||
validations:
|
||||
required: true
|
||||
|
||||
@@ -48,6 +51,6 @@ body:
|
||||
|
||||
- type: textarea
|
||||
attributes:
|
||||
label: Additional Content
|
||||
label: Aditional Content
|
||||
description: Add any other context or screenshots about the feature request here.
|
||||
placeholder: This is a mockup of the design how I imagine it <screenshot>
|
||||
placeholder: This is a Mockup of the design how I imagine it <screenshot>
|
||||
|
||||
33
.github/actions/install-frontend-deps/action.yml
vendored
@@ -1,33 +0,0 @@
|
||||
name: install frontend dependencies
|
||||
description: Installs frontend dependencies with pnpm, with caching
|
||||
runs:
|
||||
using: 'composite'
|
||||
steps:
|
||||
- name: setup node 18
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: '18'
|
||||
|
||||
- name: setup pnpm
|
||||
uses: pnpm/action-setup@v4
|
||||
with:
|
||||
version: 8.15.6
|
||||
run_install: false
|
||||
|
||||
- name: get pnpm store directory
|
||||
shell: bash
|
||||
run: |
|
||||
echo "STORE_PATH=$(pnpm store path --silent)" >> $GITHUB_ENV
|
||||
|
||||
- name: setup cache
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: ${{ env.STORE_PATH }}
|
||||
key: ${{ runner.os }}-pnpm-store-${{ hashFiles('**/pnpm-lock.yaml') }}
|
||||
restore-keys: |
|
||||
${{ runner.os }}-pnpm-store-
|
||||
|
||||
- name: install frontend dependencies
|
||||
run: pnpm install --prefer-frozen-lockfile
|
||||
shell: bash
|
||||
working-directory: invokeai/frontend/web
|
||||
59
.github/pr_labels.yml
vendored
@@ -1,59 +0,0 @@
|
||||
root:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file: '*'
|
||||
|
||||
python-deps:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file: 'pyproject.toml'
|
||||
|
||||
python:
|
||||
- changed-files:
|
||||
- all-globs-to-any-file:
|
||||
- 'invokeai/**'
|
||||
- '!invokeai/frontend/web/**'
|
||||
|
||||
python-tests:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file: 'tests/**'
|
||||
|
||||
ci-cd:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file: .github/**
|
||||
|
||||
docker:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file: docker/**
|
||||
|
||||
installer:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file: installer/**
|
||||
|
||||
docs:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file: docs/**
|
||||
|
||||
invocations:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file: 'invokeai/app/invocations/**'
|
||||
|
||||
backend:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file: 'invokeai/backend/**'
|
||||
|
||||
api:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file: 'invokeai/app/api/**'
|
||||
|
||||
services:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file: 'invokeai/app/services/**'
|
||||
|
||||
frontend-deps:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- '**/*/package.json'
|
||||
- '**/*/pnpm-lock.yaml'
|
||||
|
||||
frontend:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file: 'invokeai/frontend/web/**'
|
||||
21
.github/pull_request_template.md
vendored
@@ -1,21 +0,0 @@
|
||||
## Summary
|
||||
|
||||
<!--A description of the changes in this PR. Include the kind of change (fix, feature, docs, etc), the "why" and the "how". Screenshots or videos are useful for frontend changes.-->
|
||||
|
||||
## Related Issues / Discussions
|
||||
|
||||
<!--WHEN APPLICABLE: List any related issues or discussions on github or discord. If this PR closes an issue, please use the "Closes #1234" format, so that the issue will be automatically closed when the PR merges.-->
|
||||
|
||||
## QA Instructions
|
||||
|
||||
<!--WHEN APPLICABLE: Describe how you have tested the changes in this PR. Provide enough detail that a reviewer can reproduce your tests.-->
|
||||
|
||||
## Merge Plan
|
||||
|
||||
<!--WHEN APPLICABLE: Large PRs, or PRs that touch sensitive things like DB schemas, may need some care when merging. For example, a careful rebase by the change author, timing to not interfere with a pending release, or a message to contributors on discord after merging.-->
|
||||
|
||||
## Checklist
|
||||
|
||||
- [ ] _The PR has a short but descriptive title, suitable for a changelog_
|
||||
- [ ] _Tests added / updated (if applicable)_
|
||||
- [ ] _Documentation added / updated (if applicable)_
|
||||
44
.github/workflows/build-container.yml
vendored
@@ -11,14 +11,8 @@ on:
|
||||
- 'docker/docker-entrypoint.sh'
|
||||
- 'workflows/build-container.yml'
|
||||
tags:
|
||||
- 'v*.*.*'
|
||||
- 'v*'
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
push-to-registry:
|
||||
description: Push the built image to the container registry
|
||||
required: false
|
||||
type: boolean
|
||||
default: false
|
||||
|
||||
permissions:
|
||||
contents: write
|
||||
@@ -46,25 +40,22 @@ jobs:
|
||||
- name: Free up more disk space on the runner
|
||||
# https://github.com/actions/runner-images/issues/2840#issuecomment-1284059930
|
||||
run: |
|
||||
echo "----- Free space before cleanup"
|
||||
df -h
|
||||
sudo rm -rf /usr/share/dotnet
|
||||
sudo rm -rf "$AGENT_TOOLSDIRECTORY"
|
||||
sudo swapoff /mnt/swapfile
|
||||
sudo rm -rf /mnt/swapfile
|
||||
echo "----- Free space after cleanup"
|
||||
df -h
|
||||
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@v5
|
||||
uses: docker/metadata-action@v4
|
||||
with:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
images: |
|
||||
ghcr.io/${{ github.repository }}
|
||||
${{ env.DOCKERHUB_REPOSITORY }}
|
||||
tags: |
|
||||
type=ref,event=branch
|
||||
type=ref,event=tag
|
||||
@@ -77,33 +68,48 @@ jobs:
|
||||
suffix=-${{ matrix.gpu-driver }},onlatest=false
|
||||
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v3
|
||||
uses: docker/setup-qemu-action@v2
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
uses: docker/setup-buildx-action@v2
|
||||
with:
|
||||
platforms: ${{ env.PLATFORMS }}
|
||||
|
||||
- name: Login to GitHub Container Registry
|
||||
if: github.event_name != 'pull_request'
|
||||
uses: docker/login-action@v3
|
||||
uses: docker/login-action@v2
|
||||
with:
|
||||
registry: ghcr.io
|
||||
username: ${{ github.repository_owner }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
# - name: Login to Docker Hub
|
||||
# if: github.event_name != 'pull_request' && vars.DOCKERHUB_REPOSITORY != ''
|
||||
# uses: docker/login-action@v2
|
||||
# with:
|
||||
# username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
# password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
|
||||
- name: Build container
|
||||
timeout-minutes: 40
|
||||
id: docker_build
|
||||
uses: docker/build-push-action@v6
|
||||
uses: docker/build-push-action@v4
|
||||
with:
|
||||
context: .
|
||||
file: docker/Dockerfile
|
||||
platforms: ${{ env.PLATFORMS }}
|
||||
push: ${{ github.ref == 'refs/heads/main' || github.ref_type == 'tag' || github.event.inputs.push-to-registry }}
|
||||
push: ${{ github.ref == 'refs/heads/main' || github.ref_type == 'tag' }}
|
||||
tags: ${{ steps.meta.outputs.tags }}
|
||||
labels: ${{ steps.meta.outputs.labels }}
|
||||
cache-from: |
|
||||
type=gha,scope=${{ github.ref_name }}-${{ matrix.gpu-driver }}
|
||||
type=gha,scope=main-${{ matrix.gpu-driver }}
|
||||
cache-to: type=gha,mode=max,scope=${{ github.ref_name }}-${{ matrix.gpu-driver }}
|
||||
|
||||
# - name: Docker Hub Description
|
||||
# if: github.ref == 'refs/heads/main' || github.ref == 'refs/tags/*' && vars.DOCKERHUB_REPOSITORY != ''
|
||||
# uses: peter-evans/dockerhub-description@v3
|
||||
# with:
|
||||
# username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
# password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
# repository: ${{ vars.DOCKERHUB_REPOSITORY }}
|
||||
# short-description: ${{ github.event.repository.description }}
|
||||
|
||||
45
.github/workflows/build-installer.yml
vendored
@@ -1,45 +0,0 @@
|
||||
# Builds and uploads the installer and python build artifacts.
|
||||
|
||||
name: build installer
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
workflow_call:
|
||||
|
||||
jobs:
|
||||
build-installer:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 5 # expected run time: <2 min
|
||||
steps:
|
||||
- name: checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: setup python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.10'
|
||||
cache: pip
|
||||
cache-dependency-path: pyproject.toml
|
||||
|
||||
- name: install pypa/build
|
||||
run: pip install --upgrade build
|
||||
|
||||
- name: setup frontend
|
||||
uses: ./.github/actions/install-frontend-deps
|
||||
|
||||
- name: create installer
|
||||
id: create_installer
|
||||
run: ./create_installer.sh
|
||||
working-directory: installer
|
||||
|
||||
- name: upload python distribution artifact
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: dist
|
||||
path: ${{ steps.create_installer.outputs.DIST_PATH }}
|
||||
|
||||
- name: upload installer artifact
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: installer
|
||||
path: ${{ steps.create_installer.outputs.INSTALLER_PATH }}
|
||||
80
.github/workflows/frontend-checks.yml
vendored
@@ -1,80 +0,0 @@
|
||||
# Runs frontend code quality checks.
|
||||
#
|
||||
# Checks for changes to frontend files before running the checks.
|
||||
# If always_run is true, always runs the checks.
|
||||
|
||||
name: 'frontend checks'
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- 'main'
|
||||
pull_request:
|
||||
types:
|
||||
- 'ready_for_review'
|
||||
- 'opened'
|
||||
- 'synchronize'
|
||||
merge_group:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
always_run:
|
||||
description: 'Always run the checks'
|
||||
required: true
|
||||
type: boolean
|
||||
default: true
|
||||
workflow_call:
|
||||
inputs:
|
||||
always_run:
|
||||
description: 'Always run the checks'
|
||||
required: true
|
||||
type: boolean
|
||||
default: true
|
||||
|
||||
defaults:
|
||||
run:
|
||||
working-directory: invokeai/frontend/web
|
||||
|
||||
jobs:
|
||||
frontend-checks:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 10 # expected run time: <2 min
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: check for changed frontend files
|
||||
if: ${{ inputs.always_run != true }}
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@v42
|
||||
with:
|
||||
files_yaml: |
|
||||
frontend:
|
||||
- 'invokeai/frontend/web/**'
|
||||
|
||||
- name: install dependencies
|
||||
if: ${{ steps.changed-files.outputs.frontend_any_changed == 'true' || inputs.always_run == true }}
|
||||
uses: ./.github/actions/install-frontend-deps
|
||||
|
||||
- name: tsc
|
||||
if: ${{ steps.changed-files.outputs.frontend_any_changed == 'true' || inputs.always_run == true }}
|
||||
run: 'pnpm lint:tsc'
|
||||
shell: bash
|
||||
|
||||
- name: dpdm
|
||||
if: ${{ steps.changed-files.outputs.frontend_any_changed == 'true' || inputs.always_run == true }}
|
||||
run: 'pnpm lint:dpdm'
|
||||
shell: bash
|
||||
|
||||
- name: eslint
|
||||
if: ${{ steps.changed-files.outputs.frontend_any_changed == 'true' || inputs.always_run == true }}
|
||||
run: 'pnpm lint:eslint'
|
||||
shell: bash
|
||||
|
||||
- name: prettier
|
||||
if: ${{ steps.changed-files.outputs.frontend_any_changed == 'true' || inputs.always_run == true }}
|
||||
run: 'pnpm lint:prettier'
|
||||
shell: bash
|
||||
|
||||
- name: knip
|
||||
if: ${{ steps.changed-files.outputs.frontend_any_changed == 'true' || inputs.always_run == true }}
|
||||
run: 'pnpm lint:knip'
|
||||
shell: bash
|
||||
60
.github/workflows/frontend-tests.yml
vendored
@@ -1,60 +0,0 @@
|
||||
# Runs frontend tests.
|
||||
#
|
||||
# Checks for changes to frontend files before running the tests.
|
||||
# If always_run is true, always runs the tests.
|
||||
|
||||
name: 'frontend tests'
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- 'main'
|
||||
pull_request:
|
||||
types:
|
||||
- 'ready_for_review'
|
||||
- 'opened'
|
||||
- 'synchronize'
|
||||
merge_group:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
always_run:
|
||||
description: 'Always run the tests'
|
||||
required: true
|
||||
type: boolean
|
||||
default: true
|
||||
workflow_call:
|
||||
inputs:
|
||||
always_run:
|
||||
description: 'Always run the tests'
|
||||
required: true
|
||||
type: boolean
|
||||
default: true
|
||||
|
||||
defaults:
|
||||
run:
|
||||
working-directory: invokeai/frontend/web
|
||||
|
||||
jobs:
|
||||
frontend-tests:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 10 # expected run time: <2 min
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: check for changed frontend files
|
||||
if: ${{ inputs.always_run != true }}
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@v42
|
||||
with:
|
||||
files_yaml: |
|
||||
frontend:
|
||||
- 'invokeai/frontend/web/**'
|
||||
|
||||
- name: install dependencies
|
||||
if: ${{ steps.changed-files.outputs.frontend_any_changed == 'true' || inputs.always_run == true }}
|
||||
uses: ./.github/actions/install-frontend-deps
|
||||
|
||||
- name: vitest
|
||||
if: ${{ steps.changed-files.outputs.frontend_any_changed == 'true' || inputs.always_run == true }}
|
||||
run: 'pnpm test:no-watch'
|
||||
shell: bash
|
||||
18
.github/workflows/label-pr.yml
vendored
@@ -1,18 +0,0 @@
|
||||
name: 'label PRs'
|
||||
on:
|
||||
- pull_request_target
|
||||
|
||||
jobs:
|
||||
labeler:
|
||||
permissions:
|
||||
contents: read
|
||||
pull-requests: write
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: label PRs
|
||||
uses: actions/labeler@v5
|
||||
with:
|
||||
configuration-path: .github/pr_labels.yml
|
||||
33
.github/workflows/lint-frontend.yml
vendored
Normal file
@@ -0,0 +1,33 @@
|
||||
name: Lint frontend
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
types:
|
||||
- 'ready_for_review'
|
||||
- 'opened'
|
||||
- 'synchronize'
|
||||
push:
|
||||
branches:
|
||||
- 'main'
|
||||
merge_group:
|
||||
workflow_dispatch:
|
||||
|
||||
defaults:
|
||||
run:
|
||||
working-directory: invokeai/frontend/web
|
||||
|
||||
jobs:
|
||||
lint-frontend:
|
||||
if: github.event.pull_request.draft == false
|
||||
runs-on: ubuntu-22.04
|
||||
steps:
|
||||
- name: Setup Node 18
|
||||
uses: actions/setup-node@v3
|
||||
with:
|
||||
node-version: '18'
|
||||
- uses: actions/checkout@v3
|
||||
- run: 'yarn install --frozen-lockfile'
|
||||
- run: 'yarn run lint:tsc'
|
||||
- run: 'yarn run lint:madge'
|
||||
- run: 'yarn run lint:eslint'
|
||||
- run: 'yarn run lint:prettier'
|
||||
54
.github/workflows/mkdocs-material.yml
vendored
@@ -1,49 +1,51 @@
|
||||
# This is a mostly a copy-paste from https://github.com/squidfunk/mkdocs-material/blob/master/docs/publishing-your-site.md
|
||||
|
||||
name: mkdocs
|
||||
|
||||
name: mkdocs-material
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
workflow_dispatch:
|
||||
- 'refs/heads/main'
|
||||
|
||||
permissions:
|
||||
contents: write
|
||||
contents: write
|
||||
|
||||
jobs:
|
||||
deploy:
|
||||
mkdocs-material:
|
||||
if: github.event.pull_request.draft == false
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
REPO_URL: '${{ github.server_url }}/${{ github.repository }}'
|
||||
REPO_NAME: '${{ github.repository }}'
|
||||
SITE_URL: 'https://${{ github.repository_owner }}.github.io/InvokeAI'
|
||||
|
||||
steps:
|
||||
- name: checkout
|
||||
uses: actions/checkout@v4
|
||||
- name: checkout sources
|
||||
uses: actions/checkout@v3
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: setup python
|
||||
uses: actions/setup-python@v5
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: '3.10'
|
||||
cache: pip
|
||||
cache-dependency-path: pyproject.toml
|
||||
|
||||
- name: set cache id
|
||||
run: echo "cache_id=$(date --utc '+%V')" >> $GITHUB_ENV
|
||||
- name: install requirements
|
||||
env:
|
||||
PIP_USE_PEP517: 1
|
||||
run: |
|
||||
python -m \
|
||||
pip install ".[docs]"
|
||||
|
||||
- name: use cache
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
key: mkdocs-material-${{ env.cache_id }}
|
||||
path: .cache
|
||||
restore-keys: |
|
||||
mkdocs-material-
|
||||
- name: confirm buildability
|
||||
run: |
|
||||
python -m \
|
||||
mkdocs build \
|
||||
--clean \
|
||||
--verbose
|
||||
|
||||
- name: install dependencies
|
||||
run: python -m pip install ".[docs]"
|
||||
|
||||
- name: build & deploy
|
||||
run: mkdocs gh-deploy --force
|
||||
- name: deploy to gh-pages
|
||||
if: ${{ github.ref == 'refs/heads/main' }}
|
||||
run: |
|
||||
python -m \
|
||||
mkdocs gh-deploy \
|
||||
--clean \
|
||||
--force
|
||||
|
||||
20
.github/workflows/pyflakes.yml
vendored
Normal file
@@ -0,0 +1,20 @@
|
||||
on:
|
||||
pull_request:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
- development
|
||||
- 'release-candidate-*'
|
||||
|
||||
jobs:
|
||||
pyflakes:
|
||||
name: runner / pyflakes
|
||||
if: github.event.pull_request.draft == false
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- name: pyflakes
|
||||
uses: reviewdog/action-pyflakes@v1
|
||||
with:
|
||||
github_token: ${{ secrets.GITHUB_TOKEN }}
|
||||
reporter: github-pr-review
|
||||
41
.github/workflows/pypi-release.yml
vendored
Normal file
@@ -0,0 +1,41 @@
|
||||
name: PyPI Release
|
||||
|
||||
on:
|
||||
push:
|
||||
paths:
|
||||
- 'invokeai/version/invokeai_version.py'
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
release:
|
||||
if: github.repository == 'invoke-ai/InvokeAI'
|
||||
runs-on: ubuntu-22.04
|
||||
env:
|
||||
TWINE_USERNAME: __token__
|
||||
TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
|
||||
TWINE_NON_INTERACTIVE: 1
|
||||
steps:
|
||||
- name: checkout sources
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: install deps
|
||||
run: pip install --upgrade build twine
|
||||
|
||||
- name: build package
|
||||
run: python3 -m build
|
||||
|
||||
- name: check distribution
|
||||
run: twine check dist/*
|
||||
|
||||
- name: check PyPI versions
|
||||
if: github.ref == 'refs/heads/main' || github.ref == 'refs/heads/v2.3'
|
||||
run: |
|
||||
pip install --upgrade requests
|
||||
python -c "\
|
||||
import scripts.pypi_helper; \
|
||||
EXISTS=scripts.pypi_helper.local_on_pypi(); \
|
||||
print(f'PACKAGE_EXISTS={EXISTS}')" >> $GITHUB_ENV
|
||||
|
||||
- name: upload package
|
||||
if: env.PACKAGE_EXISTS == 'False' && env.TWINE_PASSWORD != ''
|
||||
run: twine upload dist/*
|
||||
76
.github/workflows/python-checks.yml
vendored
@@ -1,76 +0,0 @@
|
||||
# Runs python code quality checks.
|
||||
#
|
||||
# Checks for changes to python files before running the checks.
|
||||
# If always_run is true, always runs the checks.
|
||||
#
|
||||
# TODO: Add mypy or pyright to the checks.
|
||||
|
||||
name: 'python checks'
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- 'main'
|
||||
pull_request:
|
||||
types:
|
||||
- 'ready_for_review'
|
||||
- 'opened'
|
||||
- 'synchronize'
|
||||
merge_group:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
always_run:
|
||||
description: 'Always run the checks'
|
||||
required: true
|
||||
type: boolean
|
||||
default: true
|
||||
workflow_call:
|
||||
inputs:
|
||||
always_run:
|
||||
description: 'Always run the checks'
|
||||
required: true
|
||||
type: boolean
|
||||
default: true
|
||||
|
||||
jobs:
|
||||
python-checks:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 5 # expected run time: <1 min
|
||||
steps:
|
||||
- name: checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: check for changed python files
|
||||
if: ${{ inputs.always_run != true }}
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@v42
|
||||
with:
|
||||
files_yaml: |
|
||||
python:
|
||||
- 'pyproject.toml'
|
||||
- 'invokeai/**'
|
||||
- '!invokeai/frontend/web/**'
|
||||
- 'tests/**'
|
||||
|
||||
- name: setup python
|
||||
if: ${{ steps.changed-files.outputs.python_any_changed == 'true' || inputs.always_run == true }}
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.10'
|
||||
cache: pip
|
||||
cache-dependency-path: pyproject.toml
|
||||
|
||||
- name: install ruff
|
||||
if: ${{ steps.changed-files.outputs.python_any_changed == 'true' || inputs.always_run == true }}
|
||||
run: pip install ruff==0.6.0
|
||||
shell: bash
|
||||
|
||||
- name: ruff check
|
||||
if: ${{ steps.changed-files.outputs.python_any_changed == 'true' || inputs.always_run == true }}
|
||||
run: ruff check --output-format=github .
|
||||
shell: bash
|
||||
|
||||
- name: ruff format
|
||||
if: ${{ steps.changed-files.outputs.python_any_changed == 'true' || inputs.always_run == true }}
|
||||
run: ruff format --check .
|
||||
shell: bash
|
||||
106
.github/workflows/python-tests.yml
vendored
@@ -1,106 +0,0 @@
|
||||
# Runs python tests on a matrix of python versions and platforms.
|
||||
#
|
||||
# Checks for changes to python files before running the tests.
|
||||
# If always_run is true, always runs the tests.
|
||||
|
||||
name: 'python tests'
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- 'main'
|
||||
pull_request:
|
||||
types:
|
||||
- 'ready_for_review'
|
||||
- 'opened'
|
||||
- 'synchronize'
|
||||
merge_group:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
always_run:
|
||||
description: 'Always run the tests'
|
||||
required: true
|
||||
type: boolean
|
||||
default: true
|
||||
workflow_call:
|
||||
inputs:
|
||||
always_run:
|
||||
description: 'Always run the tests'
|
||||
required: true
|
||||
type: boolean
|
||||
default: true
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
matrix:
|
||||
strategy:
|
||||
matrix:
|
||||
python-version:
|
||||
- '3.10'
|
||||
- '3.11'
|
||||
platform:
|
||||
- linux-cuda-11_7
|
||||
- linux-rocm-5_2
|
||||
- linux-cpu
|
||||
- macos-default
|
||||
- windows-cpu
|
||||
include:
|
||||
- platform: linux-cuda-11_7
|
||||
os: ubuntu-22.04
|
||||
github-env: $GITHUB_ENV
|
||||
- platform: linux-rocm-5_2
|
||||
os: ubuntu-22.04
|
||||
extra-index-url: 'https://download.pytorch.org/whl/rocm5.2'
|
||||
github-env: $GITHUB_ENV
|
||||
- platform: linux-cpu
|
||||
os: ubuntu-22.04
|
||||
extra-index-url: 'https://download.pytorch.org/whl/cpu'
|
||||
github-env: $GITHUB_ENV
|
||||
- platform: macos-default
|
||||
os: macOS-14
|
||||
github-env: $GITHUB_ENV
|
||||
- platform: windows-cpu
|
||||
os: windows-2022
|
||||
github-env: $env:GITHUB_ENV
|
||||
name: 'py${{ matrix.python-version }}: ${{ matrix.platform }}'
|
||||
runs-on: ${{ matrix.os }}
|
||||
timeout-minutes: 15 # expected run time: 2-6 min, depending on platform
|
||||
env:
|
||||
PIP_USE_PEP517: '1'
|
||||
steps:
|
||||
- name: checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: check for changed python files
|
||||
if: ${{ inputs.always_run != true }}
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@v42
|
||||
with:
|
||||
files_yaml: |
|
||||
python:
|
||||
- 'pyproject.toml'
|
||||
- 'invokeai/**'
|
||||
- '!invokeai/frontend/web/**'
|
||||
- 'tests/**'
|
||||
|
||||
- name: setup python
|
||||
if: ${{ steps.changed-files.outputs.python_any_changed == 'true' || inputs.always_run == true }}
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
cache: pip
|
||||
cache-dependency-path: pyproject.toml
|
||||
|
||||
- name: install dependencies
|
||||
if: ${{ steps.changed-files.outputs.python_any_changed == 'true' || inputs.always_run == true }}
|
||||
env:
|
||||
PIP_EXTRA_INDEX_URL: ${{ matrix.extra-index-url }}
|
||||
run: >
|
||||
pip3 install --editable=".[test]"
|
||||
|
||||
- name: run pytest
|
||||
if: ${{ steps.changed-files.outputs.python_any_changed == 'true' || inputs.always_run == true }}
|
||||
run: pytest
|
||||
108
.github/workflows/release.yml
vendored
@@ -1,108 +0,0 @@
|
||||
# Main release workflow. Triggered on tag push or manual trigger.
|
||||
#
|
||||
# - Runs all code checks and tests
|
||||
# - Verifies the app version matches the tag version.
|
||||
# - Builds the installer and distribution, uploading them as artifacts.
|
||||
# - Publishes to TestPyPI and PyPI. Both are conditional on the previous steps passing and require a manual approval.
|
||||
#
|
||||
# See docs/RELEASE.md for more information on the release process.
|
||||
|
||||
name: release
|
||||
|
||||
on:
|
||||
push:
|
||||
tags:
|
||||
- 'v*'
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
check-version:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: check python version
|
||||
uses: samuelcolvin/check-python-version@v4
|
||||
id: check-python-version
|
||||
with:
|
||||
version_file_path: invokeai/version/invokeai_version.py
|
||||
|
||||
frontend-checks:
|
||||
uses: ./.github/workflows/frontend-checks.yml
|
||||
with:
|
||||
always_run: true
|
||||
|
||||
frontend-tests:
|
||||
uses: ./.github/workflows/frontend-tests.yml
|
||||
with:
|
||||
always_run: true
|
||||
|
||||
python-checks:
|
||||
uses: ./.github/workflows/python-checks.yml
|
||||
with:
|
||||
always_run: true
|
||||
|
||||
python-tests:
|
||||
uses: ./.github/workflows/python-tests.yml
|
||||
with:
|
||||
always_run: true
|
||||
|
||||
build:
|
||||
uses: ./.github/workflows/build-installer.yml
|
||||
|
||||
publish-testpypi:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 5 # expected run time: <1 min
|
||||
needs:
|
||||
[
|
||||
check-version,
|
||||
frontend-checks,
|
||||
frontend-tests,
|
||||
python-checks,
|
||||
python-tests,
|
||||
build,
|
||||
]
|
||||
environment:
|
||||
name: testpypi
|
||||
url: https://test.pypi.org/p/invokeai
|
||||
permissions:
|
||||
id-token: write
|
||||
steps:
|
||||
- name: download distribution from build job
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: dist
|
||||
path: dist/
|
||||
|
||||
- name: publish distribution to TestPyPI
|
||||
uses: pypa/gh-action-pypi-publish@release/v1
|
||||
with:
|
||||
repository-url: https://test.pypi.org/legacy/
|
||||
|
||||
publish-pypi:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 5 # expected run time: <1 min
|
||||
needs:
|
||||
[
|
||||
check-version,
|
||||
frontend-checks,
|
||||
frontend-tests,
|
||||
python-checks,
|
||||
python-tests,
|
||||
build,
|
||||
]
|
||||
environment:
|
||||
name: pypi
|
||||
url: https://pypi.org/p/invokeai
|
||||
permissions:
|
||||
id-token: write
|
||||
steps:
|
||||
- name: download distribution from build job
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: dist
|
||||
path: dist/
|
||||
|
||||
- name: publish distribution to PyPI
|
||||
uses: pypa/gh-action-pypi-publish@release/v1
|
||||
27
.github/workflows/style-checks.yml
vendored
Normal file
@@ -0,0 +1,27 @@
|
||||
name: Black # TODO: add isort and flake8 later
|
||||
|
||||
on:
|
||||
pull_request: {}
|
||||
push:
|
||||
branches: master
|
||||
tags: "*"
|
||||
|
||||
jobs:
|
||||
test:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
|
||||
- name: Setup Python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: '3.10'
|
||||
|
||||
- name: Install dependencies with pip
|
||||
run: |
|
||||
pip install --upgrade pip wheel
|
||||
pip install .[test]
|
||||
|
||||
# - run: isort --check-only .
|
||||
- run: black --check .
|
||||
# - run: flake8
|
||||
50
.github/workflows/test-invoke-pip-skip.yml
vendored
Normal file
@@ -0,0 +1,50 @@
|
||||
name: Test invoke.py pip
|
||||
|
||||
# This is a dummy stand-in for the actual tests
|
||||
# we don't need to run python tests on non-Python changes
|
||||
# But PRs require passing tests to be mergeable
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
paths:
|
||||
- '**'
|
||||
- '!pyproject.toml'
|
||||
- '!invokeai/**'
|
||||
- '!tests/**'
|
||||
- 'invokeai/frontend/web/**'
|
||||
merge_group:
|
||||
workflow_dispatch:
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
matrix:
|
||||
if: github.event.pull_request.draft == false
|
||||
strategy:
|
||||
matrix:
|
||||
python-version:
|
||||
- '3.10'
|
||||
pytorch:
|
||||
- linux-cuda-11_7
|
||||
- linux-rocm-5_2
|
||||
- linux-cpu
|
||||
- macos-default
|
||||
- windows-cpu
|
||||
include:
|
||||
- pytorch: linux-cuda-11_7
|
||||
os: ubuntu-22.04
|
||||
- pytorch: linux-rocm-5_2
|
||||
os: ubuntu-22.04
|
||||
- pytorch: linux-cpu
|
||||
os: ubuntu-22.04
|
||||
- pytorch: macos-default
|
||||
os: macOS-12
|
||||
- pytorch: windows-cpu
|
||||
os: windows-2022
|
||||
name: ${{ matrix.pytorch }} on ${{ matrix.python-version }}
|
||||
runs-on: ${{ matrix.os }}
|
||||
steps:
|
||||
- name: skip
|
||||
run: echo "no build required"
|
||||
123
.github/workflows/test-invoke-pip.yml
vendored
Normal file
@@ -0,0 +1,123 @@
|
||||
name: Test invoke.py pip
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- 'main'
|
||||
paths:
|
||||
- 'pyproject.toml'
|
||||
- 'invokeai/**'
|
||||
- '!invokeai/frontend/web/**'
|
||||
pull_request:
|
||||
paths:
|
||||
- 'pyproject.toml'
|
||||
- 'invokeai/**'
|
||||
- 'tests/**'
|
||||
- '!invokeai/frontend/web/**'
|
||||
types:
|
||||
- 'ready_for_review'
|
||||
- 'opened'
|
||||
- 'synchronize'
|
||||
merge_group:
|
||||
workflow_dispatch:
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
matrix:
|
||||
if: github.event.pull_request.draft == false
|
||||
strategy:
|
||||
matrix:
|
||||
python-version:
|
||||
# - '3.9'
|
||||
- '3.10'
|
||||
pytorch:
|
||||
- linux-cuda-11_7
|
||||
- linux-rocm-5_2
|
||||
- linux-cpu
|
||||
- macos-default
|
||||
- windows-cpu
|
||||
include:
|
||||
- pytorch: linux-cuda-11_7
|
||||
os: ubuntu-22.04
|
||||
github-env: $GITHUB_ENV
|
||||
- pytorch: linux-rocm-5_2
|
||||
os: ubuntu-22.04
|
||||
extra-index-url: 'https://download.pytorch.org/whl/rocm5.2'
|
||||
github-env: $GITHUB_ENV
|
||||
- pytorch: linux-cpu
|
||||
os: ubuntu-22.04
|
||||
extra-index-url: 'https://download.pytorch.org/whl/cpu'
|
||||
github-env: $GITHUB_ENV
|
||||
- pytorch: macos-default
|
||||
os: macOS-12
|
||||
github-env: $GITHUB_ENV
|
||||
- pytorch: windows-cpu
|
||||
os: windows-2022
|
||||
github-env: $env:GITHUB_ENV
|
||||
name: ${{ matrix.pytorch }} on ${{ matrix.python-version }}
|
||||
runs-on: ${{ matrix.os }}
|
||||
env:
|
||||
PIP_USE_PEP517: '1'
|
||||
steps:
|
||||
- name: Checkout sources
|
||||
id: checkout-sources
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: set test prompt to main branch validation
|
||||
run: echo "TEST_PROMPTS=tests/validate_pr_prompt.txt" >> ${{ matrix.github-env }}
|
||||
|
||||
- name: setup python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
cache: pip
|
||||
cache-dependency-path: pyproject.toml
|
||||
|
||||
- name: install invokeai
|
||||
env:
|
||||
PIP_EXTRA_INDEX_URL: ${{ matrix.extra-index-url }}
|
||||
run: >
|
||||
pip3 install
|
||||
--editable=".[test]"
|
||||
|
||||
- name: run pytest
|
||||
id: run-pytest
|
||||
run: pytest
|
||||
|
||||
# - name: run invokeai-configure
|
||||
# env:
|
||||
# HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGINGFACE_TOKEN }}
|
||||
# run: >
|
||||
# invokeai-configure
|
||||
# --yes
|
||||
# --default_only
|
||||
# --full-precision
|
||||
# # can't use fp16 weights without a GPU
|
||||
|
||||
# - name: run invokeai
|
||||
# id: run-invokeai
|
||||
# env:
|
||||
# # Set offline mode to make sure configure preloaded successfully.
|
||||
# HF_HUB_OFFLINE: 1
|
||||
# HF_DATASETS_OFFLINE: 1
|
||||
# TRANSFORMERS_OFFLINE: 1
|
||||
# INVOKEAI_OUTDIR: ${{ github.workspace }}/results
|
||||
# run: >
|
||||
# invokeai
|
||||
# --no-patchmatch
|
||||
# --no-nsfw_checker
|
||||
# --precision=float32
|
||||
# --always_use_cpu
|
||||
# --use_memory_db
|
||||
# --outdir ${{ env.INVOKEAI_OUTDIR }}/${{ matrix.python-version }}/${{ matrix.pytorch }}
|
||||
# --from_file ${{ env.TEST_PROMPTS }}
|
||||
|
||||
# - name: Archive results
|
||||
# env:
|
||||
# INVOKEAI_OUTDIR: ${{ github.workspace }}/results
|
||||
# uses: actions/upload-artifact@v3
|
||||
# with:
|
||||
# name: results
|
||||
# path: ${{ env.INVOKEAI_OUTDIR }}
|
||||
52
.gitignore
vendored
@@ -1,4 +1,22 @@
|
||||
# ignore default image save location and model symbolic link
|
||||
.idea/
|
||||
embeddings/
|
||||
outputs/
|
||||
models/ldm/stable-diffusion-v1/model.ckpt
|
||||
**/restoration/codeformer/weights
|
||||
|
||||
# ignore user models config
|
||||
configs/models.user.yaml
|
||||
config/models.user.yml
|
||||
invokeai.init
|
||||
.version
|
||||
.last_model
|
||||
|
||||
# ignore the Anaconda/Miniconda installer used while building Docker image
|
||||
anaconda.sh
|
||||
|
||||
# ignore a directory which serves as a place for initial images
|
||||
inputs/
|
||||
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
@@ -16,7 +34,7 @@ __pycache__/
|
||||
.Python
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
# dist/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
@@ -133,10 +151,12 @@ celerybeat.pid
|
||||
|
||||
# Environments
|
||||
.env
|
||||
.venv*
|
||||
.venv
|
||||
env/
|
||||
venv/
|
||||
ENV/
|
||||
env.bak/
|
||||
venv.bak/
|
||||
|
||||
# Spyder project settings
|
||||
.spyderproject
|
||||
@@ -169,17 +189,44 @@ cython_debug/
|
||||
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
||||
#.idea/
|
||||
|
||||
src
|
||||
**/__pycache__/
|
||||
outputs
|
||||
|
||||
# Logs and associated folders
|
||||
# created from generated embeddings.
|
||||
logs
|
||||
testtube
|
||||
checkpoints
|
||||
# If it's a Mac
|
||||
.DS_Store
|
||||
|
||||
invokeai/frontend/yarn.lock
|
||||
invokeai/frontend/node_modules
|
||||
|
||||
# Let the frontend manage its own gitignore
|
||||
!invokeai/frontend/web/*
|
||||
|
||||
# Scratch folder
|
||||
.scratch/
|
||||
.vscode/
|
||||
gfpgan/
|
||||
models/ldm/stable-diffusion-v1/*.sha256
|
||||
|
||||
|
||||
# GFPGAN model files
|
||||
gfpgan/
|
||||
|
||||
# config file (will be created by installer)
|
||||
configs/models.yaml
|
||||
|
||||
# ignore initfile
|
||||
.invokeai
|
||||
|
||||
# ignore environment.yml and requirements.txt
|
||||
# these are links to the real files in environments-and-requirements
|
||||
environment.yml
|
||||
requirements.txt
|
||||
|
||||
# source installer files
|
||||
installer/*zip
|
||||
@@ -187,4 +234,3 @@ installer/install.bat
|
||||
installer/install.sh
|
||||
installer/update.bat
|
||||
installer/update.sh
|
||||
installer/InvokeAI-Installer/
|
||||
|
||||
@@ -8,17 +8,3 @@ repos:
|
||||
language: system
|
||||
entry: black
|
||||
types: [python]
|
||||
|
||||
- id: flake8
|
||||
name: flake8
|
||||
stages: [commit]
|
||||
language: system
|
||||
entry: flake8
|
||||
types: [python]
|
||||
|
||||
- id: isort
|
||||
name: isort
|
||||
stages: [commit]
|
||||
language: system
|
||||
entry: isort
|
||||
types: [python]
|
||||
@@ -7,7 +7,7 @@ embeddedLanguageFormatting: auto
|
||||
overrides:
|
||||
- files: '*.md'
|
||||
options:
|
||||
proseWrap: preserve
|
||||
proseWrap: always
|
||||
printWidth: 80
|
||||
parser: markdown
|
||||
cursorOffset: -1
|
||||
|
||||
82
Makefile
@@ -1,82 +0,0 @@
|
||||
# simple Makefile with scripts that are otherwise hard to remember
|
||||
# to use, run from the repo root `make <command>`
|
||||
|
||||
default: help
|
||||
|
||||
help:
|
||||
@echo Developer commands:
|
||||
@echo
|
||||
@echo "ruff Run ruff, fixing any safely-fixable errors and formatting"
|
||||
@echo "ruff-unsafe Run ruff, fixing all fixable errors and formatting"
|
||||
@echo "mypy Run mypy using the config in pyproject.toml to identify type mismatches and other coding errors"
|
||||
@echo "mypy-all Run mypy ignoring the config in pyproject.toml but still ignoring missing imports"
|
||||
@echo "test Run the unit tests."
|
||||
@echo "update-config-docstring Update the app's config docstring so mkdocs can autogenerate it correctly."
|
||||
@echo "frontend-install Install the pnpm modules needed for the front end"
|
||||
@echo "frontend-build Build the frontend in order to run on localhost:9090"
|
||||
@echo "frontend-dev Run the frontend in developer mode on localhost:5173"
|
||||
@echo "frontend-typegen Generate types for the frontend from the OpenAPI schema"
|
||||
@echo "installer-zip Build the installer .zip file for the current version"
|
||||
@echo "tag-release Tag the GitHub repository with the current version (use at release time only!)"
|
||||
@echo "openapi Generate the OpenAPI schema for the app, outputting to stdout"
|
||||
@echo "docs Serve the mkdocs site with live reload"
|
||||
|
||||
# Runs ruff, fixing any safely-fixable errors and formatting
|
||||
ruff:
|
||||
ruff check . --fix
|
||||
ruff format .
|
||||
|
||||
# Runs ruff, fixing all errors it can fix and formatting
|
||||
ruff-unsafe:
|
||||
ruff check . --fix --unsafe-fixes
|
||||
ruff format .
|
||||
|
||||
# Runs mypy, using the config in pyproject.toml
|
||||
mypy:
|
||||
mypy scripts/invokeai-web.py
|
||||
|
||||
# Runs mypy, ignoring the config in pyproject.toml but still ignoring missing (untyped) imports
|
||||
# (many files are ignored by the config, so this is useful for checking all files)
|
||||
mypy-all:
|
||||
mypy scripts/invokeai-web.py --config-file= --ignore-missing-imports
|
||||
|
||||
# Run the unit tests
|
||||
test:
|
||||
pytest ./tests
|
||||
|
||||
# Update config docstring
|
||||
update-config-docstring:
|
||||
python scripts/update_config_docstring.py
|
||||
|
||||
# Install the pnpm modules needed for the front end
|
||||
frontend-install:
|
||||
rm -rf invokeai/frontend/web/node_modules
|
||||
cd invokeai/frontend/web && pnpm install
|
||||
|
||||
# Build the frontend
|
||||
frontend-build:
|
||||
cd invokeai/frontend/web && pnpm build
|
||||
|
||||
# Run the frontend in dev mode
|
||||
frontend-dev:
|
||||
cd invokeai/frontend/web && pnpm dev
|
||||
|
||||
frontend-typegen:
|
||||
cd invokeai/frontend/web && python ../../../scripts/generate_openapi_schema.py | pnpm typegen
|
||||
|
||||
# Installer zip file
|
||||
installer-zip:
|
||||
cd installer && ./create_installer.sh
|
||||
|
||||
# Tag the release
|
||||
tag-release:
|
||||
cd installer && ./tag_release.sh
|
||||
|
||||
# Generate the OpenAPI Schema for the app
|
||||
openapi:
|
||||
python scripts/generate_openapi_schema.py
|
||||
|
||||
# Serve the mkdocs site w/ live reload
|
||||
.PHONY: docs
|
||||
docs:
|
||||
mkdocs serve
|
||||
525
README.md
@@ -1,142 +1,22 @@
|
||||
<div align="center">
|
||||
|
||||

|
||||

|
||||
|
||||
# Invoke - Professional Creative AI Tools for Visual Media
|
||||
|
||||
#### To learn more about Invoke, or implement our Business solutions, visit [invoke.com]
|
||||
|
||||
[![discord badge]][discord link] [![latest release badge]][latest release link] [![github stars badge]][github stars link] [![github forks badge]][github forks link] [![CI checks on main badge]][CI checks on main link] [![latest commit to main badge]][latest commit to main link] [![github open issues badge]][github open issues link] [![github open prs badge]][github open prs link] [![translation status badge]][translation status link]
|
||||
|
||||
</div>
|
||||
|
||||
Invoke is a leading creative engine built to empower professionals and enthusiasts alike. Generate and create stunning visual media using the latest AI-driven technologies. Invoke offers an industry leading web-based UI, and serves as the foundation for multiple commercial products.
|
||||
|
||||
Invoke is available in two editions:
|
||||
|
||||
| **Community Edition** | **Professional Edition** |
|
||||
|----------------------------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------|
|
||||
| **For users looking for a locally installed, self-hosted and self-managed service** | **For users or teams looking for a cloud-hosted, fully managed service** |
|
||||
| - Free to use under a commercially-friendly license | - Monthly subscription fee with three different plan levels |
|
||||
| - Download and install on compatible hardware | - Offers additional benefits, including multi-user support, improved model training, and more |
|
||||
| - Includes all core studio features: generate, refine, iterate on images, and build workflows | - Hosted in the cloud for easy, secure model access and scalability |
|
||||
| Quick Start -> [Installation and Updates][installation docs] | More Information -> [www.invoke.com/pricing](https://www.invoke.com/pricing) |
|
||||
# Invoke AI - Generative AI for Professional Creatives
|
||||
## Professional Creative Tools for Stable Diffusion, Custom-Trained Models, and more.
|
||||
To learn more about Invoke AI, get started instantly, or implement our Business solutions, visit [invoke.ai](https://invoke.ai)
|
||||
|
||||
|
||||

|
||||
[![discord badge]][discord link]
|
||||
|
||||
# Documentation
|
||||
| **Quick Links** |
|
||||
|----------------------------------------------------------------------------------------------------------------------------|
|
||||
| [Installation and Updates][installation docs] - [Documentation and Tutorials][docs home] - [Bug Reports][github issues] - [Contributing][contributing docs] |
|
||||
[![latest release badge]][latest release link] [![github stars badge]][github stars link] [![github forks badge]][github forks link]
|
||||
|
||||
</div>
|
||||
[![CI checks on main badge]][CI checks on main link] [![latest commit to main badge]][latest commit to main link]
|
||||
|
||||
## Quick Start
|
||||
[![github open issues badge]][github open issues link] [![github open prs badge]][github open prs link] [![translation status badge]][translation status link]
|
||||
|
||||
1. Download and unzip the installer from the bottom of the [latest release][latest release link].
|
||||
2. Run the installer script.
|
||||
|
||||
- **Windows**: Double-click on the `install.bat` script.
|
||||
- **macOS**: Open a Terminal window, drag the file `install.sh` from Finder into the Terminal, and press enter.
|
||||
- **Linux**: Run `install.sh`.
|
||||
|
||||
3. When prompted, enter a location for the install and select your GPU type.
|
||||
4. Once the install finishes, find the directory you selected during install. The default location is `C:\Users\Username\invokeai` for Windows or `~/invokeai` for Linux/macOS.
|
||||
5. Run the launcher script (`invoke.bat` for Windows, `invoke.sh` for macOS and Linux) the same way you ran the installer script in step 2.
|
||||
6. Select option 1 to start the application. Once it starts up, open your browser and go to <http://localhost:9090>.
|
||||
7. Open the model manager tab to install a starter model and then you'll be ready to generate.
|
||||
|
||||
More detail, including hardware requirements and manual install instructions, is available in the [installation documentation][installation docs].
|
||||
|
||||
## Docker Container
|
||||
|
||||
We publish official container images in GitHub Container Registry: https://github.com/invoke-ai/InvokeAI/pkgs/container/invokeai. Both CUDA and ROCm images are available. Check the above link for relevant tags.
|
||||
|
||||
> [!IMPORTANT]
|
||||
> Ensure that Docker is set up to use the GPU. Refer to [NVIDIA][nvidia docker docs] or [AMD][amd docker docs] documentation.
|
||||
|
||||
### Generate!
|
||||
|
||||
Run the container, modifying the command as necessary:
|
||||
|
||||
```bash
|
||||
docker run --runtime=nvidia --gpus=all --publish 9090:9090 ghcr.io/invoke-ai/invokeai
|
||||
```
|
||||
|
||||
Then open `http://localhost:9090` and install some models using the Model Manager tab to begin generating.
|
||||
|
||||
For ROCm, add `--device /dev/kfd --device /dev/dri` to the `docker run` command.
|
||||
|
||||
### Persist your data
|
||||
|
||||
You will likely want to persist your workspace outside of the container. Use the `--volume /home/myuser/invokeai:/invokeai` flag to mount some local directory (using its **absolute** path) to the `/invokeai` path inside the container. Your generated images and models will reside there. You can use this directory with other InvokeAI installations, or switch between runtime directories as needed.
|
||||
|
||||
### DIY
|
||||
|
||||
Build your own image and customize the environment to match your needs using our `docker-compose` stack. See [README.md](./docker/README.md) in the [docker](./docker) directory.
|
||||
|
||||
## Troubleshooting, FAQ and Support
|
||||
|
||||
Please review our [FAQ][faq] for solutions to common installation problems and other issues.
|
||||
|
||||
For more help, please join our [Discord][discord link].
|
||||
|
||||
## Features
|
||||
|
||||
Full details on features can be found in [our documentation][features docs].
|
||||
|
||||
### Web Server & UI
|
||||
|
||||
Invoke runs a locally hosted web server & React UI with an industry-leading user experience.
|
||||
|
||||
### Unified Canvas
|
||||
|
||||
The Unified Canvas is a fully integrated canvas implementation with support for all core generation capabilities, in/out-painting, brush tools, and more. This creative tool unlocks the capability for artists to create with AI as a creative collaborator, and can be used to augment AI-generated imagery, sketches, photography, renders, and more.
|
||||
|
||||
### Workflows & Nodes
|
||||
|
||||
Invoke offers a fully featured workflow management solution, enabling users to combine the power of node-based workflows with the ease of a UI. This allows for customizable generation pipelines to be developed and shared by users looking to create specific workflows to support their production use-cases.
|
||||
|
||||
### Board & Gallery Management
|
||||
|
||||
Invoke features an organized gallery system for easily storing, accessing, and remixing your content in the Invoke workspace. Images can be dragged/dropped onto any image-based UI element in the application, and rich metadata within the image allows for easy recall of key prompts or settings used in your workflow.
|
||||
|
||||
### Other features
|
||||
|
||||
- Support for both ckpt and diffusers models
|
||||
- SD1.5, SD2.0, and SDXL support
|
||||
- Upscaling Tools
|
||||
- Embedding Manager & Support
|
||||
- Model Manager & Support
|
||||
- Workflow creation & management
|
||||
- Node-Based Architecture
|
||||
|
||||
## Contributing
|
||||
|
||||
Anyone who wishes to contribute to this project - whether documentation, features, bug fixes, code cleanup, testing, or code reviews - is very much encouraged to do so.
|
||||
|
||||
Get started with contributing by reading our [contribution documentation][contributing docs], joining the [#dev-chat] or the GitHub discussion board.
|
||||
|
||||
We hope you enjoy using Invoke as much as we enjoy creating it, and we hope you will elect to become part of our community.
|
||||
|
||||
## Thanks
|
||||
|
||||
Invoke is a combined effort of [passionate and talented people from across the world][contributors]. We thank them for their time, hard work and effort.
|
||||
|
||||
Original portions of the software are Copyright © 2024 by respective contributors.
|
||||
|
||||
[features docs]: https://invoke-ai.github.io/InvokeAI/features/database/
|
||||
[faq]: https://invoke-ai.github.io/InvokeAI/faq/
|
||||
[contributors]: https://invoke-ai.github.io/InvokeAI/contributing/contributors/
|
||||
[invoke.com]: https://www.invoke.com/about
|
||||
[github issues]: https://github.com/invoke-ai/InvokeAI/issues
|
||||
[docs home]: https://invoke-ai.github.io/InvokeAI
|
||||
[installation docs]: https://invoke-ai.github.io/InvokeAI/installation/
|
||||
[#dev-chat]: https://discord.com/channels/1020123559063990373/1049495067846524939
|
||||
[contributing docs]: https://invoke-ai.github.io/InvokeAI/contributing/
|
||||
[CI checks on main badge]: https://flat.badgen.net/github/checks/invoke-ai/InvokeAI/main?label=CI%20status%20on%20main&cache=900&icon=github
|
||||
[CI checks on main link]: https://github.com/invoke-ai/InvokeAI/actions?query=branch%3Amain
|
||||
[CI checks on main link]:https://github.com/invoke-ai/InvokeAI/actions?query=branch%3Amain
|
||||
[discord badge]: https://flat.badgen.net/discord/members/ZmtBAhwWhy?icon=discord
|
||||
[discord link]: https://discord.gg/ZmtBAhwWhy
|
||||
[github forks badge]: https://flat.badgen.net/github/forks/invoke-ai/InvokeAI?icon=github
|
||||
@@ -150,8 +30,389 @@ Original portions of the software are Copyright © 2024 by respective contributo
|
||||
[latest commit to main badge]: https://flat.badgen.net/github/last-commit/invoke-ai/InvokeAI/main?icon=github&color=yellow&label=last%20dev%20commit&cache=900
|
||||
[latest commit to main link]: https://github.com/invoke-ai/InvokeAI/commits/main
|
||||
[latest release badge]: https://flat.badgen.net/github/release/invoke-ai/InvokeAI/development?icon=github
|
||||
[latest release link]: https://github.com/invoke-ai/InvokeAI/releases/latest
|
||||
[latest release link]: https://github.com/invoke-ai/InvokeAI/releases
|
||||
[translation status badge]: https://hosted.weblate.org/widgets/invokeai/-/svg-badge.svg
|
||||
[translation status link]: https://hosted.weblate.org/engage/invokeai/
|
||||
[nvidia docker docs]: https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html
|
||||
[amd docker docs]: https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/docker.html
|
||||
|
||||
</div>
|
||||
|
||||
InvokeAI is a leading creative engine built to empower professionals
|
||||
and enthusiasts alike. Generate and create stunning visual media using
|
||||
the latest AI-driven technologies. InvokeAI offers an industry leading
|
||||
Web Interface, interactive Command Line Interface, and also serves as
|
||||
the foundation for multiple commercial products.
|
||||
|
||||
**Quick links**: [[How to
|
||||
Install](https://invoke-ai.github.io/InvokeAI/#installation)] [<a
|
||||
href="https://discord.gg/ZmtBAhwWhy">Discord Server</a>] [<a
|
||||
href="https://invoke-ai.github.io/InvokeAI/">Documentation and
|
||||
Tutorials</a>] [<a
|
||||
href="https://github.com/invoke-ai/InvokeAI/">Code and
|
||||
Downloads</a>] [<a
|
||||
href="https://github.com/invoke-ai/InvokeAI/issues">Bug Reports</a>]
|
||||
[<a
|
||||
href="https://github.com/invoke-ai/InvokeAI/discussions">Discussion,
|
||||
Ideas & Q&A</a>]
|
||||
|
||||
<div align="center">
|
||||
|
||||

|
||||
|
||||
</div>
|
||||
|
||||
## Table of Contents
|
||||
|
||||
Table of Contents 📝
|
||||
|
||||
**Getting Started**
|
||||
1. 🏁 [Quick Start](#quick-start)
|
||||
3. 🖥️ [Hardware Requirements](#hardware-requirements)
|
||||
|
||||
**More About Invoke**
|
||||
1. 🌟 [Features](#features)
|
||||
2. 📣 [Latest Changes](#latest-changes)
|
||||
3. 🛠️ [Troubleshooting](#troubleshooting)
|
||||
|
||||
**Supporting the Project**
|
||||
1. 🤝 [Contributing](#contributing)
|
||||
2. 👥 [Contributors](#contributors)
|
||||
3. 💕 [Support](#support)
|
||||
|
||||
## Quick Start
|
||||
|
||||
For full installation and upgrade instructions, please see:
|
||||
[InvokeAI Installation Overview](https://invoke-ai.github.io/InvokeAI/installation/)
|
||||
|
||||
If upgrading from version 2.3, please read [Migrating a 2.3 root
|
||||
directory to 3.0](#migrating-to-3) first.
|
||||
|
||||
### Automatic Installer (suggested for 1st time users)
|
||||
|
||||
1. Go to the bottom of the [Latest Release Page](https://github.com/invoke-ai/InvokeAI/releases/latest)
|
||||
|
||||
2. Download the .zip file for your OS (Windows/macOS/Linux).
|
||||
|
||||
3. Unzip the file.
|
||||
|
||||
4. **Windows:** double-click on the `install.bat` script. **macOS:** Open a Terminal window, drag the file `install.sh` from Finder
|
||||
into the Terminal, and press return. **Linux:** run `install.sh`.
|
||||
|
||||
5. You'll be asked to confirm the location of the folder in which
|
||||
to install InvokeAI and its image generation model files. Pick a
|
||||
location with at least 15 GB of free disk space. More if you plan on
|
||||
installing lots of models.
|
||||
|
||||
6. Wait while the installer does its thing. After installing the software,
|
||||
the installer will launch a script that lets you configure InvokeAI and
|
||||
select a set of starting image generation models.
|
||||
|
||||
7. Find the folder that InvokeAI was installed into (it is not the
|
||||
same as the unpacked zip file directory!) The default location of this
|
||||
folder (if you didn't change it in step 5) is `~/invokeai` on
|
||||
Linux/Mac systems, and `C:\Users\YourName\invokeai` on Windows. This directory will contain launcher scripts named `invoke.sh` and `invoke.bat`.
|
||||
|
||||
8. On Windows systems, double-click on the `invoke.bat` file. On
|
||||
macOS, open a Terminal window, drag `invoke.sh` from the folder into
|
||||
the Terminal, and press return. On Linux, run `invoke.sh`
|
||||
|
||||
9. Press 2 to open the "browser-based UI", press enter/return, wait a
|
||||
minute or two for Stable Diffusion to start up, then open your browser
|
||||
and go to http://localhost:9090.
|
||||
|
||||
10. Type `banana sushi` in the box on the top left and click `Invoke`
|
||||
|
||||
### Command-Line Installation (for developers and users familiar with Terminals)
|
||||
|
||||
You must have Python 3.9 through 3.11 installed on your machine. Earlier or
|
||||
later versions are not supported.
|
||||
Node.js also needs to be installed along with yarn (can be installed with
|
||||
the command `npm install -g yarn` if needed)
|
||||
|
||||
1. Open a command-line window on your machine. The PowerShell is recommended for Windows.
|
||||
2. Create a directory to install InvokeAI into. You'll need at least 15 GB of free space:
|
||||
|
||||
```terminal
|
||||
mkdir invokeai
|
||||
```
|
||||
|
||||
3. Create a virtual environment named `.venv` inside this directory and activate it:
|
||||
|
||||
```terminal
|
||||
cd invokeai
|
||||
python -m venv .venv --prompt InvokeAI
|
||||
```
|
||||
|
||||
4. Activate the virtual environment (do it every time you run InvokeAI)
|
||||
|
||||
_For Linux/Mac users:_
|
||||
|
||||
```sh
|
||||
source .venv/bin/activate
|
||||
```
|
||||
|
||||
_For Windows users:_
|
||||
|
||||
```ps
|
||||
.venv\Scripts\activate
|
||||
```
|
||||
|
||||
5. Install the InvokeAI module and its dependencies. Choose the command suited for your platform & GPU.
|
||||
|
||||
_For Windows/Linux with an NVIDIA GPU:_
|
||||
|
||||
```terminal
|
||||
pip install "InvokeAI[xformers]" --use-pep517 --extra-index-url https://download.pytorch.org/whl/cu117
|
||||
```
|
||||
|
||||
_For Linux with an AMD GPU:_
|
||||
|
||||
```sh
|
||||
pip install InvokeAI --use-pep517 --extra-index-url https://download.pytorch.org/whl/rocm5.4.2
|
||||
```
|
||||
|
||||
_For non-GPU systems:_
|
||||
```terminal
|
||||
pip install InvokeAI --use-pep517 --extra-index-url https://download.pytorch.org/whl/cpu
|
||||
```
|
||||
|
||||
_For Macintoshes, either Intel or M1/M2:_
|
||||
|
||||
```sh
|
||||
pip install InvokeAI --use-pep517
|
||||
```
|
||||
|
||||
6. Configure InvokeAI and install a starting set of image generation models (you only need to do this once):
|
||||
|
||||
```terminal
|
||||
invokeai-configure
|
||||
```
|
||||
|
||||
7. Launch the web server (do it every time you run InvokeAI):
|
||||
|
||||
```terminal
|
||||
invokeai-web
|
||||
```
|
||||
|
||||
8. Build Node.js assets
|
||||
|
||||
```terminal
|
||||
cd invokeai/frontend/web/
|
||||
yarn vite build
|
||||
```
|
||||
|
||||
9. Point your browser to http://localhost:9090 to bring up the web interface.
|
||||
10. Type `banana sushi` in the box on the top left and click `Invoke`.
|
||||
|
||||
Be sure to activate the virtual environment each time before re-launching InvokeAI,
|
||||
using `source .venv/bin/activate` or `.venv\Scripts\activate`.
|
||||
|
||||
## Detailed Installation Instructions
|
||||
|
||||
This fork is supported across Linux, Windows and Macintosh. Linux
|
||||
users can use either an Nvidia-based card (with CUDA support) or an
|
||||
AMD card (using the ROCm driver). For full installation and upgrade
|
||||
instructions, please see:
|
||||
[InvokeAI Installation Overview](https://invoke-ai.github.io/InvokeAI/installation/INSTALL_SOURCE/)
|
||||
|
||||
<a name="migrating-to-3"></a>
|
||||
### Migrating a v2.3 InvokeAI root directory
|
||||
|
||||
The InvokeAI root directory is where the InvokeAI startup file,
|
||||
installed models, and generated images are stored. It is ordinarily
|
||||
named `invokeai` and located in your home directory. The contents and
|
||||
layout of this directory have changed between versions 2.3 and 3.0 and
|
||||
cannot be used directly.
|
||||
|
||||
We currently recommend that you use the installer to create a new root
|
||||
directory named differently from the 2.3 one, e.g. `invokeai-3` and
|
||||
then use a migration script to copy your 2.3 models into the new
|
||||
location. However, if you choose, you can upgrade this directory in
|
||||
place. This section gives both recipes.
|
||||
|
||||
#### Creating a new root directory and migrating old models
|
||||
|
||||
This is the safer recipe because it leaves your old root directory in
|
||||
place to fall back on.
|
||||
|
||||
1. Follow the instructions above to create and install InvokeAI in a
|
||||
directory that has a different name from the 2.3 invokeai directory.
|
||||
In this example, we will use "invokeai-3"
|
||||
|
||||
2. When you are prompted to select models to install, select a minimal
|
||||
set of models, such as stable-diffusion-v1.5 only.
|
||||
|
||||
3. After installation is complete, launch `invoke.sh` (Linux/Mac) or
|
||||
`invoke.bat` (Windows) and select option 8 "Open the developers console". This
|
||||
will take you to the command line.
|
||||
|
||||
4. Issue the command `invokeai-migrate3 --from /path/to/v2.3-root --to
|
||||
/path/to/invokeai-3-root`. Provide the correct `--from` and `--to`
|
||||
paths for your v2.3 and v3.0 root directories respectively.
|
||||
|
||||
This will copy and convert your old models from 2.3 format to 3.0
|
||||
format and create a new `models` directory in the 3.0 directory. The
|
||||
old models directory (which contains the models selected at install
|
||||
time) will be renamed `models.orig` and can be deleted once you have
|
||||
confirmed that the migration was successful.
|
||||
|
||||
If you wish, you can pass the 2.3 root directory to both `--from` and
|
||||
`--to` in order to update in place. Warning: this directory will no
|
||||
longer be usable with InvokeAI 2.3.
|
||||
|
||||
#### Migrating in place
|
||||
|
||||
For the adventurous, you may do an in-place upgrade from 2.3 to 3.0
|
||||
without touching the command line. **This recipe does not work on
|
||||
Windows platforms due to a bug in the Windows version of the 2.3
|
||||
upgrade script.** See the next section for a Windows recipe.
|
||||
|
||||
##### For Mac and Linux Users:
|
||||
|
||||
1. Launch the InvokeAI launcher script in your current v2.3 root directory.
|
||||
|
||||
2. Select option [9] "Update InvokeAI" to bring up the updater dialog.
|
||||
|
||||
3. Select option [1] to upgrade to the latest release.
|
||||
|
||||
4. Once the upgrade is finished you will be returned to the launcher
|
||||
menu. Select option [7] "Re-run the configure script to fix a broken
|
||||
install or to complete a major upgrade".
|
||||
|
||||
This will run the configure script against the v2.3 directory and
|
||||
update it to the 3.0 format. The following files will be replaced:
|
||||
|
||||
- The invokeai.init file, replaced by invokeai.yaml
|
||||
- The models directory
|
||||
- The configs/models.yaml model index
|
||||
|
||||
The original versions of these files will be saved with the suffix
|
||||
".orig" appended to the end. Once you have confirmed that the upgrade
|
||||
worked, you can safely remove these files. Alternatively you can
|
||||
restore a working v2.3 directory by removing the new files and
|
||||
restoring the ".orig" files' original names.
|
||||
|
||||
##### For Windows Users:
|
||||
|
||||
Windows users can upgrade with the following steps:
|
||||
|
||||
1. Enter the 2.3 root directory you wish to upgrade
|
||||
2. Launch `invoke.sh` or `invoke.bat`
|
||||
3. Select the "Developer's console" option [8]
|
||||
4. Type the following commands
|
||||
|
||||
```
|
||||
pip install "invokeai @ https://github.com/invoke-ai/InvokeAI/archive/refs/tags/v3.0.0" --use-pep517 --upgrade
|
||||
invokeai-configure --root .
|
||||
```
|
||||
(Replace `v3.0.0` with the current release number if this document is out of date).
|
||||
|
||||
The first command will install and upgrade new software to run
|
||||
InvokeAI. The second will prepare the 2.3 directory for use with 3.0.
|
||||
You may now launch the WebUI in the usual way, by selecting option [1]
|
||||
from the launcher script
|
||||
|
||||
#### Migration Caveats
|
||||
|
||||
The migration script will migrate your invokeai settings and models,
|
||||
including textual inversion models, LoRAs and merges that you may have
|
||||
installed previously. However it does **not** migrate the generated
|
||||
images stored in your 2.3-format outputs directory. You will need to
|
||||
manually import selected images into the 3.0 gallery via drag-and-drop.
|
||||
|
||||
## Hardware Requirements
|
||||
|
||||
InvokeAI is supported across Linux, Windows and macOS. Linux
|
||||
users can use either an Nvidia-based card (with CUDA support) or an
|
||||
AMD card (using the ROCm driver).
|
||||
|
||||
### System
|
||||
|
||||
You will need one of the following:
|
||||
|
||||
- An NVIDIA-based graphics card with 4 GB or more VRAM memory. 6-8 GB
|
||||
of VRAM is highly recommended for rendering using the Stable
|
||||
Diffusion XL models
|
||||
- An Apple computer with an M1 chip.
|
||||
- An AMD-based graphics card with 4GB or more VRAM memory (Linux
|
||||
only), 6-8 GB for XL rendering.
|
||||
|
||||
We do not recommend the GTX 1650 or 1660 series video cards. They are
|
||||
unable to run in half-precision mode and do not have sufficient VRAM
|
||||
to render 512x512 images.
|
||||
|
||||
**Memory** - At least 12 GB Main Memory RAM.
|
||||
|
||||
**Disk** - At least 12 GB of free disk space for the machine learning model, Python, and all its dependencies.
|
||||
|
||||
## Features
|
||||
|
||||
Feature documentation can be reviewed by navigating to [the InvokeAI Documentation page](https://invoke-ai.github.io/InvokeAI/features/)
|
||||
|
||||
### *Web Server & UI*
|
||||
|
||||
InvokeAI offers a locally hosted Web Server & React Frontend, with an industry leading user experience. The Web-based UI allows for simple and intuitive workflows, and is responsive for use on mobile devices and tablets accessing the web server.
|
||||
|
||||
### *Unified Canvas*
|
||||
|
||||
The Unified Canvas is a fully integrated canvas implementation with support for all core generation capabilities, in/outpainting, brush tools, and more. This creative tool unlocks the capability for artists to create with AI as a creative collaborator, and can be used to augment AI-generated imagery, sketches, photography, renders, and more.
|
||||
|
||||
### *Node Architecture & Editor (Beta)*
|
||||
|
||||
Invoke AI's backend is built on a graph-based execution architecture. This allows for customizable generation pipelines to be developed by professional users looking to create specific workflows to support their production use-cases, and will be extended in the future with additional capabilities.
|
||||
|
||||
### *Board & Gallery Management*
|
||||
|
||||
Invoke AI provides an organized gallery system for easily storing, accessing, and remixing your content in the Invoke workspace. Images can be dragged/dropped onto any Image-based UI element in the application, and rich metadata within the Image allows for easy recall of key prompts or settings used in your workflow.
|
||||
|
||||
### Other features
|
||||
|
||||
- *Support for both ckpt and diffusers models*
|
||||
- *SD 2.0, 2.1, XL support*
|
||||
- *Upscaling Tools*
|
||||
- *Embedding Manager & Support*
|
||||
- *Model Manager & Support*
|
||||
- *Node-Based Architecture*
|
||||
- *Node-Based Plug-&-Play UI (Beta)*
|
||||
|
||||
### Latest Changes
|
||||
|
||||
For our latest changes, view our [Release
|
||||
Notes](https://github.com/invoke-ai/InvokeAI/releases) and the
|
||||
[CHANGELOG](docs/CHANGELOG.md).
|
||||
|
||||
### Troubleshooting
|
||||
|
||||
Please check out our **[Q&A](https://invoke-ai.github.io/InvokeAI/help/TROUBLESHOOT/#faq)** to get solutions for common installation
|
||||
problems and other issues.
|
||||
|
||||
## Contributing
|
||||
|
||||
Anyone who wishes to contribute to this project, whether documentation, features, bug fixes, code
|
||||
cleanup, testing, or code reviews, is very much encouraged to do so.
|
||||
|
||||
To join, just raise your hand on the InvokeAI Discord server (#dev-chat) or the GitHub discussion board.
|
||||
|
||||
If you'd like to help with translation, please see our [translation guide](docs/other/TRANSLATION.md).
|
||||
|
||||
If you are unfamiliar with how
|
||||
to contribute to GitHub projects, here is a
|
||||
[Getting Started Guide](https://opensource.com/article/19/7/create-pull-request-github). A full set of contribution guidelines, along with templates, are in progress. You can **make your pull request against the "main" branch**.
|
||||
|
||||
We hope you enjoy using our software as much as we enjoy creating it,
|
||||
and we hope that some of those of you who are reading this will elect
|
||||
to become part of our community.
|
||||
|
||||
Welcome to InvokeAI!
|
||||
|
||||
### Contributors
|
||||
|
||||
This fork is a combined effort of various people from across the world.
|
||||
[Check out the list of all these amazing people](https://invoke-ai.github.io/InvokeAI/other/CONTRIBUTORS/). We thank them for
|
||||
their time, hard work and effort.
|
||||
|
||||
### Support
|
||||
|
||||
For support, please use this repository's GitHub Issues tracking service, or join the Discord.
|
||||
|
||||
Original portions of the software are Copyright (c) 2023 by respective contributors.
|
||||
|
||||
|
||||
@@ -1,27 +1,13 @@
|
||||
## Make a copy of this file named `.env` and fill in the values below.
|
||||
## Any environment variables supported by InvokeAI can be specified here,
|
||||
## in addition to the examples below.
|
||||
## Any environment variables supported by InvokeAI can be specified here.
|
||||
|
||||
## INVOKEAI_ROOT is the path *on the host system* where Invoke will store its data.
|
||||
## It is mounted into the container and allows both containerized and non-containerized usage of Invoke.
|
||||
# Usually this is the only variable you need to set. It can be relative or absolute.
|
||||
# INVOKEAI_ROOT=~/invokeai
|
||||
# INVOKEAI_ROOT is the path on the local filesystem where InvokeAI will store data.
|
||||
# Outputs will also be stored here by default.
|
||||
# This **must** be an absolute path.
|
||||
INVOKEAI_ROOT=
|
||||
|
||||
## HOST_INVOKEAI_ROOT and CONTAINER_INVOKEAI_ROOT can be used to control the on-host
|
||||
## and in-container paths separately, if needed.
|
||||
## HOST_INVOKEAI_ROOT is the path on the docker host's filesystem where Invoke will store data.
|
||||
## If relative, it will be relative to the docker directory in which the docker-compose.yml file is located
|
||||
## CONTAINER_INVOKEAI_ROOT is the path within the container where Invoke will expect to find the runtime directory.
|
||||
## It MUST be absolute. There is usually no need to change this.
|
||||
# HOST_INVOKEAI_ROOT=../../invokeai-data
|
||||
# CONTAINER_INVOKEAI_ROOT=/invokeai
|
||||
HUGGINGFACE_TOKEN=
|
||||
|
||||
## INVOKEAI_PORT is the port on which the InvokeAI web interface will be available
|
||||
# INVOKEAI_PORT=9090
|
||||
|
||||
## GPU_DRIVER can be set to either `cuda` or `rocm` to enable GPU support in the container accordingly.
|
||||
# GPU_DRIVER=cuda #| rocm
|
||||
|
||||
## CONTAINER_UID can be set to the UID of the user on the host system that should own the files in the container.
|
||||
## It is usually not necessary to change this. Use `id -u` on the host system to find the UID.
|
||||
# CONTAINER_UID=1000
|
||||
## optional variables specific to the docker setup
|
||||
# GPU_DRIVER=cuda
|
||||
# CONTAINER_UID=1000
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
## Builder stage
|
||||
|
||||
FROM library/ubuntu:23.04 AS builder
|
||||
FROM library/ubuntu:22.04 AS builder
|
||||
|
||||
ARG DEBIAN_FRONTEND=noninteractive
|
||||
RUN rm -f /etc/apt/apt.conf.d/docker-clean; echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache
|
||||
@@ -10,7 +10,7 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
|
||||
--mount=type=cache,target=/var/lib/apt,sharing=locked \
|
||||
apt update && apt-get install -y \
|
||||
git \
|
||||
python3-venv \
|
||||
python3.10-venv \
|
||||
python3-pip \
|
||||
build-essential
|
||||
|
||||
@@ -18,6 +18,8 @@ ENV INVOKEAI_SRC=/opt/invokeai
|
||||
ENV VIRTUAL_ENV=/opt/venv/invokeai
|
||||
|
||||
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
|
||||
ARG TORCH_VERSION=2.0.1
|
||||
ARG TORCHVISION_VERSION=0.15.2
|
||||
ARG GPU_DRIVER=cuda
|
||||
ARG TARGETPLATFORM="linux/amd64"
|
||||
# unused but available
|
||||
@@ -25,12 +27,7 @@ ARG BUILDPLATFORM
|
||||
|
||||
WORKDIR ${INVOKEAI_SRC}
|
||||
|
||||
COPY invokeai ./invokeai
|
||||
COPY pyproject.toml ./
|
||||
|
||||
# Editable mode helps use the same image for development:
|
||||
# the local working copy can be bind-mounted into the image
|
||||
# at path defined by ${INVOKEAI_SRC}
|
||||
# Install pytorch before all other pip packages
|
||||
# NOTE: there are no pytorch builds for arm64 + cuda, only cpu
|
||||
# x86_64/CUDA is default
|
||||
RUN --mount=type=cache,target=/root/.cache/pip \
|
||||
@@ -38,35 +35,42 @@ RUN --mount=type=cache,target=/root/.cache/pip \
|
||||
if [ "$TARGETPLATFORM" = "linux/arm64" ] || [ "$GPU_DRIVER" = "cpu" ]; then \
|
||||
extra_index_url_arg="--extra-index-url https://download.pytorch.org/whl/cpu"; \
|
||||
elif [ "$GPU_DRIVER" = "rocm" ]; then \
|
||||
extra_index_url_arg="--extra-index-url https://download.pytorch.org/whl/rocm5.6"; \
|
||||
extra_index_url_arg="--extra-index-url https://download.pytorch.org/whl/rocm5.4.2"; \
|
||||
else \
|
||||
extra_index_url_arg="--extra-index-url https://download.pytorch.org/whl/cu121"; \
|
||||
extra_index_url_arg="--extra-index-url https://download.pytorch.org/whl/cu118"; \
|
||||
fi &&\
|
||||
pip install $extra_index_url_arg \
|
||||
torch==$TORCH_VERSION \
|
||||
torchvision==$TORCHVISION_VERSION
|
||||
|
||||
# Install the local package.
|
||||
# Editable mode helps use the same image for development:
|
||||
# the local working copy can be bind-mounted into the image
|
||||
# at path defined by ${INVOKEAI_SRC}
|
||||
COPY invokeai ./invokeai
|
||||
COPY pyproject.toml ./
|
||||
RUN --mount=type=cache,target=/root/.cache/pip \
|
||||
# xformers + triton fails to install on arm64
|
||||
if [ "$GPU_DRIVER" = "cuda" ] && [ "$TARGETPLATFORM" = "linux/amd64" ]; then \
|
||||
pip install $extra_index_url_arg -e ".[xformers]"; \
|
||||
pip install -e ".[xformers]"; \
|
||||
else \
|
||||
pip install $extra_index_url_arg -e "."; \
|
||||
pip install -e "."; \
|
||||
fi
|
||||
|
||||
# #### Build the Web UI ------------------------------------
|
||||
|
||||
FROM node:20-slim AS web-builder
|
||||
ENV PNPM_HOME="/pnpm"
|
||||
ENV PATH="$PNPM_HOME:$PATH"
|
||||
RUN corepack use pnpm@8.x
|
||||
RUN corepack enable
|
||||
|
||||
FROM node:18 AS web-builder
|
||||
WORKDIR /build
|
||||
COPY invokeai/frontend/web/ ./
|
||||
RUN --mount=type=cache,target=/pnpm/store \
|
||||
pnpm install --frozen-lockfile
|
||||
RUN npx vite build
|
||||
RUN --mount=type=cache,target=/usr/lib/node_modules \
|
||||
npm install --include dev
|
||||
RUN --mount=type=cache,target=/usr/lib/node_modules \
|
||||
yarn vite build
|
||||
|
||||
|
||||
#### Runtime stage ---------------------------------------
|
||||
|
||||
FROM library/ubuntu:23.04 AS runtime
|
||||
FROM library/ubuntu:22.04 AS runtime
|
||||
|
||||
ARG DEBIAN_FRONTEND=noninteractive
|
||||
ENV PYTHONUNBUFFERED=1
|
||||
@@ -81,7 +85,6 @@ RUN apt update && apt install -y --no-install-recommends \
|
||||
iotop \
|
||||
bzip2 \
|
||||
gosu \
|
||||
magic-wormhole \
|
||||
libglib2.0-0 \
|
||||
libgl1-mesa-glx \
|
||||
python3-venv \
|
||||
@@ -91,15 +94,15 @@ RUN apt update && apt install -y --no-install-recommends \
|
||||
libstdc++-10-dev &&\
|
||||
apt-get clean && apt-get autoclean
|
||||
|
||||
# globally add magic-wormhole
|
||||
# for ease of transferring data to and from the container
|
||||
# when running in sandboxed cloud environments; e.g. Runpod etc.
|
||||
RUN pip install magic-wormhole
|
||||
|
||||
ENV INVOKEAI_SRC=/opt/invokeai
|
||||
ENV VIRTUAL_ENV=/opt/venv/invokeai
|
||||
ENV INVOKEAI_ROOT=/invokeai
|
||||
ENV INVOKEAI_HOST=0.0.0.0
|
||||
ENV INVOKEAI_PORT=9090
|
||||
ENV PATH="$VIRTUAL_ENV/bin:$INVOKEAI_SRC:$PATH"
|
||||
ENV CONTAINER_UID=${CONTAINER_UID:-1000}
|
||||
ENV CONTAINER_GID=${CONTAINER_GID:-1000}
|
||||
|
||||
# --link requires buildkit w/ dockerfile syntax 1.4
|
||||
COPY --link --from=builder ${INVOKEAI_SRC} ${INVOKEAI_SRC}
|
||||
@@ -117,8 +120,10 @@ WORKDIR ${INVOKEAI_SRC}
|
||||
RUN cd /usr/lib/$(uname -p)-linux-gnu/pkgconfig/ && ln -sf opencv4.pc opencv.pc
|
||||
RUN python3 -c "from patchmatch import patch_match"
|
||||
|
||||
RUN mkdir -p ${INVOKEAI_ROOT} && chown -R ${CONTAINER_UID}:${CONTAINER_GID} ${INVOKEAI_ROOT}
|
||||
# Create unprivileged user and make the local dir
|
||||
RUN useradd --create-home --shell /bin/bash -u 1000 --comment "container local user" invoke
|
||||
RUN mkdir -p ${INVOKEAI_ROOT} && chown -R invoke:invoke ${INVOKEAI_ROOT}
|
||||
|
||||
COPY docker/docker-entrypoint.sh ./
|
||||
ENTRYPOINT ["/opt/invokeai/docker-entrypoint.sh"]
|
||||
CMD ["invokeai-web"]
|
||||
CMD ["invokeai-web", "--host", "0.0.0.0"]
|
||||
|
||||
118
docker/README.md
@@ -1,88 +1,34 @@
|
||||
# Invoke in Docker
|
||||
# InvokeAI Containerized
|
||||
|
||||
First things first:
|
||||
|
||||
- Ensure that Docker can use your [NVIDIA][nvidia docker docs] or [AMD][amd docker docs] GPU.
|
||||
- This document assumes a Linux system, but should work similarly under Windows with WSL2.
|
||||
- We don't recommend running Invoke in Docker on macOS at this time. It works, but very slowly.
|
||||
|
||||
## Quickstart
|
||||
|
||||
No `docker compose`, no persistence, single command, using the official images:
|
||||
|
||||
**CUDA (NVIDIA GPU):**
|
||||
|
||||
```bash
|
||||
docker run --runtime=nvidia --gpus=all --publish 9090:9090 ghcr.io/invoke-ai/invokeai
|
||||
```
|
||||
|
||||
**ROCm (AMD GPU):**
|
||||
|
||||
```bash
|
||||
docker run --device /dev/kfd --device /dev/dri --publish 9090:9090 ghcr.io/invoke-ai/invokeai:main-rocm
|
||||
```
|
||||
|
||||
Open `http://localhost:9090` in your browser once the container finishes booting, install some models, and generate away!
|
||||
|
||||
### Data persistence
|
||||
|
||||
To persist your generated images and downloaded models outside of the container, add a `--volume/-v` flag to the above command, e.g.:
|
||||
|
||||
```bash
|
||||
docker run --volume /some/local/path:/invokeai {...etc...}
|
||||
```
|
||||
|
||||
`/some/local/path/invokeai` will contain all your data.
|
||||
It can *usually* be reused between different installs of Invoke. Tread with caution and read the release notes!
|
||||
|
||||
## Customize the container
|
||||
|
||||
The included `run.sh` script is a convenience wrapper around `docker compose`. It can be helpful for passing additional build arguments to `docker compose`. Alternatively, the familiar `docker compose` commands work just as well.
|
||||
|
||||
```bash
|
||||
cd docker
|
||||
cp .env.sample .env
|
||||
# edit .env to your liking if you need to; it is well commented.
|
||||
./run.sh
|
||||
```
|
||||
|
||||
It will take a few minutes to build the image the first time. Once the application starts up, open `http://localhost:9090` in your browser to invoke!
|
||||
|
||||
>[!TIP]
|
||||
>When using the `run.sh` script, the container will continue running after Ctrl+C. To shut it down, use the `docker compose down` command.
|
||||
|
||||
## Docker setup in detail
|
||||
All commands are to be run from the `docker` directory: `cd docker`
|
||||
|
||||
#### Linux
|
||||
|
||||
1. Ensure buildkit is enabled in the Docker daemon settings (`/etc/docker/daemon.json`)
|
||||
2. Install the `docker compose` plugin using your package manager, or follow a [tutorial](https://docs.docker.com/compose/install/linux/#install-using-the-repository).
|
||||
- The deprecated `docker-compose` (hyphenated) CLI probably won't work. Update to a recent version.
|
||||
1. Ensure buildkit is enabled in the Docker daemon settings (`/etc/docker/daemon.json`)
|
||||
2. Install the `docker compose` plugin using your package manager, or follow a [tutorial](https://www.digitalocean.com/community/tutorials/how-to-install-and-use-docker-compose-on-ubuntu-22-04).
|
||||
- The deprecated `docker-compose` (hyphenated) CLI continues to work for now.
|
||||
3. Ensure docker daemon is able to access the GPU.
|
||||
- [NVIDIA docs](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html)
|
||||
- [AMD docs](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/docker.html)
|
||||
- You may need to install [nvidia-container-toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html)
|
||||
|
||||
#### macOS
|
||||
|
||||
> [!TIP]
|
||||
> You'll be better off installing Invoke directly on your system, because Docker can not use the GPU on macOS.
|
||||
|
||||
If you are still reading:
|
||||
|
||||
1. Ensure Docker has at least 16GB RAM
|
||||
2. Enable VirtioFS for file sharing
|
||||
3. Enable `docker compose` V2 support
|
||||
|
||||
This is done via Docker Desktop preferences.
|
||||
This is done via Docker Desktop preferences
|
||||
|
||||
### Configure the Invoke Environment
|
||||
## Quickstart
|
||||
|
||||
1. Make a copy of `.env.sample` and name it `.env` (`cp .env.sample .env` (Mac/Linux) or `copy .env.sample .env` (Windows)). Make changes as necessary. Set `INVOKEAI_ROOT` to an absolute path to the desired location of the InvokeAI runtime directory. It may be an existing directory from a previous installation (post 4.0.0).
|
||||
1. Execute `run.sh`
|
||||
|
||||
1. Make a copy of `.env.sample` and name it `.env` (`cp .env.sample .env` (Mac/Linux) or `copy .env.sample .env` (Windows)). Make changes as necessary. Set `INVOKEAI_ROOT` to an absolute path to:
|
||||
a. the desired location of the InvokeAI runtime directory, or
|
||||
b. an existing, v3.0.0 compatible runtime directory.
|
||||
1. `docker compose up`
|
||||
|
||||
The image will be built automatically if needed.
|
||||
|
||||
The runtime directory (holding models and outputs) will be created in the location specified by `INVOKEAI_ROOT`. The default location is `~/invokeai`. Navigate to the Model Manager tab and install some models before generating.
|
||||
The runtime directory (holding models and outputs) will be created in the location specified by `INVOKEAI_ROOT`. The default location is `~/invokeai`. The runtime directory will be populated with the base configs and models necessary to start generating.
|
||||
|
||||
### Use a GPU
|
||||
|
||||
@@ -90,28 +36,42 @@ The runtime directory (holding models and outputs) will be created in the locati
|
||||
- WSL2 is *required* for Windows.
|
||||
- only `x86_64` architecture is supported.
|
||||
|
||||
The Docker daemon on the system must be already set up to use the GPU. In case of Linux, this involves installing `nvidia-docker-runtime` and configuring the `nvidia` runtime as default. Steps will be different for AMD. Please see Docker/NVIDIA/AMD documentation for the most up-to-date instructions for using your GPU with Docker.
|
||||
|
||||
To use an AMD GPU, set `GPU_DRIVER=rocm` in your `.env` file before running `./run.sh`.
|
||||
The Docker daemon on the system must be already set up to use the GPU. In case of Linux, this involves installing `nvidia-docker-runtime` and configuring the `nvidia` runtime as default. Steps will be different for AMD. Please see Docker documentation for the most up-to-date instructions for using your GPU with Docker.
|
||||
|
||||
## Customize
|
||||
|
||||
Check the `.env.sample` file. It contains some environment variables for running in Docker. Copy it, name it `.env`, and fill it in with your own values. Next time you run `run.sh`, your custom values will be used.
|
||||
Check the `.env.sample` file. It contains some environment variables for running in Docker. Copy it, name it `.env`, and fill it in with your own values. Next time you run `docker compose up`, your custom values will be used.
|
||||
|
||||
You can also set these values in `docker-compose.yml` directly, but `.env` will help avoid conflicts when code is updated.
|
||||
You can also set these values in `docker-compose.yml` directly, but `.env` will help avoid conflicts when code is updated.
|
||||
|
||||
Values are optional, but setting `INVOKEAI_ROOT` is highly recommended. The default is `~/invokeai`. Example:
|
||||
Example (most values are optional):
|
||||
|
||||
```bash
|
||||
```
|
||||
INVOKEAI_ROOT=/Volumes/WorkDrive/invokeai
|
||||
HUGGINGFACE_TOKEN=the_actual_token
|
||||
CONTAINER_UID=1000
|
||||
GPU_DRIVER=cuda
|
||||
```
|
||||
|
||||
Any environment variables supported by InvokeAI can be set here. See the [Configuration docs](https://invoke-ai.github.io/InvokeAI/features/CONFIGURATION/) for further detail.
|
||||
## Even Moar Customizing!
|
||||
|
||||
---
|
||||
See the `docker-compose.yml` file. The `command` instruction can be uncommented and used to run arbitrary startup commands. Some examples are shown below.
|
||||
|
||||
[nvidia docker docs]: https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html
|
||||
[amd docker docs]: https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/docker.html
|
||||
### Reconfigure the runtime directory
|
||||
|
||||
Can be used to download additional models from the supported model list
|
||||
|
||||
In conjunction with `INVOKEAI_ROOT`, this can also be used to initialize a runtime directory:
|
||||
|
||||
```
|
||||
command:
|
||||
- invokeai-configure
|
||||
- --yes
|
||||
```
|
||||
|
||||
Or install models:
|
||||
|
||||
```
|
||||
command:
|
||||
- invokeai-model-install
|
||||
```
|
||||
11
docker/build.sh
Executable file
@@ -0,0 +1,11 @@
|
||||
#!/usr/bin/env bash
|
||||
set -e
|
||||
|
||||
build_args=""
|
||||
|
||||
[[ -f ".env" ]] && build_args=$(awk '$1 ~ /\=[^$]/ {print "--build-arg " $0 " "}' .env)
|
||||
|
||||
echo "docker-compose build args:"
|
||||
echo $build_args
|
||||
|
||||
docker-compose build $build_args
|
||||
@@ -1,37 +1,13 @@
|
||||
# Copyright (c) 2023 Eugene Brodsky https://github.com/ebr
|
||||
|
||||
x-invokeai: &invokeai
|
||||
image: "local/invokeai:latest"
|
||||
build:
|
||||
context: ..
|
||||
dockerfile: docker/Dockerfile
|
||||
|
||||
# Create a .env file in the same directory as this docker-compose.yml file
|
||||
# and populate it with environment variables. See .env.sample
|
||||
env_file:
|
||||
- .env
|
||||
|
||||
# variables without a default will automatically inherit from the host environment
|
||||
environment:
|
||||
# if set, CONTAINER_INVOKEAI_ROOT will override the Invoke runtime directory location *inside* the container
|
||||
- INVOKEAI_ROOT=${CONTAINER_INVOKEAI_ROOT:-/invokeai}
|
||||
- HF_HOME
|
||||
ports:
|
||||
- "${INVOKEAI_PORT:-9090}:${INVOKEAI_PORT:-9090}"
|
||||
volumes:
|
||||
- type: bind
|
||||
source: ${HOST_INVOKEAI_ROOT:-${INVOKEAI_ROOT:-~/invokeai}}
|
||||
target: ${CONTAINER_INVOKEAI_ROOT:-/invokeai}
|
||||
bind:
|
||||
create_host_path: true
|
||||
- ${HF_HOME:-~/.cache/huggingface}:${HF_HOME:-/invokeai/.cache/huggingface}
|
||||
tty: true
|
||||
stdin_open: true
|
||||
|
||||
version: '3.8'
|
||||
|
||||
services:
|
||||
invokeai-cuda:
|
||||
<<: *invokeai
|
||||
invokeai:
|
||||
image: "local/invokeai:latest"
|
||||
# edit below to run on a container runtime other than nvidia-container-runtime.
|
||||
# not yet tested with rocm/AMD GPUs
|
||||
# Comment out the "deploy" section to run on CPU only
|
||||
deploy:
|
||||
resources:
|
||||
reservations:
|
||||
@@ -39,16 +15,34 @@ services:
|
||||
- driver: nvidia
|
||||
count: 1
|
||||
capabilities: [gpu]
|
||||
build:
|
||||
context: ..
|
||||
dockerfile: docker/Dockerfile
|
||||
|
||||
invokeai-cpu:
|
||||
<<: *invokeai
|
||||
profiles:
|
||||
- cpu
|
||||
# variables without a default will automatically inherit from the host environment
|
||||
environment:
|
||||
- INVOKEAI_ROOT
|
||||
- HF_HOME
|
||||
|
||||
invokeai-rocm:
|
||||
<<: *invokeai
|
||||
devices:
|
||||
- /dev/kfd:/dev/kfd
|
||||
- /dev/dri:/dev/dri
|
||||
profiles:
|
||||
- rocm
|
||||
# Create a .env file in the same directory as this docker-compose.yml file
|
||||
# and populate it with environment variables. See .env.sample
|
||||
env_file:
|
||||
- .env
|
||||
|
||||
ports:
|
||||
- "${INVOKEAI_PORT:-9090}:9090"
|
||||
volumes:
|
||||
- ${INVOKEAI_ROOT:-~/invokeai}:${INVOKEAI_ROOT:-/invokeai}
|
||||
- ${HF_HOME:-~/.cache/huggingface}:${HF_HOME:-/invokeai/.cache/huggingface}
|
||||
# - ${INVOKEAI_MODELS_DIR:-${INVOKEAI_ROOT:-/invokeai/models}}
|
||||
# - ${INVOKEAI_MODELS_CONFIG_PATH:-${INVOKEAI_ROOT:-/invokeai/configs/models.yaml}}
|
||||
tty: true
|
||||
stdin_open: true
|
||||
|
||||
# # Example of running alternative commands/scripts in the container
|
||||
# command:
|
||||
# - bash
|
||||
# - -c
|
||||
# - |
|
||||
# invokeai-model-install --yes --default-only --config_file ${INVOKEAI_ROOT}/config_custom.yaml
|
||||
# invokeai-nodes-web --host 0.0.0.0
|
||||
|
||||
@@ -9,33 +9,57 @@ set -e -o pipefail
|
||||
### Set INVOKEAI_ROOT pointing to a valid runtime directory
|
||||
# Otherwise configure the runtime dir first.
|
||||
|
||||
### Configure the InvokeAI runtime directory (done by default):
|
||||
# docker run --rm -it <this image> --configure
|
||||
# or skip with --no-configure
|
||||
|
||||
### Set the CONTAINER_UID envvar to match your user.
|
||||
# Ensures files created in the container are owned by you:
|
||||
# docker run --rm -it -v /some/path:/invokeai -e CONTAINER_UID=$(id -u) <this image>
|
||||
# Default UID: 1000 chosen due to popularity on Linux systems. Possibly 501 on MacOS.
|
||||
|
||||
USER_ID=${CONTAINER_UID:-1000}
|
||||
USER=ubuntu
|
||||
USER=invoke
|
||||
usermod -u ${USER_ID} ${USER} 1>/dev/null
|
||||
|
||||
configure() {
|
||||
# Configure the runtime directory
|
||||
if [[ -f ${INVOKEAI_ROOT}/invokeai.yaml ]]; then
|
||||
echo "${INVOKEAI_ROOT}/invokeai.yaml exists. InvokeAI is already configured."
|
||||
echo "To reconfigure InvokeAI, delete the above file."
|
||||
echo "======================================================================"
|
||||
else
|
||||
mkdir -p ${INVOKEAI_ROOT}
|
||||
chown --recursive ${USER} ${INVOKEAI_ROOT}
|
||||
gosu ${USER} invokeai-configure --yes --default_only
|
||||
fi
|
||||
}
|
||||
|
||||
## Skip attempting to configure.
|
||||
## Must be passed first, before any other args.
|
||||
if [[ $1 != "--no-configure" ]]; then
|
||||
configure
|
||||
else
|
||||
shift
|
||||
fi
|
||||
|
||||
### Set the $PUBLIC_KEY env var to enable SSH access.
|
||||
# We do not install openssh-server in the image by default to avoid bloat.
|
||||
# but it is useful to have the full SSH server e.g. on Runpod.
|
||||
# (use SCP to copy files to/from the image, etc)
|
||||
if [[ -v "PUBLIC_KEY" ]] && [[ ! -d "${HOME}/.ssh" ]]; then
|
||||
apt-get update
|
||||
apt-get install -y openssh-server
|
||||
pushd "$HOME"
|
||||
mkdir -p .ssh
|
||||
echo "${PUBLIC_KEY}" >.ssh/authorized_keys
|
||||
chmod -R 700 .ssh
|
||||
popd
|
||||
service ssh start
|
||||
apt-get update
|
||||
apt-get install -y openssh-server
|
||||
pushd $HOME
|
||||
mkdir -p .ssh
|
||||
echo ${PUBLIC_KEY} > .ssh/authorized_keys
|
||||
chmod -R 700 .ssh
|
||||
popd
|
||||
service ssh start
|
||||
fi
|
||||
|
||||
mkdir -p "${INVOKEAI_ROOT}"
|
||||
chown --recursive ${USER} "${INVOKEAI_ROOT}" || true
|
||||
cd "${INVOKEAI_ROOT}"
|
||||
|
||||
cd ${INVOKEAI_ROOT}
|
||||
|
||||
# Run the CMD as the Container User (not root).
|
||||
exec gosu ${USER} "$@"
|
||||
|
||||
@@ -1,36 +1,8 @@
|
||||
#!/usr/bin/env bash
|
||||
set -e -o pipefail
|
||||
set -e
|
||||
|
||||
run() {
|
||||
local scriptdir=$(dirname "${BASH_SOURCE[0]}")
|
||||
cd "$scriptdir" || exit 1
|
||||
SCRIPTDIR=$(dirname "${BASH_SOURCE[0]}")
|
||||
cd "$SCRIPTDIR" || exit 1
|
||||
|
||||
local build_args=""
|
||||
local profile=""
|
||||
|
||||
# create .env file if it doesn't exist, otherwise docker compose will fail
|
||||
touch .env
|
||||
|
||||
# parse .env file for build args
|
||||
build_args=$(awk '$1 ~ /=[^$]/ && $0 !~ /^#/ {print "--build-arg " $0 " "}' .env) &&
|
||||
profile="$(awk -F '=' '/GPU_DRIVER/ {print $2}' .env)"
|
||||
|
||||
# default to 'cuda' profile
|
||||
[[ -z "$profile" ]] && profile="cuda"
|
||||
|
||||
local service_name="invokeai-$profile"
|
||||
|
||||
if [[ ! -z "$build_args" ]]; then
|
||||
printf "%s\n" "docker compose build args:"
|
||||
printf "%s\n" "$build_args"
|
||||
fi
|
||||
|
||||
docker compose build $build_args $service_name
|
||||
unset build_args
|
||||
|
||||
printf "%s\n" "starting service $service_name"
|
||||
docker compose --profile "$profile" up -d "$service_name"
|
||||
docker compose logs -f
|
||||
}
|
||||
|
||||
run
|
||||
docker-compose up --build -d
|
||||
docker-compose logs -f
|
||||
|
||||
815
docs/CHANGELOG.md
Normal file
@@ -0,0 +1,815 @@
|
||||
---
|
||||
title: Changelog
|
||||
---
|
||||
|
||||
# :octicons-log-16: **Changelog**
|
||||
|
||||
## v2.3.5 <small>(22 May 2023)</small>
|
||||
|
||||
This release (along with the post1 and post2 follow-on releases) expands support for additional LoRA and LyCORIS models, upgrades diffusers versions, and fixes a few bugs.
|
||||
|
||||
### LoRA and LyCORIS Support Improvement
|
||||
|
||||
A number of LoRA/LyCORIS fine-tune files (those which alter the text encoder as well as the unet model) were not having the desired effect in InvokeAI. This bug has now been fixed. Full documentation of LoRA support is available at InvokeAI LoRA Support.
|
||||
Previously, InvokeAI did not distinguish between LoRA/LyCORIS models based on Stable Diffusion v1.5 vs those based on v2.0 and 2.1, leading to a crash when an incompatible model was loaded. This has now been fixed. In addition, the web pulldown menus for LoRA and Textual Inversion selection have been enhanced to show only those files that are compatible with the currently-selected Stable Diffusion model.
|
||||
Support for the newer LoKR LyCORIS files has been added.
|
||||
|
||||
### Library Updates and Speed/Reproducibility Advancements
|
||||
The major enhancement in this version is that NVIDIA users no longer need to decide between speed and reproducibility. Previously, if you activated the Xformers library, you would see improvements in speed and memory usage, but multiple images generated with the same seed and other parameters would be slightly different from each other. This is no longer the case. Relative to 2.3.5 you will see improved performance when running without Xformers, and even better performance when Xformers is activated. In both cases, images generated with the same settings will be identical.
|
||||
|
||||
Here are the new library versions:
|
||||
Library Version
|
||||
Torch 2.0.0
|
||||
Diffusers 0.16.1
|
||||
Xformers 0.0.19
|
||||
Compel 1.1.5
|
||||
Other Improvements
|
||||
|
||||
### Performance Improvements
|
||||
|
||||
When a model is loaded for the first time, InvokeAI calculates its checksum for incorporation into the PNG metadata. This process could take up to a minute on network-mounted disks and WSL mounts. This release noticeably speeds up the process.
|
||||
|
||||
### Bug Fixes
|
||||
|
||||
The "import models from directory" and "import from URL" functionality in the console-based model installer has now been fixed.
|
||||
When running the WebUI, we have reduced the number of times that InvokeAI reaches out to HuggingFace to fetch the list of embeddable Textual Inversion models. We have also caught and fixed a problem with the updater not correctly detecting when another instance of the updater is running.
|
||||
|
||||
|
||||
## v2.3.4 <small>(7 April 2023)</small>
|
||||
|
||||
What's New in 2.3.4
|
||||
|
||||
This feature release adds support for LoRA (Low-Rank Adaptation) and LyCORIS (Lora beYond Conventional) models, as well as some minor bug fixes.
|
||||
### LoRA and LyCORIS Support
|
||||
|
||||
LoRA files contain fine-tuning weights that enable particular styles, subjects or concepts to be applied to generated images. LyCORIS files are an extended variant of LoRA. InvokeAI supports the most common LoRA/LyCORIS format, which ends in the suffix .safetensors. You will find numerous LoRA and LyCORIS models for download at Civitai, and a small but growing number at Hugging Face. Full documentation of LoRA support is available at InvokeAI LoRA Support. (Pre-release note: this page will only be available after release.)
|
||||
|
||||
To use LoRA/LyCORIS models in InvokeAI:
|
||||
|
||||
Download the .safetensors files of your choice and place them in /path/to/invokeai/loras. This directory was not present in earlier versions of InvokeAI but will be created for you the first time you run the command-line or web client. You can also create the directory manually.
|
||||
|
||||
Add withLora(lora-file,weight) to your prompts. The weight is optional and will default to 1.0. A few examples, assuming that a LoRA file named loras/sushi.safetensors is present:
|
||||
|
||||
family sitting at dinner table eating sushi withLora(sushi,0.9)
|
||||
family sitting at dinner table eating sushi withLora(sushi, 0.75)
|
||||
family sitting at dinner table eating sushi withLora(sushi)
|
||||
|
||||
Multiple withLora() prompt fragments are allowed. The weight can be arbitrarily large, but the useful range is roughly 0.5 to 1.0. Higher weights make the LoRA's influence stronger. Negative weights are also allowed, which can lead to some interesting effects.
|
||||
|
||||
Generate as you usually would! If you find that the image is too "crisp" try reducing the overall CFG value or reducing individual LoRA weights. As is the case with all fine-tunes, you'll get the best results when running the LoRA on top of the model similar to, or identical with, the one that was used during the LoRA's training. Don't try to load a SD 1.x-trained LoRA into a SD 2.x model, and vice versa. This will trigger a non-fatal error message and generation will not proceed.
|
||||
|
||||
You can change the location of the loras directory by passing the `--lora_directory` option to `invokeai`.
|
||||
|
||||
### New WebUI LoRA and Textual Inversion Buttons
|
||||
|
||||
This version adds two new web interface buttons for inserting LoRA and Textual Inversion triggers into the prompt as shown in the screenshot below.
|
||||
|
||||
Clicking on one or the other of the buttons will bring up a menu of available LoRA/LyCORIS or Textual Inversion trigger terms. Select a menu item to insert the properly-formatted withLora() or <textual-inversion> prompt fragment into the positive prompt. The number in parentheses indicates the number of trigger terms currently in the prompt. You may click the button again and deselect the LoRA or trigger to remove it from the prompt, or simply edit the prompt directly.
|
||||
|
||||
Currently terms are inserted into the positive prompt textbox only. However, some textual inversion embeddings are designed to be used with negative prompts. To move a textual inversion trigger into the negative prompt, simply cut and paste it.
|
||||
|
||||
By default the Textual Inversion menu only shows locally installed models found at startup time in /path/to/invokeai/embeddings. However, InvokeAI has the ability to dynamically download and install additional Textual Inversion embeddings from the HuggingFace Concepts Library. You may choose to display the most popular of these (with five or more likes) in the Textual Inversion menu by going to Settings and turning on "Show Textual Inversions from HF Concepts Library." When this option is activated, the locally-installed TI embeddings will be shown first, followed by uninstalled terms from Hugging Face. See The Hugging Face Concepts Library and Importing Textual Inversion files for more information.
|
||||
### Minor features and fixes
|
||||
|
||||
This release changes model switching behavior so that the command-line and Web UIs save the last model used and restore it the next time they are launched. It also improves the behavior of the installer so that the pip utility is kept up to date.
|
||||
|
||||
### Known Bugs in 2.3.4
|
||||
|
||||
These are known bugs in the release.
|
||||
|
||||
The Ancestral DPMSolverMultistepScheduler (k_dpmpp_2a) sampler is not yet implemented for diffusers models and will disappear from the WebUI Sampler menu when a diffusers model is selected.
|
||||
Windows Defender will sometimes raise Trojan or backdoor alerts for the codeformer.pth face restoration model, as well as the CIDAS/clipseg and runwayml/stable-diffusion-v1.5 models. These are false positives and can be safely ignored. InvokeAI performs a malware scan on all models as they are loaded. For additional security, you should use safetensors models whenever they are available.
|
||||
|
||||
|
||||
## v2.3.3 <small>(28 March 2023)</small>
|
||||
|
||||
This is a bugfix and minor feature release.
|
||||
### Bugfixes
|
||||
|
||||
Since version 2.3.2 the following bugs have been fixed:
|
||||
Bugs
|
||||
|
||||
When using legacy checkpoints with an external VAE, the VAE file is now scanned for malware prior to loading. Previously only the main model weights file was scanned.
|
||||
Textual inversion will select an appropriate batchsize based on whether xformers is active, and will default to xformers enabled if the library is detected.
|
||||
The batch script log file names have been fixed to be compatible with Windows.
|
||||
Occasional corruption of the .next_prefix file (which stores the next output file name in sequence) on Windows systems is now detected and corrected.
|
||||
Support loading of legacy config files that have no personalization (textual inversion) section.
|
||||
An infinite loop when opening the developer's console from within the invoke.sh script has been corrected.
|
||||
Documentation fixes, including a recipe for detecting and fixing problems with the AMD GPU ROCm driver.
|
||||
|
||||
Enhancements
|
||||
|
||||
It is now possible to load and run several community-contributed SD-2.0 based models, including the often-requested "Illuminati" model.
|
||||
The "NegativePrompts" embedding file, and others like it, can now be loaded by placing it in the InvokeAI embeddings directory.
|
||||
If no --model is specified at launch time, InvokeAI will remember the last model used and restore it the next time it is launched.
|
||||
On Linux systems, the invoke.sh launcher now uses a prettier console-based interface. To take advantage of it, install the dialog package using your package manager (e.g. sudo apt install dialog).
|
||||
When loading legacy models (safetensors/ckpt) you can specify a custom config file and/or a VAE by placing like-named files in the same directory as the model following this example:
|
||||
|
||||
my-favorite-model.ckpt
|
||||
my-favorite-model.yaml
|
||||
my-favorite-model.vae.pt # or my-favorite-model.vae.safetensors
|
||||
|
||||
### Known Bugs in 2.3.3
|
||||
|
||||
These are known bugs in the release.
|
||||
|
||||
The Ancestral DPMSolverMultistepScheduler (k_dpmpp_2a) sampler is not yet implemented for diffusers models and will disappear from the WebUI Sampler menu when a diffusers model is selected.
|
||||
Windows Defender will sometimes raise Trojan or backdoor alerts for the codeformer.pth face restoration model, as well as the CIDAS/clipseg and runwayml/stable-diffusion-v1.5 models. These are false positives and can be safely ignored. InvokeAI performs a malware scan on all models as they are loaded. For additional security, you should use safetensors models whenever they are available.
|
||||
|
||||
|
||||
## v2.3.2 <small>(11 March 2023)</small>
|
||||
This is a bugfix and minor feature release.
|
||||
|
||||
### Bugfixes
|
||||
|
||||
Since version 2.3.1 the following bugs have been fixed:
|
||||
|
||||
Black images appearing for potential NSFW images when generating with legacy checkpoint models and both --no-nsfw_checker and --ckpt_convert turned on.
|
||||
Black images appearing when generating from models fine-tuned on Stable-Diffusion-2-1-base. When importing V2-derived models, you may be asked to select whether the model was derived from a "base" model (512 pixels) or the 768-pixel SD-2.1 model.
|
||||
The "Use All" button was not restoring the Hi-Res Fix setting on the WebUI
|
||||
When using the model installer console app, models failed to import correctly when importing from directories with spaces in their names. A similar issue with the output directory was also fixed.
|
||||
Crashes that occurred during model merging.
|
||||
Restore previous naming of Stable Diffusion base and 768 models.
|
||||
Upgraded to latest versions of diffusers, transformers, safetensors and accelerate libraries upstream. We hope that this will fix the assertion NDArray > 2**32 issue that MacOS users have had when generating images larger than 768x768 pixels. Please report back.
|
||||
|
||||
As part of the upgrade to diffusers, the location of the diffusers-based models has changed from models/diffusers to models/hub. When you launch InvokeAI for the first time, it will prompt you to OK a one-time move. This should be quick and harmless, but if you have modified your models/diffusers directory in some way, for example using symlinks, you may wish to cancel the migration and make appropriate adjustments.
|
||||
New "Invokeai-batch" script
|
||||
|
||||
### Invoke AI Batch
|
||||
2.3.2 introduces a new command-line only script called invokeai-batch that can be used to generate hundreds of images from prompts and settings that vary systematically. This can be used to try the same prompt across multiple combinations of models, steps, CFG settings and so forth. It also allows you to template prompts and generate a combinatorial list like:
|
||||
|
||||
a shack in the mountains, photograph
|
||||
a shack in the mountains, watercolor
|
||||
a shack in the mountains, oil painting
|
||||
a chalet in the mountains, photograph
|
||||
a chalet in the mountains, watercolor
|
||||
a chalet in the mountains, oil painting
|
||||
a shack in the desert, photograph
|
||||
...
|
||||
|
||||
If you have a system with multiple GPUs, or a single GPU with lots of VRAM, you can parallelize generation across the combinatorial set, reducing wait times and using your system's resources efficiently (make sure you have good GPU cooling).
|
||||
|
||||
To try invokeai-batch out, launch the "developer's console" using the invoke launcher script, or activate the invokeai virtual environment manually. From the console, give the command invokeai-batch --help in order to learn how the script works and create your first template file for dynamic prompt generation.
|
||||
|
||||
|
||||
### Known Bugs in 2.3.2
|
||||
|
||||
These are known bugs in the release.
|
||||
|
||||
The Ancestral DPMSolverMultistepScheduler (k_dpmpp_2a) sampler is not yet implemented for diffusers models and will disappear from the WebUI Sampler menu when a diffusers model is selected.
|
||||
Windows Defender will sometimes raise a Trojan alert for the codeformer.pth face restoration model. As far as we have been able to determine, this is a false positive and can be safely whitelisted.
|
||||
|
||||
|
||||
## v2.3.1 <small>(22 February 2023)</small>
|
||||
This is primarily a bugfix release, but it does provide several new features that will improve the user experience.
|
||||
|
||||
### Enhanced support for model management
|
||||
|
||||
InvokeAI now makes it convenient to add, remove and modify models. You can individually import models that are stored on your local system, scan an entire folder and its subfolders for models and import them automatically, and even directly import models from the internet by providing their download URLs. You also have the option of designating a local folder to scan for new models each time InvokeAI is restarted.
|
||||
|
||||
There are three ways of accessing the model management features:
|
||||
|
||||
From the WebUI, click on the cube to the right of the model selection menu. This will bring up a form that allows you to import models individually from your local disk or scan a directory for models to import.
|
||||
|
||||
Using the Model Installer App
|
||||
|
||||
Choose option (5) download and install models from the invoke launcher script to start a new console-based application for model management. You can use this to select from a curated set of starter models, or import checkpoint, safetensors, and diffusers models from a local disk or the internet. The example below shows importing two checkpoint URLs from popular SD sites and a HuggingFace diffusers model using its Repository ID. It also shows how to designate a folder to be scanned at startup time for new models to import.
|
||||
|
||||
Command-line users can start this app using the command invokeai-model-install.
|
||||
|
||||
Using the Command Line Client (CLI)
|
||||
|
||||
The !install_model and !convert_model commands have been enhanced to allow entering of URLs and local directories to scan and import. The first command installs .ckpt and .safetensors files as-is. The second one converts them into the faster diffusers format before installation.
|
||||
|
||||
Internally InvokeAI is able to probe the contents of a .ckpt or .safetensors file to distinguish among v1.x, v2.x and inpainting models. This means that you do not need to include "inpaint" in your model names to use an inpainting model. Note that Stable Diffusion v2.x models will be autoconverted into a diffusers model the first time you use it.
|
||||
|
||||
Please see INSTALLING MODELS for more information on model management.
|
||||
|
||||
### An Improved Installer Experience
|
||||
|
||||
The installer now launches a console-based UI for setting and changing commonly-used startup options:
|
||||
|
||||
After selecting the desired options, the installer installs several support models needed by InvokeAI's face reconstruction and upscaling features and then launches the interface for selecting and installing models shown earlier. At any time, you can edit the startup options by launching invoke.sh/invoke.bat and entering option (6) change InvokeAI startup options
|
||||
|
||||
Command-line users can launch the new configure app using invokeai-configure.
|
||||
|
||||
This release also comes with a renewed updater. To do an update without going through a whole reinstallation, launch invoke.sh or invoke.bat and choose option (9) update InvokeAI . This will bring you to a screen that prompts you to update to the latest released version, to the most current development version, or any released or unreleased version you choose by selecting the tag or branch of the desired version.
|
||||
|
||||
Command-line users can run this interface by typing invokeai-update.
|
||||
|
||||
### Image Symmetry Options
|
||||
|
||||
There are now features to generate horizontal and vertical symmetry during generation. The way these work is to wait until a selected step in the generation process and then to turn on a mirror image effect. In addition to generating some cool images, you can also use this to make side-by-side comparisons of how an image will look with more or fewer steps. Access this option from the WebUI by selecting Symmetry from the image generation settings, or within the CLI by using the options --h_symmetry_time_pct and --v_symmetry_time_pct (these can be abbreviated to --h_sym and --v_sym like all other options).
|
||||
|
||||
### A New Unified Canvas Look
|
||||
|
||||
This release introduces a beta version of the WebUI Unified Canvas. To try it out, open up the settings dialogue in the WebUI (gear icon) and select Use Canvas Beta Layout:
|
||||
|
||||
Refresh the screen and go to Unified Canvas (left side of screen, third icon from the top). The new layout is designed to provide more space to work in and to keep the image controls close to the image itself:
|
||||
|
||||
Model conversion and merging within the WebUI
|
||||
|
||||
The WebUI now has an intuitive interface for model merging, as well as for permanent conversion of models from legacy .ckpt/.safetensors formats into diffusers format. These options are also available directly from the invoke.sh/invoke.bat scripts.
|
||||
An easier way to contribute translations to the WebUI
|
||||
|
||||
We have migrated our translation efforts to Weblate, a FOSS translation product. Maintaining the growing project's translations is now far simpler for the maintainers and community. Please review our brief translation guide for more information on how to contribute.
|
||||
Numerous internal bugfixes and performance issues
|
||||
|
||||
### Bug Fixes
|
||||
This release quashes multiple bugs that were reported in 2.3.0. Major internal changes include upgrading to diffusers 0.13.0, and using the compel library for prompt parsing. See Detailed Change Log for a detailed list of bugs caught and squished.
|
||||
Summary of InvokeAI command line scripts (all accessible via the launcher menu)
|
||||
Command Description
|
||||
invokeai Command line interface
|
||||
invokeai --web Web interface
|
||||
invokeai-model-install Model installer with console forms-based front end
|
||||
invokeai-ti --gui Textual inversion, with a console forms-based front end
|
||||
invokeai-merge --gui Model merging, with a console forms-based front end
|
||||
invokeai-configure Startup configuration; can also be used to reinstall support models
|
||||
invokeai-update InvokeAI software updater
|
||||
|
||||
### Known Bugs in 2.3.1
|
||||
|
||||
These are known bugs in the release.
|
||||
MacOS users generating 768x768 pixel images or greater using diffusers models may experience a hard crash with assertion NDArray > 2**32 This appears to be an issu...
|
||||
|
||||
|
||||
|
||||
## v2.3.0 <small>(15 January 2023)</small>
|
||||
|
||||
**Transition to diffusers**
|
||||
|
||||
Version 2.3 provides support for both the traditional `.ckpt` weight
|
||||
checkpoint files as well as the HuggingFace `diffusers` format. This
|
||||
introduces several changes you should know about.
|
||||
|
||||
1. The models.yaml format has been updated. There are now two
|
||||
different types of configuration stanza. The traditional ckpt
|
||||
one will look like this, with a `format` of `ckpt` and a
|
||||
`weights` field that points to the absolute or ROOTDIR-relative
|
||||
location of the ckpt file.
|
||||
|
||||
```
|
||||
inpainting-1.5:
|
||||
description: RunwayML SD 1.5 model optimized for inpainting (4.27 GB)
|
||||
repo_id: runwayml/stable-diffusion-inpainting
|
||||
format: ckpt
|
||||
width: 512
|
||||
height: 512
|
||||
weights: models/ldm/stable-diffusion-v1/sd-v1-5-inpainting.ckpt
|
||||
config: configs/stable-diffusion/v1-inpainting-inference.yaml
|
||||
vae: models/ldm/stable-diffusion-v1/vae-ft-mse-840000-ema-pruned.ckpt
|
||||
```
|
||||
|
||||
A configuration stanza for a diffusers model hosted at HuggingFace will look like this,
|
||||
with a `format` of `diffusers` and a `repo_id` that points to the
|
||||
repository ID of the model on HuggingFace:
|
||||
|
||||
```
|
||||
stable-diffusion-2.1:
|
||||
description: Stable Diffusion version 2.1 diffusers model (5.21 GB)
|
||||
repo_id: stabilityai/stable-diffusion-2-1
|
||||
format: diffusers
|
||||
```
|
||||
|
||||
A configuration stanza for a diffusers model stored locally should
|
||||
look like this, with a `format` of `diffusers`, but a `path` field
|
||||
that points at the directory that contains `model_index.json`:
|
||||
|
||||
```
|
||||
waifu-diffusion:
|
||||
description: Latest waifu diffusion 1.4
|
||||
format: diffusers
|
||||
path: models/diffusers/hakurei-haifu-diffusion-1.4
|
||||
```
|
||||
|
||||
2. In order of precedence, InvokeAI will now use HF_HOME, then
|
||||
XDG_CACHE_HOME, then finally default to `ROOTDIR/models` to
|
||||
store HuggingFace diffusers models.
|
||||
|
||||
Consequently, the format of the models directory has changed to
|
||||
mimic the HuggingFace cache directory. When HF_HOME and XDG_CACHE_HOME
|
||||
are not set, diffusers models are now automatically downloaded
|
||||
and retrieved from the directory `ROOTDIR/models/diffusers`,
|
||||
while other models are stored in the directory
|
||||
`ROOTDIR/models/hub`. This organization is the same as that used
|
||||
by HuggingFace for its cache management.
|
||||
|
||||
This allows you to share diffusers and ckpt model files easily with
|
||||
other machine learning applications that use the HuggingFace
|
||||
libraries. To do this, set the environment variable HF_HOME
|
||||
before starting up InvokeAI to tell it what directory to
|
||||
cache models in. To tell InvokeAI to use the standard HuggingFace
|
||||
cache directory, you would set HF_HOME like this (Linux/Mac):
|
||||
|
||||
`export HF_HOME=~/.cache/huggingface`
|
||||
|
||||
Both HuggingFace and InvokeAI will fall back to the XDG_CACHE_HOME
|
||||
environment variable if HF_HOME is not set; this path
|
||||
takes precedence over `ROOTDIR/models` to allow for the same sharing
|
||||
with other machine learning applications that use HuggingFace
|
||||
libraries.
|
||||
|
||||
3. If you upgrade to InvokeAI 2.3.* from an earlier version, there
|
||||
will be a one-time migration from the old models directory format
|
||||
to the new one. You will see a message about this the first time
|
||||
you start `invoke.py`.
|
||||
|
||||
4. Both the front end and back end of the model manager have been
|
||||
rewritten to accommodate diffusers. You can import models using
|
||||
their local file path, using their URLs, or their HuggingFace
|
||||
repo_ids. On the command line, all these syntaxes work:
|
||||
|
||||
```
|
||||
!import_model stabilityai/stable-diffusion-2-1-base
|
||||
!import_model /opt/sd-models/sd-1.4.ckpt
|
||||
!import_model https://huggingface.co/Fictiverse/Stable_Diffusion_PaperCut_Model/blob/main/PaperCut_v1.ckpt
|
||||
```
|
||||
|
||||
**KNOWN BUGS (15 January 2023)**
|
||||
|
||||
1. On CUDA systems, the 768 pixel stable-diffusion-2.0 and
|
||||
stable-diffusion-2.1 models can only be run as `diffusers` models
|
||||
when the `xformers` library is installed and configured. Without
|
||||
`xformers`, InvokeAI returns black images.
|
||||
|
||||
2. Inpainting and outpainting have regressed in quality.
|
||||
|
||||
Both these issues are being actively worked on.
|
||||
|
||||
## v2.2.4 <small>(11 December 2022)</small>
|
||||
|
||||
**the `invokeai` directory**
|
||||
|
||||
Previously there were two directories to worry about, the directory that
|
||||
contained the InvokeAI source code and the launcher scripts, and the `invokeai`
|
||||
directory that contained the models files, embeddings, configuration and
|
||||
outputs. With the 2.2.4 release, this dual system is done away with, and
|
||||
everything, including the `invoke.bat` and `invoke.sh` launcher scripts, now
|
||||
lives in a directory named `invokeai`. By default this directory is located in
|
||||
your home directory (e.g. `\Users\yourname` on Windows), but you can select
|
||||
where it goes at install time.
|
||||
|
||||
After installation, you can delete the install directory (the one that the zip
|
||||
file creates when it unpacks). Do **not** delete or move the `invokeai`
|
||||
directory!
|
||||
|
||||
**Initialization file `invokeai/invokeai.init`**
|
||||
|
||||
You can place frequently-used startup options in this file, such as the default
|
||||
number of steps or your preferred sampler. To keep everything in one place, this
|
||||
file has now been moved into the `invokeai` directory and is named
|
||||
`invokeai.init`.
|
||||
|
||||
**To update from Version 2.2.3**
|
||||
|
||||
The easiest route is to download and unpack one of the 2.2.4 installer files.
|
||||
When it asks you for the location of the `invokeai` runtime directory, respond
|
||||
with the path to the directory that contains your 2.2.3 `invokeai`. That is, if
|
||||
`invokeai` lives at `C:\Users\fred\invokeai`, then answer with `C:\Users\fred`
|
||||
and answer "Y" when asked if you want to reuse the directory.
|
||||
|
||||
The `update.sh` (`update.bat`) script that came with the 2.2.3 source installer
|
||||
does not know about the new directory layout and won't be fully functional.
|
||||
|
||||
**To update to 2.2.5 (and beyond) there's now an update path**
|
||||
|
||||
As they become available, you can update to more recent versions of InvokeAI
|
||||
using an `update.sh` (`update.bat`) script located in the `invokeai` directory.
|
||||
Running it without any arguments will install the most recent version of
|
||||
InvokeAI. Alternatively, you can get specific releases by running the `update.sh`
|
||||
script with an argument in the command shell. This syntax accepts the path to
|
||||
the desired release's zip file, which you can find by clicking on the green
|
||||
"Code" button on this repository's home page.
|
||||
|
||||
**Other 2.2.4 Improvements**
|
||||
|
||||
- Fix InvokeAI GUI initialization by @addianto in #1687
|
||||
- fix link in documentation by @lstein in #1728
|
||||
- Fix broken link by @ShawnZhong in #1736
|
||||
- Remove reference to binary installer by @lstein in #1731
|
||||
- documentation fixes for 2.2.3 by @lstein in #1740
|
||||
- Modify installer links to point closer to the source installer by @ebr in
|
||||
#1745
|
||||
- add documentation warning about 1650/60 cards by @lstein in #1753
|
||||
- Fix Linux source URL in installation docs by @andybearman in #1756
|
||||
- Make install instructions discoverable in readme by @damian0815 in #1752
|
||||
- typo fix by @ofirkris in #1755
|
||||
- Non-interactive model download (support HUGGINGFACE_TOKEN) by @ebr in #1578
|
||||
- fix(srcinstall): shell installer - cp scripts instead of linking by @tildebyte
|
||||
in #1765
|
||||
- stability and usage improvements to binary & source installers by @lstein in
|
||||
#1760
|
||||
- fix off-by-one bug in cross-attention-control by @damian0815 in #1774
|
||||
- Eventually update APP_VERSION to 2.2.3 by @spezialspezial in #1768
|
||||
- invoke script cds to its location before running by @lstein in #1805
|
||||
- Make PaperCut and VoxelArt models load again by @lstein in #1730
|
||||
- Fix --embedding_directory / --embedding_path not working by @blessedcoolant in
|
||||
#1817
|
||||
- Clean up readme by @hipsterusername in #1820
|
||||
- Optimized Docker build with support for external working directory by @ebr in
|
||||
#1544
|
||||
- disable pushing the cloud container by @mauwii in #1831
|
||||
- Fix docker push github action and expand with additional metadata by @ebr in
|
||||
#1837
|
||||
- Fix Broken Link To Notebook by @VedantMadane in #1821
|
||||
- Account for flat models by @spezialspezial in #1766
|
||||
- Update invoke.bat.in isolate environment variables by @lynnewu in #1833
|
||||
- Arch Linux Specific PatchMatch Instructions & fixing conda install on linux by
|
||||
@SammCheese in #1848
|
||||
- Make force free GPU memory work in img2img by @addianto in #1844
|
||||
- New installer by @lstein
|
||||
|
||||
## v2.2.3 <small>(2 December 2022)</small>
|
||||
|
||||
!!! Note
|
||||
|
||||
This point release removes references to the binary installer from the
|
||||
installation guide. The binary installer is not stable at the current
|
||||
time. First time users are encouraged to use the "source" installer as
|
||||
described in [Installing InvokeAI with the Source Installer](installation/deprecated_documentation/INSTALL_SOURCE.md)
|
||||
|
||||
With InvokeAI 2.2, this project now provides enthusiasts and professionals a
|
||||
robust workflow solution for creating AI-generated and human facilitated
|
||||
compositions. Additional enhancements have been made as well, improving safety,
|
||||
ease of use, and installation.
|
||||
|
||||
Optimized for efficiency, InvokeAI needs only ~3.5GB of VRAM to generate a
|
||||
512x768 image (and less for smaller images), and is compatible with
|
||||
Windows/Linux/Mac (M1 & M2).
|
||||
|
||||
You can see the [release video](https://youtu.be/hIYBfDtKaus) here, which
|
||||
introduces the main WebUI enhancement for version 2.2 -
|
||||
[The Unified Canvas](features/UNIFIED_CANVAS.md). This new workflow is the
|
||||
biggest enhancement added to the WebUI to date, and unlocks a stunning amount of
|
||||
potential for users to create and iterate on their creations. The following
|
||||
sections describe what's new for InvokeAI.
|
||||
|
||||
## v2.2.2 <small>(30 November 2022)</small>
|
||||
|
||||
!!! note
|
||||
|
||||
The binary installer is not ready for prime time. First time users are recommended to install via the "source" installer accessible through the links at the bottom of this page.
|
||||
|
||||
With InvokeAI 2.2, this project now provides enthusiasts and professionals a
|
||||
robust workflow solution for creating AI-generated and human facilitated
|
||||
compositions. Additional enhancements have been made as well, improving safety,
|
||||
ease of use, and installation.
|
||||
|
||||
Optimized for efficiency, InvokeAI needs only ~3.5GB of VRAM to generate a
|
||||
512x768 image (and less for smaller images), and is compatible with
|
||||
Windows/Linux/Mac (M1 & M2).
|
||||
|
||||
You can see the [release video](https://youtu.be/hIYBfDtKaus) here, which
|
||||
introduces the main WebUI enhancement for version 2.2 -
|
||||
[The Unified Canvas](https://invoke-ai.github.io/InvokeAI/features/UNIFIED_CANVAS/).
|
||||
This new workflow is the biggest enhancement added to the WebUI to date, and
|
||||
unlocks a stunning amount of potential for users to create and iterate on their
|
||||
creations. The following sections describe what's new for InvokeAI.
|
||||
|
||||
## v2.2.0 <small>(2 December 2022)</small>
|
||||
|
||||
With InvokeAI 2.2, this project now provides enthusiasts and professionals a
|
||||
robust workflow solution for creating AI-generated and human facilitated
|
||||
compositions. Additional enhancements have been made as well, improving safety,
|
||||
ease of use, and installation.
|
||||
|
||||
Optimized for efficiency, InvokeAI needs only ~3.5GB of VRAM to generate a
|
||||
512x768 image (and less for smaller images), and is compatible with
|
||||
Windows/Linux/Mac (M1 & M2).
|
||||
|
||||
You can see the [release video](https://youtu.be/hIYBfDtKaus) here, which
|
||||
introduces the main WebUI enhancement for version 2.2 -
|
||||
[The Unified Canvas](features/UNIFIED_CANVAS.md). This new workflow is the
|
||||
biggest enhancement added to the WebUI to date, and unlocks a stunning amount of
|
||||
potential for users to create and iterate on their creations. The following
|
||||
sections describe what's new for InvokeAI.
|
||||
|
||||
## v2.1.3 <small>(13 November 2022)</small>
|
||||
|
||||
- A choice of installer scripts that automate installation and configuration.
|
||||
See
|
||||
[Installation](installation/index.md).
|
||||
- A streamlined manual installation process that works for both Conda and
|
||||
PIP-only installs. See
|
||||
[Manual Installation](installation/020_INSTALL_MANUAL.md).
|
||||
- The ability to save frequently-used startup options (model to load, steps,
|
||||
sampler, etc) in a `.invokeai` file. See
|
||||
[Client](deprecated/CLI.md)
|
||||
- Support for AMD GPU cards (non-CUDA) on Linux machines.
|
||||
- Multiple bugs and edge cases squashed.
|
||||
|
||||
## v2.1.0 <small>(2 November 2022)</small>
|
||||
|
||||
- update mac instructions to use invokeai for env name by @willwillems in #1030
|
||||
- Update .gitignore by @blessedcoolant in #1040
|
||||
- reintroduce fix for m1 from #579 missing after merge by @skurovec in #1056
|
||||
- Update Stable_Diffusion_AI_Notebook.ipynb (Take 2) by @ChloeL19 in #1060
|
||||
- Print out the device type which is used by @manzke in #1073
|
||||
- Hires Addition by @hipsterusername in #1063
|
||||
- fix for "1 leaked semaphore objects to clean up at shutdown" on M1 by
|
||||
@skurovec in #1081
|
||||
- Forward dream.py to invoke.py using the same interpreter, add deprecation
|
||||
warning by @db3000 in #1077
|
||||
- fix noisy images at high step counts by @lstein in #1086
|
||||
- Generalize facetool strength argument by @db3000 in #1078
|
||||
- Enable fast switching among models at the invoke> command line by @lstein in
|
||||
#1066
|
||||
- Fix Typo, committed changing ldm environment to invokeai by @jdries3 in #1095
|
||||
- Update generate.py by @unreleased in #1109
|
||||
- Update 'ldm' env to 'invokeai' in troubleshooting steps by @19wolf in #1125
|
||||
- Fixed documentation typos and resolved merge conflicts by @rupeshs in #1123
|
||||
- Fix broken doc links, fix malaprop in the project subtitle by @majick in #1131
|
||||
- Only output facetool parameters if enhancing faces by @db3000 in #1119
|
||||
- Update gitignore to ignore codeformer weights at new location by
|
||||
@spezialspezial in #1136
|
||||
- fix links to point to invoke-ai.github.io #1117 by @mauwii in #1143
|
||||
- Rework-mkdocs by @mauwii in #1144
|
||||
- add option to CLI and pngwriter that allows user to set PNG compression level
|
||||
by @lstein in #1127
|
||||
- Fix img2img DDIM index out of bound by @wfng92 in #1137
|
||||
- Fix gh actions by @mauwii in #1128
|
||||
- update mac instructions to use invokeai for env name by @willwillems in #1030
|
||||
- Update .gitignore by @blessedcoolant in #1040
|
||||
- reintroduce fix for m1 from #579 missing after merge by @skurovec in #1056
|
||||
- Update Stable_Diffusion_AI_Notebook.ipynb (Take 2) by @ChloeL19 in #1060
|
||||
- Print out the device type which is used by @manzke in #1073
|
||||
- Hires Addition by @hipsterusername in #1063
|
||||
- fix for "1 leaked semaphore objects to clean up at shutdown" on M1 by
|
||||
@skurovec in #1081
|
||||
- Forward dream.py to invoke.py using the same interpreter, add deprecation
|
||||
warning by @db3000 in #1077
|
||||
- fix noisy images at high step counts by @lstein in #1086
|
||||
- Generalize facetool strength argument by @db3000 in #1078
|
||||
- Enable fast switching among models at the invoke> command line by @lstein in
|
||||
#1066
|
||||
- Fix Typo, committed changing ldm environment to invokeai by @jdries3 in #1095
|
||||
- Fixed documentation typos and resolved merge conflicts by @rupeshs in #1123
|
||||
- Only output facetool parameters if enhancing faces by @db3000 in #1119
|
||||
- add option to CLI and pngwriter that allows user to set PNG compression level
|
||||
by @lstein in #1127
|
||||
- Fix img2img DDIM index out of bound by @wfng92 in #1137
|
||||
- Add text prompt to inpaint mask support by @lstein in #1133
|
||||
- Respect http[s] protocol when making socket.io middleware by @damian0815 in
|
||||
#976
|
||||
- WebUI: Adds Codeformer support by @psychedelicious in #1151
|
||||
- Skips normalizing prompts for web UI metadata by @psychedelicious in #1165
|
||||
- Add Asymmetric Tiling by @carson-katri in #1132
|
||||
- Web UI: Increases max CFG Scale to 200 by @psychedelicious in #1172
|
||||
- Corrects color channels in face restoration; Fixes #1167 by @psychedelicious
|
||||
in #1175
|
||||
- Flips channels using array slicing instead of using OpenCV by @psychedelicious
|
||||
in #1178
|
||||
- Fix typo in docs: s/Formally/Formerly by @noodlebox in #1176
|
||||
- fix clipseg loading problems by @lstein in #1177
|
||||
- Correct color channels in upscale using array slicing by @wfng92 in #1181
|
||||
- Web UI: Filters existing images when adding new images; Fixes #1085 by
|
||||
@psychedelicious in #1171
|
||||
- fix a number of bugs in textual inversion by @lstein in #1190
|
||||
- Improve !fetch, add !replay command by @ArDiouscuros in #882
|
||||
- Fix generation of image with s>1000 by @holstvoogd in #951
|
||||
- Web UI: Gallery improvements by @psychedelicious in #1198
|
||||
- Update CLI.md by @krummrey in #1211
|
||||
- outcropping improvements by @lstein in #1207
|
||||
- add support for loading VAE autoencoders by @lstein in #1216
|
||||
- remove duplicate fix_func for MPS by @wfng92 in #1210
|
||||
- Metadata storage and retrieval fixes by @lstein in #1204
|
||||
- nix: add shell.nix file by @Cloudef in #1170
|
||||
- Web UI: Changes vite dist asset paths to relative by @psychedelicious in #1185
|
||||
- Web UI: Removes isDisabled from PromptInput by @psychedelicious in #1187
|
||||
- Allow user to generate images with initial noise as on M1 / mps system by
|
||||
@ArDiouscuros in #981
|
||||
- feat: adding filename format template by @plucked in #968
|
||||
- Web UI: Fixes broken bundle by @psychedelicious in #1242
|
||||
- Support runwayML custom inpainting model by @lstein in #1243
|
||||
- Update IMG2IMG.md by @talitore in #1262
|
||||
- New dockerfile - including a build- and a run- script as well as a GH-Action
|
||||
by @mauwii in #1233
|
||||
- cut over from karras to model noise schedule for higher steps by @lstein in
|
||||
#1222
|
||||
- Prompt tweaks by @lstein in #1268
|
||||
- Outpainting implementation by @Kyle0654 in #1251
|
||||
- fixing aspect ratio on hires by @tjennings in #1249
|
||||
- Fix-build-container-action by @mauwii in #1274
|
||||
- handle all unicode characters by @damian0815 in #1276
|
||||
- adds models.user.yml to .gitignore by @JakeHL in #1281
|
||||
- remove debug branch, set fail-fast to false by @mauwii in #1284
|
||||
- Protect-secrets-on-pr by @mauwii in #1285
|
||||
- Web UI: Adds initial inpainting implementation by @psychedelicious in #1225
|
||||
- fix environment-mac.yml - tested on x64 and arm64 by @mauwii in #1289
|
||||
- Use proper authentication to download model by @mauwii in #1287
|
||||
- Prevent indexing error for mode RGB by @spezialspezial in #1294
|
||||
- Integrate sd-v1-5 model into test matrix (easily expandable), remove
|
||||
unecesarry caches by @mauwii in #1293
|
||||
- add --no-interactive to configure_invokeai step by @mauwii in #1302
|
||||
- 1-click installer and updater. Uses micromamba to install git and conda into a
|
||||
contained environment (if necessary) before running the normal installation
|
||||
script by @cmdr2 in #1253
|
||||
- configure_invokeai.py script downloads the weight files by @lstein in #1290
|
||||
|
||||
## v2.0.1 <small>(13 October 2022)</small>
|
||||
|
||||
- fix noisy images at high step count when using k\* samplers
|
||||
- dream.py script now calls invoke.py module directly rather than via a new
|
||||
python process (which could break the environment)
|
||||
|
||||
## v2.0.0 <small>(9 October 2022)</small>
|
||||
|
||||
- `dream.py` script renamed `invoke.py`. A `dream.py` script wrapper remains for
|
||||
backward compatibility.
|
||||
- Completely new WebGUI - launch with `python3 scripts/invoke.py --web`
|
||||
- img2img runs on all k\* samplers
|
||||
- Support for
|
||||
[negative prompts](features/PROMPTS.md#negative-and-unconditioned-prompts)
|
||||
- Support for CodeFormer face reconstruction
|
||||
- Support for Textual Inversion on Macintoshes
|
||||
- Support in both WebGUI and CLI for
|
||||
[post-processing of previously-generated images](features/POSTPROCESS.md)
|
||||
using facial reconstruction, ESRGAN upscaling, outcropping (similar to DALL-E
|
||||
infinite canvas), and "embiggen" upscaling. See the `!fix` command.
|
||||
- New `--hires` option on `invoke>` line allows
|
||||
[larger images to be created without duplicating elements](deprecated/CLI.md#this-is-an-example-of-txt2img),
|
||||
at the cost of some performance.
|
||||
- New `--perlin` and `--threshold` options allow you to add and control
|
||||
variation during image generation (see
|
||||
[Thresholding and Perlin Noise Initialization](features/OTHER.md#thresholding-and-perlin-noise-initialization-options))
|
||||
- Extensive metadata now written into PNG files, allowing reliable regeneration
|
||||
of images and tweaking of previous settings.
|
||||
- Command-line completion in `invoke.py` now works on Windows, Linux and Mac
|
||||
platforms.
|
||||
- Improved [command-line completion behavior](deprecated/CLI.md). New commands
|
||||
added:
|
||||
- List command-line history with `!history`
|
||||
- Search command-line history with `!search`
|
||||
- Clear history with `!clear`
|
||||
- Deprecated `--full_precision` / `-F`. Simply omit it and `invoke.py` will auto
|
||||
configure. To switch away from auto use the new flag like
|
||||
`--precision=float32`.
|
||||
|
||||
## v1.14 <small>(11 September 2022)</small>
|
||||
|
||||
- Memory optimizations for small-RAM cards. 512x512 now possible on 4 GB GPUs.
|
||||
- Full support for Apple hardware with M1 or M2 chips.
|
||||
- Add "seamless mode" for circular tiling of image. Generates beautiful effects.
|
||||
([prixt](https://github.com/prixt)).
|
||||
- Inpainting support.
|
||||
- Improved web server GUI.
|
||||
- Lots of code and documentation cleanups.
|
||||
|
||||
## v1.13 <small>(3 September 2022)</small>
|
||||
|
||||
- Support image variations (see [VARIATIONS](features/VARIATIONS.md))
|
||||
([Kevin Gibbons](https://github.com/bakkot) and many contributors and
|
||||
reviewers)
|
||||
- Supports a Google Colab notebook for a standalone server running on Google
|
||||
hardware [Arturo Mendivil](https://github.com/artmen1516)
|
||||
- WebUI supports GFPGAN/ESRGAN facial reconstruction and upscaling
|
||||
[Kevin Gibbons](https://github.com/bakkot)
|
||||
- WebUI supports incremental display of in-progress images during generation
|
||||
[Kevin Gibbons](https://github.com/bakkot)
|
||||
- A new configuration file scheme that allows new models (including upcoming
|
||||
stable-diffusion-v1.5) to be added without altering the code.
|
||||
([David Wager](https://github.com/maddavid12))
|
||||
- Can specify --grid on invoke.py command line as the default.
|
||||
- Miscellaneous internal bug and stability fixes.
|
||||
- Works on M1 Apple hardware.
|
||||
- Multiple bug fixes.
|
||||
|
||||
---
|
||||
|
||||
## v1.12 <small>(28 August 2022)</small>
|
||||
|
||||
- Improved file handling, including ability to read prompts from standard input.
|
||||
(kudos to [Yunsaki](https://github.com/yunsaki))
|
||||
- The web server is now integrated with the invoke.py script. Invoke by adding
|
||||
--web to the invoke.py command arguments.
|
||||
- Face restoration and upscaling via GFPGAN and Real-ESRGAN are now automatically
|
||||
enabled if the GFPGAN directory is located as a sibling to Stable Diffusion.
|
||||
VRAM requirements are modestly reduced. Thanks to both
|
||||
[Blessedcoolant](https://github.com/blessedcoolant) and
|
||||
[Oceanswave](https://github.com/oceanswave) for their work on this.
|
||||
- You can now swap samplers on the invoke> command line.
|
||||
[Blessedcoolant](https://github.com/blessedcoolant)
|
||||
|
||||
---
|
||||
|
||||
## v1.11 <small>(26 August 2022)</small>
|
||||
|
||||
- NEW FEATURE: Support upscaling and face enhancement using the GFPGAN module.
|
||||
(kudos to [Oceanswave](https://github.com/Oceanswave))
|
||||
- You now can specify a seed of -1 to use the previous image's seed, -2 to use
|
||||
the seed for the image generated before that, etc. Seed memory only extends
|
||||
back to the previous command, but will work on all images generated with the
|
||||
-n# switch.
|
||||
- Variant generation support temporarily disabled pending more general solution.
|
||||
- Created a feature branch named **yunsaki-morphing-invoke** which adds
|
||||
experimental support for iteratively modifying the prompt and its parameters.
|
||||
Please
|
||||
see [Pull Request #86](https://github.com/lstein/stable-diffusion/pull/86) for
|
||||
a synopsis of how this works. Note that when this feature is eventually added
|
||||
to the main branch, it may be modified significantly.
|
||||
|
||||
---
|
||||
|
||||
## v1.10 <small>(25 August 2022)</small>
|
||||
|
||||
- A barebones but fully functional interactive web server for online generation
|
||||
of txt2img and img2img.
|
||||
|
||||
---
|
||||
|
||||
## v1.09 <small>(24 August 2022)</small>
|
||||
|
||||
- A new -v option allows you to generate multiple variants of an initial image
|
||||
in img2img mode. (kudos to [Oceanswave](https://github.com/Oceanswave).
|
||||
[ See this discussion in the PR for examples and details on use](https://github.com/lstein/stable-diffusion/pull/71#issuecomment-1226700810))
|
||||
- Added ability to personalize text to image generation (kudos to
|
||||
[Oceanswave](https://github.com/Oceanswave) and
|
||||
[nicolai256](https://github.com/nicolai256))
|
||||
- Enabled all of the samplers from k_diffusion
|
||||
|
||||
---
|
||||
|
||||
## v1.08 <small>(24 August 2022)</small>
|
||||
|
||||
- Escape single quotes on the invoke> command before trying to parse. This
|
||||
avoids parse errors.
|
||||
- Removed instruction to get Python3.8 as first step in Windows install.
|
||||
Anaconda3 does it for you.
|
||||
- Added bounds checks for numeric arguments that could cause crashes.
|
||||
- Cleaned up the copyright and license agreement files.
|
||||
|
||||
---
|
||||
|
||||
## v1.07 <small>(23 August 2022)</small>
|
||||
|
||||
- Image filenames will now never fill gaps in the sequence, but will be assigned
|
||||
the next higher name in the chosen directory. This ensures that the alphabetic
|
||||
and chronological sort orders are the same.
|
||||
|
||||
---
|
||||
|
||||
## v1.06 <small>(23 August 2022)</small>
|
||||
|
||||
- Added weighted prompt support contributed by
|
||||
[xraxra](https://github.com/xraxra)
|
||||
- Example of using weighted prompts to tweak a demonic figure contributed by
|
||||
[bmaltais](https://github.com/bmaltais)
|
||||
|
||||
---
|
||||
|
||||
## v1.05 <small>(22 August 2022 - after the drop)</small>
|
||||
|
||||
- Filenames now use the following formats: 000010.95183149.png -- Two files
|
||||
produced by the same command (e.g. -n2), 000010.26742632.png -- distinguished
|
||||
by a different seed.
|
||||
|
||||
000011.455191342.01.png -- Two files produced by the same command using
|
||||
000011.455191342.02.png -- a batch size>1 (e.g. -b2). They have the same seed.
|
||||
|
||||
000011.4160627868.grid#1-4.png -- a grid of four images (-g); the whole grid
|
||||
can be regenerated with the indicated key
|
||||
|
||||
- It should no longer be possible for one image to overwrite another
|
||||
- You can use the "cd" and "pwd" commands at the invoke> prompt to set and
|
||||
retrieve the path of the output directory.
|
||||
|
||||
---
|
||||
|
||||
## v1.04 <small>(22 August 2022 - after the drop)</small>
|
||||
|
||||
- Updated README to reflect installation of the released weights.
|
||||
- Suppressed very noisy and inconsequential warning when loading the frozen CLIP
|
||||
tokenizer.
|
||||
|
||||
---
|
||||
|
||||
## v1.03 <small>(22 August 2022)</small>
|
||||
|
||||
- The original txt2img and img2img scripts from the CompVis repository have been
|
||||
moved into a subfolder named "orig_scripts", to reduce confusion.
|
||||
|
||||
---
|
||||
|
||||
## v1.02 <small>(21 August 2022)</small>
|
||||
|
||||
- A copy of the prompt and all of its switches and options is now stored in the
|
||||
corresponding image in a tEXt metadata field named "Dream". You can read the
|
||||
prompt using scripts/images2prompt.py, or an image editor that allows you to
|
||||
explore the full metadata. **Please run "conda env update" to load the k_lms
|
||||
dependencies!!**
|
||||
|
||||
---
|
||||
|
||||
## v1.01 <small>(21 August 2022)</small>
|
||||
|
||||
- added k_lms sampling. **Please run "conda env update" to load the k_lms
|
||||
dependencies!!**
|
||||
- use half precision arithmetic by default, resulting in faster execution and
|
||||
lower memory requirements. Pass argument --full_precision to invoke.py to get
|
||||
slower but more accurate image generation
|
||||
|
||||
---
|
||||
|
||||
## Links
|
||||
|
||||
- **[Read Me](index.md)**
|
||||
173
docs/RELEASE.md
@@ -1,173 +0,0 @@
|
||||
# Release Process
|
||||
|
||||
The app is published twice, in different build formats.
|
||||
|
||||
- A [PyPI] distribution. This includes both a source distribution and built distribution (a wheel). Users install with `pip install invokeai`. The updater uses this build.
|
||||
- An installer on the [InvokeAI Releases Page]. This is a zip file with install scripts and a wheel. This is only used for new installs.
|
||||
|
||||
## General Prep
|
||||
|
||||
Make a developer call-out for PRs to merge. Merge and test things out.
|
||||
|
||||
While the release workflow does not include end-to-end tests, it does pause before publishing so you can download and test the final build.
|
||||
|
||||
## Release Workflow
|
||||
|
||||
The `release.yml` workflow runs a number of jobs to handle code checks, tests, build and publish on PyPI.
|
||||
|
||||
It is triggered on **tag push**, when the tag matches `v*`. It doesn't matter if you've prepped a release branch like `release/v3.5.0` or are releasing from `main` - it works the same.
|
||||
|
||||
> Because commits are reference-counted, it is safe to create a release branch, tag it, let the workflow run, then delete the branch. So long as the tag exists, that commit will exist.
|
||||
|
||||
### Triggering the Workflow
|
||||
|
||||
Run `make tag-release` to tag the current commit and kick off the workflow.
|
||||
|
||||
The release may also be dispatched [manually].
|
||||
|
||||
### Workflow Jobs and Process
|
||||
|
||||
The workflow consists of a number of concurrently-run jobs, and two final publish jobs.
|
||||
|
||||
The publish jobs require manual approval and are only run if the other jobs succeed.
|
||||
|
||||
#### `check-version` Job
|
||||
|
||||
This job checks that the git ref matches the app version. It matches the ref against the `__version__` variable in `invokeai/version/invokeai_version.py`.
|
||||
|
||||
When the workflow is triggered by tag push, the ref is the tag. If the workflow is run manually, the ref is the target selected from the **Use workflow from** dropdown.
|
||||
|
||||
This job uses [samuelcolvin/check-python-version].
|
||||
|
||||
> Any valid [version specifier] works, so long as the tag matches the version. The release workflow works exactly the same for `RC`, `post`, `dev`, etc.
|
||||
|
||||
#### Check and Test Jobs
|
||||
|
||||
- **`python-tests`**: runs `pytest` on a matrix of platforms
|
||||
- **`python-checks`**: runs `ruff` (format and lint)
|
||||
- **`frontend-tests`**: runs `vitest`
|
||||
- **`frontend-checks`**: runs `prettier` (format), `eslint` (lint), `dpdm` (circular refs), `tsc` (static type check) and `knip` (unused imports)
|
||||
|
||||
> **TODO** We should add `mypy` or `pyright` to the **`python-checks`** job.
|
||||
|
||||
> **TODO** We should add an end-to-end test job that generates an image.
|
||||
|
||||
#### `build-installer` Job
|
||||
|
||||
This sets up both python and frontend dependencies and builds the python package. Internally, this runs `installer/create_installer.sh` and uploads two artifacts:
|
||||
|
||||
- **`dist`**: the python distribution, to be published on PyPI
|
||||
- **`InvokeAI-installer-${VERSION}.zip`**: the installer to be included in the GitHub release
|
||||
|
||||
#### Sanity Check & Smoke Test
|
||||
|
||||
At this point, the release workflow pauses as the remaining publish jobs require approval. Time to test the installer.
|
||||
|
||||
Because the installer pulls from PyPI, and we haven't published to PyPI yet, you will need to install from the wheel:
|
||||
|
||||
- Download and unzip `dist.zip` and the installer from the **Summary** tab of the workflow
|
||||
- Run the installer script using the `--wheel` CLI arg, pointing at the wheel:
|
||||
|
||||
```sh
|
||||
./install.sh --wheel ../InvokeAI-4.0.0rc6-py3-none-any.whl
|
||||
```
|
||||
|
||||
- Install to a temporary directory so you get the new user experience
|
||||
- Download a model and generate
|
||||
|
||||
> The same wheel file is bundled in the installer and in the `dist` artifact, which is uploaded to PyPI. You should end up with exactly the same installation as if the installer got the wheel from PyPI.
|
||||
|
||||
##### Something isn't right
|
||||
|
||||
If testing reveals any issues, no worries. Cancel the workflow, which will cancel the pending publish jobs (you didn't approve them prematurely, right?).
|
||||
|
||||
Now you can start from the top:
|
||||
|
||||
- Fix the issues and PR the fixes per usual
|
||||
- Get the PR approved and merged per usual
|
||||
- Switch to `main` and pull in the fixes
|
||||
- Run `make tag-release` to move the tag to `HEAD` (which has the fixes) and kick off the release workflow again
|
||||
- Re-do the sanity check
|
||||
|
||||
#### PyPI Publish Jobs
|
||||
|
||||
The publish jobs will not run if any of the previous jobs fail.
|
||||
|
||||
They use [GitHub environments], which are configured as [trusted publishers] on PyPI.
|
||||
|
||||
Both jobs require a maintainer to approve them from the workflow's **Summary** tab.
|
||||
|
||||
- Click the **Review deployments** button
|
||||
- Select the environment (either `testpypi` or `pypi`)
|
||||
- Click **Approve and deploy**
|
||||
|
||||
> **If the version already exists on PyPI, the publish jobs will fail.** PyPI only allows a given version to be published once - you cannot change it. If the version published on PyPI has a problem, you'll need to "fail forward" by bumping the app version and publishing a followup release.
|
||||
|
||||
##### Failing PyPI Publish
|
||||
|
||||
Check the [python infrastructure status page] for incidents.
|
||||
|
||||
If there are no incidents, contact @hipsterusername or @lstein, who have owner access to GH and PyPI, to see if access has expired or something like that.
|
||||
|
||||
#### `publish-testpypi` Job
|
||||
|
||||
Publishes the distribution on the [Test PyPI] index, using the `testpypi` GitHub environment.
|
||||
|
||||
This job is not required for the production PyPI publish, but included just in case you want to test the PyPI release.
|
||||
|
||||
If approved and successful, you could try out the test release like this:
|
||||
|
||||
```sh
|
||||
# Create a new virtual environment
|
||||
python -m venv ~/.test-invokeai-dist --prompt test-invokeai-dist
|
||||
# Install the distribution from Test PyPI
|
||||
pip install --index-url https://test.pypi.org/simple/ invokeai
|
||||
# Run and test the app
|
||||
invokeai-web
|
||||
# Cleanup
|
||||
deactivate
|
||||
rm -rf ~/.test-invokeai-dist
|
||||
```
|
||||
|
||||
#### `publish-pypi` Job
|
||||
|
||||
Publishes the distribution on the production PyPI index, using the `pypi` GitHub environment.
|
||||
|
||||
## Publish the GitHub Release with installer
|
||||
|
||||
Once the release is published to PyPI, it's time to publish the GitHub release.
|
||||
|
||||
1. [Draft a new release] on GitHub, choosing the tag that triggered the release.
|
||||
1. Write the release notes, describing important changes. The **Generate release notes** button automatically inserts the changelog and new contributors, and you can copy/paste the intro from previous releases.
|
||||
1. Use `scripts/get_external_contributions.py` to get a list of external contributions to shout out in the release notes.
|
||||
1. Upload the zip file created in the **`build-installer`** job into the Assets section of the release notes.
|
||||
1. Check **Set as a pre-release** if it's a pre-release.
|
||||
1. Check **Create a discussion for this release**.
|
||||
1. Publish the release.
|
||||
1. Announce the release in Discord.
|
||||
|
||||
> **TODO** Workflows can create a GitHub release from a template and upload release assets. One popular action to handle this is [ncipollo/release-action]. A future enhancement to the release process could set this up.
|
||||
|
||||
## Manual Build
|
||||
|
||||
The `build installer` workflow can be dispatched manually. This is useful to test the installer for a given branch or tag.
|
||||
|
||||
No checks are run, it just builds.
|
||||
|
||||
## Manual Release
|
||||
|
||||
The `release` workflow can be dispatched manually. You must dispatch the workflow from the right tag, else it will fail the version check.
|
||||
|
||||
This functionality is available as a fallback in case something goes wonky. Typically, releases should be triggered via tag push as described above.
|
||||
|
||||
[InvokeAI Releases Page]: https://github.com/invoke-ai/InvokeAI/releases
|
||||
[PyPI]: https://pypi.org/
|
||||
[Draft a new release]: https://github.com/invoke-ai/InvokeAI/releases/new
|
||||
[Test PyPI]: https://test.pypi.org/
|
||||
[version specifier]: https://packaging.python.org/en/latest/specifications/version-specifiers/
|
||||
[ncipollo/release-action]: https://github.com/ncipollo/release-action
|
||||
[GitHub environments]: https://docs.github.com/en/actions/deployment/targeting-different-environments/using-environments-for-deployment
|
||||
[trusted publishers]: https://docs.pypi.org/trusted-publishers/
|
||||
[samuelcolvin/check-python-version]: https://github.com/samuelcolvin/check-python-version
|
||||
[manually]: #manual-release
|
||||
[python infrastructure status page]: https://status.python.org/
|
||||
|
Before Width: | Height: | Size: 46 KiB After Width: | Height: | Size: 310 KiB |
|
Before Width: | Height: | Size: 23 KiB |
|
Before Width: | Height: | Size: 2.7 KiB |
|
Before Width: | Height: | Size: 30 KiB |
|
Before Width: | Height: | Size: 221 KiB |
|
Before Width: | Height: | Size: 53 KiB |
|
Before Width: | Height: | Size: 786 B |
|
Before Width: | Height: | Size: 27 KiB |
|
Before Width: | Height: | Size: 3.3 KiB |
|
Before Width: | Height: | Size: 4.9 MiB After Width: | Height: | Size: 1.1 MiB |
|
Before Width: | Height: | Size: 1.1 MiB After Width: | Height: | Size: 169 KiB |
|
Before Width: | Height: | Size: 228 KiB After Width: | Height: | Size: 490 KiB |
|
Before Width: | Height: | Size: 131 KiB After Width: | Height: | Size: 335 KiB |
|
Before Width: | Height: | Size: 122 KiB After Width: | Height: | Size: 217 KiB |
|
Before Width: | Height: | Size: 95 KiB After Width: | Height: | Size: 244 KiB |
|
Before Width: | Height: | Size: 123 KiB After Width: | Height: | Size: 948 KiB |
|
Before Width: | Height: | Size: 107 KiB After Width: | Height: | Size: 292 KiB |
|
Before Width: | Height: | Size: 61 KiB After Width: | Height: | Size: 420 KiB |
|
Before Width: | Height: | Size: 119 KiB After Width: | Height: | Size: 179 KiB |
BIN
docs/assets/nodes/groupsrandseed.png
Normal file
|
After Width: | Height: | Size: 216 KiB |
|
Before Width: | Height: | Size: 60 KiB |
|
Before Width: | Height: | Size: 129 KiB |
|
Before Width: | Height: | Size: 421 KiB After Width: | Height: | Size: 501 KiB |
|
Before Width: | Height: | Size: 585 KiB After Width: | Height: | Size: 473 KiB |
|
Before Width: | Height: | Size: 598 KiB After Width: | Height: | Size: 557 KiB |
|
Before Width: | Height: | Size: 438 KiB After Width: | Height: | Size: 340 KiB |
|
Before Width: | Height: | Size: 64 KiB |
|
Before Width: | Height: | Size: 42 KiB |
@@ -1,183 +0,0 @@
|
||||
---
|
||||
title: Configuration
|
||||
---
|
||||
|
||||
# :material-tune-variant: InvokeAI Configuration
|
||||
|
||||
## Intro
|
||||
|
||||
Runtime settings, including the location of files and
|
||||
directories, memory usage, and performance, are managed via the
|
||||
`invokeai.yaml` config file or environment variables. A subset
|
||||
of settings may be set via commandline arguments.
|
||||
|
||||
Settings sources are used in this order:
|
||||
|
||||
- CLI args
|
||||
- Environment variables
|
||||
- `invokeai.yaml` settings
|
||||
- Fallback: defaults
|
||||
|
||||
### InvokeAI Root Directory
|
||||
|
||||
On startup, InvokeAI searches for its "root" directory. This is the directory
|
||||
that contains models, images, the database, and so on. It also contains
|
||||
a configuration file called `invokeai.yaml`.
|
||||
|
||||
InvokeAI searches for the root directory in this order:
|
||||
|
||||
1. The `--root <path>` CLI arg.
|
||||
2. The environment variable INVOKEAI_ROOT.
|
||||
3. The directory containing the currently active virtual environment.
|
||||
4. Fallback: a directory in the current user's home directory named `invokeai`.
|
||||
|
||||
### InvokeAI Configuration File
|
||||
|
||||
Inside the root directory, we read settings from the `invokeai.yaml` file.
|
||||
|
||||
It has two sections - one for internal use and one for user settings:
|
||||
|
||||
```yaml
|
||||
# Internal metadata - do not edit:
|
||||
schema_version: 4
|
||||
|
||||
# Put user settings here - see https://invoke-ai.github.io/InvokeAI/features/CONFIGURATION/:
|
||||
host: 0.0.0.0 # serve the app on your local network
|
||||
models_dir: D:\invokeai\models # store models on an external drive
|
||||
precision: float16 # always use fp16 precision
|
||||
```
|
||||
|
||||
The settings in this file will override the defaults. You only need
|
||||
to change this file if the default for a particular setting doesn't
|
||||
work for you.
|
||||
|
||||
You'll find an example file next to `invokeai.yaml` that shows the default values.
|
||||
|
||||
Some settings, like [Model Marketplace API Keys], require the YAML
|
||||
to be formatted correctly. Here is a [basic guide to YAML files].
|
||||
|
||||
#### Custom Config File Location
|
||||
|
||||
You can use any config file with the `--config` CLI arg. Pass in the path to the `invokeai.yaml` file you want to use.
|
||||
|
||||
Note that environment variables will trump any settings in the config file.
|
||||
|
||||
### Environment Variables
|
||||
|
||||
All settings may be set via environment variables by prefixing `INVOKEAI_`
|
||||
to the variable name. For example, `INVOKEAI_HOST` would set the `host`
|
||||
setting.
|
||||
|
||||
For non-primitive values, pass a JSON-encoded string:
|
||||
|
||||
```sh
|
||||
export INVOKEAI_REMOTE_API_TOKENS='[{"url_regex":"modelmarketplace", "token": "12345"}]'
|
||||
```
|
||||
|
||||
We suggest using `invokeai.yaml`, as it is more user-friendly.
|
||||
|
||||
### CLI Args
|
||||
|
||||
A subset of settings may be specified using CLI args:
|
||||
|
||||
- `--root`: specify the root directory
|
||||
- `--config`: override the default `invokeai.yaml` file location
|
||||
|
||||
### All Settings
|
||||
|
||||
Following the table are additional explanations for certain settings.
|
||||
|
||||
<!-- prettier-ignore-start -->
|
||||
::: invokeai.app.services.config.config_default.InvokeAIAppConfig
|
||||
options:
|
||||
show_root_heading: false
|
||||
members: false
|
||||
show_docstring_description: false
|
||||
show_category_heading: false
|
||||
<!-- prettier-ignore-end -->
|
||||
|
||||
#### Model Marketplace API Keys
|
||||
|
||||
Some model marketplaces require an API key to download models. You can provide a URL pattern and appropriate token in your `invokeai.yaml` file to provide that API key.
|
||||
|
||||
The pattern can be any valid regex (you may need to surround the pattern with quotes):
|
||||
|
||||
```yaml
|
||||
remote_api_tokens:
|
||||
# Any URL containing `models.com` will automatically use `your_models_com_token`
|
||||
- url_regex: models.com
|
||||
token: your_models_com_token
|
||||
# Any URL matching this contrived regex will use `some_other_token`
|
||||
- url_regex: '^[a-z]{3}whatever.*\.com$'
|
||||
token: some_other_token
|
||||
```
|
||||
|
||||
The provided token will be added as a `Bearer` token to the network requests to download the model files. As far as we know, this works for all model marketplaces that require authorization.
|
||||
|
||||
#### Model Hashing
|
||||
|
||||
Models are hashed during installation, providing a stable identifier for models across all platforms. Hashing is a one-time operation.
|
||||
|
||||
```yaml
|
||||
hashing_algorithm: blake3_single # default value
|
||||
```
|
||||
|
||||
You might want to change this setting, depending on your system:
|
||||
|
||||
- `blake3_single` (default): Single-threaded - best for spinning HDDs, still OK for SSDs
|
||||
- `blake3_multi`: Parallelized, memory-mapped implementation - best for SSDs, terrible for spinning disks
|
||||
- `random`: Skip hashing entirely - fastest but of course no hash
|
||||
|
||||
During the first startup after upgrading to v4, all of your models will be hashed. This can take a few minutes.
|
||||
|
||||
Most common algorithms are supported, like `md5`, `sha256`, and `sha512`. These are typically much, much slower than either of the BLAKE3 variants.
|
||||
|
||||
#### Path Settings
|
||||
|
||||
These options set the paths of various directories and files used by InvokeAI. Any user-defined paths should be absolute paths.
|
||||
|
||||
#### Logging
|
||||
|
||||
Several different log handler destinations are available, and multiple destinations are supported by providing a list:
|
||||
|
||||
```yaml
|
||||
log_handlers:
|
||||
- console
|
||||
- syslog=localhost
|
||||
- file=/var/log/invokeai.log
|
||||
```
|
||||
|
||||
- `console` is the default. It prints log messages to the command-line window from which InvokeAI was launched.
|
||||
|
||||
- `syslog` is only available on Linux and Macintosh systems. It uses
|
||||
the operating system's "syslog" facility to write log file entries
|
||||
locally or to a remote logging machine. `syslog` offers a variety
|
||||
of configuration options:
|
||||
|
||||
```yaml
|
||||
syslog=/dev/log  # log to the /dev/log device
|
||||
syslog=localhost  # log to the network logger running on the local machine
|
||||
syslog=localhost:512  # same as above, but using a non-standard port
|
||||
syslog=fredserver,facility=LOG_USER,socktype=SOCK_DGRAM
|
||||
- Log to LAN-connected server "fredserver" using the facility LOG_USER and datagram packets.
|
||||
```
|
||||
|
||||
- `http` can be used to log to a remote web server. The server must be
|
||||
properly configured to receive and act on log messages. The option
|
||||
accepts the URL to the web server, and a `method` argument
|
||||
indicating whether the message should be submitted using the GET or
|
||||
POST method.
|
||||
|
||||
```yaml
|
||||
http=http://my.server/path/to/logger,method=POST
|
||||
```
|
||||
|
||||
The `log_format` option provides several alternative formats:
|
||||
|
||||
- `color` - default format providing time, date and a message, using text colors to distinguish different log severities
|
||||
- `plain` - same as above, but monochrome text only
|
||||
- `syslog` - the log level and error message only, allowing the syslog system to attach the time and date
|
||||
- `legacy` - a format similar to the one used by the legacy 2.3 InvokeAI releases.
|
||||
|
||||
[basic guide to yaml files]: https://circleci.com/blog/what-is-yaml-a-beginner-s-guide/
|
||||
[Model Marketplace API Keys]: #model-marketplace-api-keys
|
||||
56
docs/contributing/CONTRIBUTING.md
Normal file
@@ -0,0 +1,56 @@
|
||||
# How to Contribute
|
||||
|
||||
## Welcome to Invoke AI
|
||||
Invoke AI originated as a project built by the community, and that vision carries forward today as we aim to build the best pro-grade tools available. We work together to incorporate the latest in AI/ML research, making these tools available in over 20 languages to artists and creatives around the world as part of our fully permissive OSS project designed for individual users to self-host and use.
|
||||
|
||||
|
||||
## Contributing to Invoke AI
|
||||
Anyone who wishes to contribute to InvokeAI, whether features, bug fixes, code cleanup, testing, code reviews, documentation or translation is very much encouraged to do so.
|
||||
|
||||
To join, just raise your hand on the InvokeAI Discord server (#dev-chat) or the GitHub discussion board.
|
||||
|
||||
### Areas of contribution:
|
||||
|
||||
#### Development
|
||||
If you’d like to help with development, please see our [development guide](contribution_guides/development.md). If you’re unfamiliar with contributing to open source projects, there is a tutorial contained within the development guide.
|
||||
|
||||
#### Documentation
|
||||
If you’d like to help with documentation, please see our [documentation guide](contribution_guides/documentation.md).
|
||||
|
||||
#### Translation
|
||||
If you'd like to help with translation, please see our [translation guide](contribution_guides/translation.md).
|
||||
|
||||
#### Tutorials
|
||||
Please reach out to @imic or @hipsterusername on [Discord](https://discord.gg/ZmtBAhwWhy) to help create tutorials for InvokeAI.
|
||||
|
||||
We hope you enjoy using our software as much as we enjoy creating it, and we hope that some of those of you who are reading this will elect to become part of our contributor community.
|
||||
|
||||
|
||||
### Contributors
|
||||
|
||||
This project is a combined effort of dedicated people from across the world. [Check out the list of all these amazing people](https://invoke-ai.github.io/InvokeAI/other/CONTRIBUTORS/). We thank them for their time, hard work and effort.
|
||||
|
||||
### Code of Conduct
|
||||
|
||||
The InvokeAI community is a welcoming place, and we want your help in maintaining that. Please review our [Code of Conduct](https://github.com/invoke-ai/InvokeAI/blob/main/CODE_OF_CONDUCT.md) to learn more - it's essential to maintaining a respectful and inclusive environment.
|
||||
|
||||
By making a contribution to this project, you certify that:
|
||||
|
||||
1. The contribution was created in whole or in part by you and you have the right to submit it under the open-source license indicated in this project’s GitHub repository; or
|
||||
2. The contribution is based upon previous work that, to the best of your knowledge, is covered under an appropriate open-source license and you have the right under that license to submit that work with modifications, whether created in whole or in part by you, under the same open-source license (unless you are permitted to submit under a different license); or
|
||||
3. The contribution was provided directly to you by some other person who certified (1) or (2) and you have not modified it; or
|
||||
4. You understand and agree that this project and the contribution are public and that a record of the contribution (including all personal information you submit with it, including your sign-off) is maintained indefinitely and may be redistributed consistent with this project or the open-source license(s) involved.
|
||||
|
||||
This disclaimer is not a license and does not grant any rights or permissions. You must obtain necessary permissions and licenses, including from third parties, before contributing to this project.
|
||||
|
||||
This disclaimer is provided "as is" without warranty of any kind, whether expressed or implied, including but not limited to the warranties of merchantability, fitness for a particular purpose, or non-infringement. In no event shall the authors or copyright holders be liable for any claim, damages, or other liability, whether in an action of contract, tort, or otherwise, arising from, out of, or in connection with the contribution or the use or other dealings in the contribution.
|
||||
|
||||
### Support
|
||||
|
||||
For support, please use this repository's [GitHub Issues](https://github.com/invoke-ai/InvokeAI/issues), or join the [Discord](https://discord.gg/ZmtBAhwWhy).
|
||||
|
||||
Original portions of the software are Copyright (c) 2023 by respective contributors.
|
||||
|
||||
---
|
||||
|
||||
Remember, your contributions help make this project great. We're excited to see what you'll bring to our community!
|
||||
@@ -1,334 +0,0 @@
|
||||
# The InvokeAI Download Queue
|
||||
|
||||
The DownloadQueueService provides a multithreaded parallel download
|
||||
queue for arbitrary URLs, with queue prioritization, event handling,
|
||||
and restart capabilities.
|
||||
|
||||
## Simple Example
|
||||
|
||||
```
|
||||
from invokeai.app.services.download import DownloadQueueService, TqdmProgress
|
||||
|
||||
download_queue = DownloadQueueService()
|
||||
for url in ['https://github.com/invoke-ai/InvokeAI/blob/main/invokeai/assets/a-painting-of-a-fire.png?raw=true',
|
||||
'https://github.com/invoke-ai/InvokeAI/blob/main/invokeai/assets/birdhouse.png?raw=true',
|
||||
'https://github.com/invoke-ai/InvokeAI/blob/main/invokeai/assets/missing.png',
|
||||
'https://civitai.com/api/download/models/152309?type=Model&format=SafeTensor',
|
||||
]:
|
||||
|
||||
# urls start downloading as soon as download() is called
|
||||
download_queue.download(source=url,
|
||||
dest='/tmp/downloads',
|
||||
on_progress=TqdmProgress().update
|
||||
)
|
||||
|
||||
download_queue.join() # wait for all downloads to finish
|
||||
for job in download_queue.list_jobs():
|
||||
print(job.model_dump_json(exclude_none=True, indent=4),"\n")
|
||||
```
|
||||
|
||||
Output:
|
||||
|
||||
```
|
||||
{
|
||||
"source": "https://github.com/invoke-ai/InvokeAI/blob/main/invokeai/assets/a-painting-of-a-fire.png?raw=true",
|
||||
"dest": "/tmp/downloads",
|
||||
"id": 0,
|
||||
"priority": 10,
|
||||
"status": "completed",
|
||||
"download_path": "/tmp/downloads/a-painting-of-a-fire.png",
|
||||
"job_started": "2023-12-04T05:34:41.742174",
|
||||
"job_ended": "2023-12-04T05:34:42.592035",
|
||||
"bytes": 666734,
|
||||
"total_bytes": 666734
|
||||
}
|
||||
|
||||
{
|
||||
"source": "https://github.com/invoke-ai/InvokeAI/blob/main/invokeai/assets/birdhouse.png?raw=true",
|
||||
"dest": "/tmp/downloads",
|
||||
"id": 1,
|
||||
"priority": 10,
|
||||
"status": "completed",
|
||||
"download_path": "/tmp/downloads/birdhouse.png",
|
||||
"job_started": "2023-12-04T05:34:41.741975",
|
||||
"job_ended": "2023-12-04T05:34:42.652841",
|
||||
"bytes": 774949,
|
||||
"total_bytes": 774949
|
||||
}
|
||||
|
||||
{
|
||||
"source": "https://github.com/invoke-ai/InvokeAI/blob/main/invokeai/assets/missing.png",
|
||||
"dest": "/tmp/downloads",
|
||||
"id": 2,
|
||||
"priority": 10,
|
||||
"status": "error",
|
||||
"job_started": "2023-12-04T05:34:41.742079",
|
||||
"job_ended": "2023-12-04T05:34:42.147625",
|
||||
"bytes": 0,
|
||||
"total_bytes": 0,
|
||||
"error_type": "HTTPError(Not Found)",
|
||||
"error": "Traceback (most recent call last):\n File \"/home/lstein/Projects/InvokeAI/invokeai/app/services/download/download_default.py\", line 182, in _download_next_item\n self._do_download(job)\n File \"/home/lstein/Projects/InvokeAI/invokeai/app/services/download/download_default.py\", line 206, in _do_download\n raise HTTPError(resp.reason)\nrequests.exceptions.HTTPError: Not Found\n"
|
||||
}
|
||||
|
||||
{
|
||||
"source": "https://civitai.com/api/download/models/152309?type=Model&format=SafeTensor",
|
||||
"dest": "/tmp/downloads",
|
||||
"id": 3,
|
||||
"priority": 10,
|
||||
"status": "completed",
|
||||
"download_path": "/tmp/downloads/xl_more_art-full_v1.safetensors",
|
||||
"job_started": "2023-12-04T05:34:42.147645",
|
||||
"job_ended": "2023-12-04T05:34:43.735990",
|
||||
"bytes": 719020768,
|
||||
"total_bytes": 719020768
|
||||
}
|
||||
```
|
||||
|
||||
## The API
|
||||
|
||||
The default download queue is `DownloadQueueService`, an
|
||||
implementation of ABC `DownloadQueueServiceBase`. It juggles multiple
|
||||
background download requests and provides facilities for interrogating
|
||||
and cancelling the requests. Access to a current or past download task
|
||||
is mediated via `DownloadJob` objects which report the current status
|
||||
of a job request.
|
||||
|
||||
### The Queue Object
|
||||
|
||||
A default download queue is located in
|
||||
`ApiDependencies.invoker.services.download_queue`. However, you can
|
||||
create additional instances if you need to isolate your queue from the
|
||||
main one.
|
||||
|
||||
```
|
||||
queue = DownloadQueueService(event_bus=events)
|
||||
```
|
||||
|
||||
`DownloadQueueService()` takes three optional arguments:
|
||||
|
||||
| **Argument** | **Type** | **Default** | **Description** |
|
||||
|----------------|-----------------|---------------|-----------------|
|
||||
| `max_parallel_dl` | int | 5 | Maximum number of simultaneous downloads allowed |
|
||||
| `event_bus` | EventServiceBase | None | System-wide FastAPI event bus for reporting download events |
|
||||
| `requests_session` | requests.sessions.Session | None | An alternative requests Session object to use for the download |
|
||||
|
||||
`max_parallel_dl` specifies how many download jobs are allowed to run
|
||||
simultaneously. Each will run in a different thread of execution.
|
||||
|
||||
`event_bus` is an EventServiceBase, typically the one created at
|
||||
InvokeAI startup. If present, download events are periodically emitted
|
||||
on this bus to allow clients to follow download progress.
|
||||
|
||||
`requests_session` is a `requests` library Session object. It is
|
||||
used for testing.
|
||||
|
||||
### The Job object
|
||||
|
||||
The queue operates on a series of download job objects. These objects
|
||||
specify the source and destination of the download, and keep track of
|
||||
the progress of the download.
|
||||
|
||||
Two job types are defined: `DownloadJob` and
|
||||
`MultiFileDownloadJob`. The former is a pydantic object with the
|
||||
following fields:
|
||||
|
||||
| **Field** | **Type** | **Default** | **Description** |
|
||||
|----------------|-----------------|---------------|-----------------|
|
||||
| _Fields passed in at job creation time_ |
|
||||
| `source` | AnyHttpUrl | | Where to download from |
|
||||
| `dest` | Path | | Where to download to |
|
||||
| `access_token` | str | | [optional] string containing authentication token for access |
|
||||
| `on_start` | Callable | | [optional] callback when the download starts |
|
||||
| `on_progress` | Callable | | [optional] callback called at intervals during download progress |
|
||||
| `on_complete` | Callable | | [optional] callback called after successful download completion |
|
||||
| `on_error` | Callable | | [optional] callback called after an error occurs |
|
||||
| `id` | int | auto assigned | Job ID, an integer >= 0 |
|
||||
| `priority` | int | 10 | Job priority. Lower priorities run before higher priorities |
|
||||
| |
|
||||
| _Fields updated over the course of the download task_ |
|
||||
| `status` | DownloadJobStatus| | Status code |
|
||||
| `download_path` | Path | | Path to the location of the downloaded file |
|
||||
| `job_started` | float | | Timestamp for when the job started running |
|
||||
| `job_ended` | float | | Timestamp for when the job completed or errored out |
|
||||
| `job_sequence` | int | | A counter that is incremented each time a model is dequeued |
|
||||
| `bytes` | int | 0 | Bytes downloaded so far |
|
||||
| `total_bytes` | int | 0 | Total size of the file at the remote site |
|
||||
| `error_type` | str | | String version of the exception that caused an error during download |
|
||||
| `error` | str | | String version of the traceback associated with an error |
|
||||
| `cancelled` | bool | False | Set to true if the job was cancelled by the caller|
|
||||
|
||||
When you create a job, you can assign it a `priority`. If multiple
|
||||
jobs are queued, the job with the lowest priority runs first.
|
||||
|
||||
Every job has a `source` and a `dest`. `source` is a pydantic.networks AnyHttpUrl object.
|
||||
The `dest` is a path on the local filesystem that specifies the
|
||||
destination for the downloaded object. Its semantics are
|
||||
described below.
|
||||
|
||||
When the job is submitted, it is assigned a numeric `id`. The id can
|
||||
then be used to fetch the job object from the queue.
|
||||
|
||||
The `status` field is updated by the queue to indicate where the job
|
||||
is in its lifecycle. Values are defined in the string enum
|
||||
`DownloadJobStatus`, a symbol available from
|
||||
`invokeai.app.services.download`. Possible values are:
|
||||
|
||||
| **Value** | **String Value** | **Description** |
|
||||
|--------------|---------------------|-------------------|
|
||||
| `WAITING` | waiting | Job is on the queue but not yet running|
|
||||
| `RUNNING` | running | The download is started |
|
||||
| `COMPLETED` | completed | Job has finished its work without an error |
|
||||
| `ERROR` | error | Job encountered an error and will not run again|
|
||||
|
||||
`job_started` and `job_ended` indicate when the job
|
||||
was started (using a python timestamp) and when it completed.
|
||||
|
||||
In case of an error, the job's status will be set to `DownloadJobStatus.ERROR`, the text of the
|
||||
Exception that caused the error will be placed in the `error_type`
|
||||
field and the traceback that led to the error will be in `error`.
|
||||
|
||||
A cancelled job will have status `DownloadJobStatus.ERROR` and an
|
||||
`error_type` field of "DownloadJobCancelledException". In addition,
|
||||
the job's `cancelled` property will be set to True.
|
||||
|
||||
The `MultiFileDownloadJob` is used for diffusers model downloads,
|
||||
which contain multiple files and directories under a common root:
|
||||
|
||||
| **Field** | **Type** | **Default** | **Description** |
|
||||
|----------------|-----------------|---------------|-----------------|
|
||||
| _Fields passed in at job creation time_ |
|
||||
| `download_parts` | Set[DownloadJob]| | Component download jobs |
|
||||
| `dest` | Path | | Where to download to |
|
||||
| `on_start` | Callable | | [optional] callback when the download starts |
|
||||
| `on_progress` | Callable | | [optional] callback called at intervals during download progress |
|
||||
| `on_complete` | Callable | | [optional] callback called after successful download completion |
|
||||
| `on_error` | Callable | | [optional] callback called after an error occurs |
|
||||
| `id` | int | auto assigned | Job ID, an integer >= 0 |
|
||||
| _Fields updated over the course of the download task_ |
|
||||
| `status` | DownloadJobStatus| | Status code |
|
||||
| `download_path` | Path | | Path to the root of the downloaded files |
|
||||
| `bytes` | int | 0 | Bytes downloaded so far |
|
||||
| `total_bytes` | int | 0 | Total size of the file at the remote site |
|
||||
| `error_type` | str | | String version of the exception that caused an error during download |
|
||||
| `error` | str | | String version of the traceback associated with an error |
|
||||
| `cancelled` | bool | False | Set to true if the job was cancelled by the caller|
|
||||
|
||||
Note that the MultiFileDownloadJob does not support the `priority`,
|
||||
`job_started`, `job_ended` or `content_type` attributes. You can get
|
||||
these from the individual download jobs in `download_parts`.
|
||||
|
||||
|
||||
### Callbacks
|
||||
|
||||
Download jobs can be associated with a series of callbacks, each with
|
||||
the signature `Callable[["DownloadJob"], None]`. The callbacks are assigned
|
||||
using optional arguments `on_start`, `on_progress`, `on_complete` and
|
||||
`on_error`. When the corresponding event occurs, the callback will be
|
||||
invoked and passed the job. The callback will be run in a `try:`
|
||||
context in the same thread as the download job. Any exceptions that
|
||||
occur during execution of the callback will be caught and converted
|
||||
into a log error message, thereby allowing the download to continue.
|
||||
|
||||
#### `TqdmProgress`
|
||||
|
||||
The `invokeai.app.services.download.download_default` module defines a
|
||||
class named `TqdmProgress` which can be used as an `on_progress`
|
||||
handler to display a completion bar in the console. Use as follows:
|
||||
|
||||
```
|
||||
from invokeai.app.services.download import TqdmProgress
|
||||
|
||||
download_queue.download(source='http://some.server.somewhere/some_file',
|
||||
dest='/tmp/downloads',
|
||||
on_progress=TqdmProgress().update
|
||||
)
|
||||
|
||||
```
|
||||
|
||||
### Events
|
||||
|
||||
If the queue was initialized with the InvokeAI event bus (the case
|
||||
when using `ApiDependencies.invoker.services.download_queue`), then
|
||||
download events will also be issued on the bus. The events are:
|
||||
|
||||
* `download_started` -- This is issued when a job is taken off the
|
||||
queue and a request is made to the remote server for the URL headers, but before any data
|
||||
has been downloaded. The event payload will contain the keys `source`
|
||||
and `download_path`. The latter contains the path that the URL will be
|
||||
downloaded to.
|
||||
|
||||
* `download_progress` -- This is issued periodically as the download
|
||||
runs. The payload contains the keys `source`, `download_path`,
|
||||
`current_bytes` and `total_bytes`. The latter two fields can be
|
||||
used to display the percent complete.
|
||||
|
||||
* `download_complete` -- This is issued when the download completes
|
||||
successfully. The payload contains the keys `source`, `download_path`
|
||||
and `total_bytes`.
|
||||
|
||||
* `download_error` -- This is issued when the download stops because
|
||||
of an error condition. The payload contains the fields `error_type`
|
||||
and `error`. The former is the text representation of the exception,
|
||||
and the latter is a traceback showing where the error occurred.
|
||||
|
||||
### Job control
|
||||
|
||||
To create a job call the queue's `download()` method. You can list all
|
||||
jobs using `list_jobs()`, fetch a single job by its ID with
|
||||
`id_to_job()`, cancel a running job with `cancel_job()`, cancel all
|
||||
running jobs with `cancel_all_jobs()`, and wait for all jobs to finish
|
||||
with `join()`.
|
||||
|
||||
#### job = queue.download(source, dest, priority, access_token, on_start, on_progress, on_complete, on_cancelled, on_error)
|
||||
|
||||
Create a new download job and put it on the queue, returning the
|
||||
DownloadJob object.
|
||||
|
||||
#### multifile_job = queue.multifile_download(parts, dest, access_token, on_start, on_progress, on_complete, on_cancelled, on_error)
|
||||
|
||||
This is similar to download(), but instead of taking a single source,
|
||||
it accepts a `parts` argument consisting of a list of
|
||||
`RemoteModelFile` objects. Each part corresponds to a URL/Path pair,
|
||||
where the URL is the location of the remote file, and the Path is the
|
||||
destination.
|
||||
|
||||
`RemoteModelFile` can be imported from `invokeai.backend.model_manager.metadata`, and
|
||||
consists of a url/path pair. Note that the path *must* be relative.
|
||||
|
||||
The method returns a `MultiFileDownloadJob`.
|
||||
|
||||
|
||||
```
|
||||
from invokeai.backend.model_manager.metadata import RemoteModelFile
|
||||
remote_file_1 = RemoteModelFile(url='http://www.foo.bar/my/pytorch_model.safetensors',
|
||||
path='my_model/textencoder/pytorch_model.safetensors'
|
||||
)
|
||||
remote_file_2 = RemoteModelFile(url='http://www.bar.baz/vae.ckpt',
|
||||
path='my_model/vae/diffusers_model.safetensors'
|
||||
)
|
||||
job = queue.multifile_download(parts=[remote_file_1, remote_file_2],
|
||||
dest='/tmp/downloads',
|
||||
on_progress=TqdmProgress().update)
|
||||
queue.wait_for_job(job)
|
||||
print(f"The files were downloaded to {job.download_path}")
|
||||
```
|
||||
|
||||
#### jobs = queue.list_jobs()
|
||||
|
||||
Return a list of all active and inactive `DownloadJob`s.
|
||||
|
||||
#### job = queue.id_to_job(id)
|
||||
|
||||
Return the job corresponding to given ID.
|
||||
|
||||
Return a list of all active and inactive `DownloadJob`s.
|
||||
|
||||
#### queue.prune_jobs()
|
||||
|
||||
Remove inactive (complete or errored) jobs from the listing returned
|
||||
by `list_jobs()`.
|
||||
|
||||
#### queue.join()
|
||||
|
||||
Block until all pending jobs have run to completion or errored out.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# Nodes
|
||||
# Invocations
|
||||
|
||||
Features in InvokeAI are added in the form of modular nodes systems called
|
||||
Features in InvokeAI are added in the form of modular node-like systems called
|
||||
**Invocations**.
|
||||
|
||||
An Invocation is simply a single operation that takes in some inputs and gives
|
||||
@@ -9,38 +9,13 @@ complex functionality.
|
||||
|
||||
## Invocations Directory
|
||||
|
||||
InvokeAI Nodes can be found in the `invokeai/app/invocations` directory. These
|
||||
can be used as examples to create your own nodes.
|
||||
InvokeAI Invocations can be found in the `invokeai/app/invocations` directory.
|
||||
|
||||
New nodes should be added to a subfolder in the `nodes` directory found at the root
|
||||
level of the InvokeAI installation location. Nodes added to this folder will be
|
||||
able to be used upon application startup.
|
||||
You can add your new functionality to one of the existing Invocations in this
|
||||
directory or create a new file in this directory as per your needs.
|
||||
|
||||
Example `nodes` subfolder structure:
|
||||
|
||||
```py
|
||||
├── __init__.py # Invoke-managed custom node loader
|
||||
│
|
||||
├── cool_node
|
||||
│ ├── __init__.py # see example below
|
||||
│ └── cool_node.py
|
||||
│
|
||||
└── my_node_pack
|
||||
├── __init__.py # see example below
|
||||
├── tasty_node.py
|
||||
├── bodacious_node.py
|
||||
├── utils.py
|
||||
└── extra_nodes
|
||||
└── fancy_node.py
|
||||
```
|
||||
|
||||
Each node folder must have an `__init__.py` file that imports its nodes. Only
|
||||
nodes imported in the `__init__.py` file are loaded. See the README in the nodes
|
||||
folder for more examples:
|
||||
|
||||
```py
|
||||
from .cool_node import CoolInvocation
|
||||
```
|
||||
**Note:** _All Invocations must be inside this directory for InvokeAI to
|
||||
recognize them as valid Invocations._
|
||||
|
||||
## Creating A New Invocation
|
||||
|
||||
@@ -54,13 +29,12 @@ The first set of things we need to do when creating a new Invocation are -
|
||||
|
||||
- Create a new class that derives from a predefined parent class called
|
||||
`BaseInvocation`.
|
||||
- The name of every Invocation must end with the word `Invocation` in order for
|
||||
it to be recognized as an Invocation.
|
||||
- Every Invocation must have a `docstring` that describes what this Invocation
|
||||
does.
|
||||
- While not strictly required, we suggest every invocation class name ends in
|
||||
"Invocation", eg "CropImageInvocation".
|
||||
- Every Invocation must use the `@invocation` decorator to provide its unique
|
||||
invocation type. You may also provide its title, tags and category using the
|
||||
decorator.
|
||||
- Every Invocation must have a unique `type` field defined which becomes its
|
||||
identifier.
|
||||
- Invocations are strictly typed. We make use of the native
|
||||
[typing](https://docs.python.org/3/library/typing.html) library and the
|
||||
installed [pydantic](https://pydantic-docs.helpmanual.io/) library for
|
||||
@@ -69,11 +43,12 @@ The first set of things we need to do when creating a new Invocation are -
|
||||
So let us do that.
|
||||
|
||||
```python
|
||||
from invokeai.app.invocations.baseinvocation import BaseInvocation, invocation
|
||||
from typing import Literal
|
||||
from .baseinvocation import BaseInvocation
|
||||
|
||||
@invocation('resize')
|
||||
class ResizeInvocation(BaseInvocation):
|
||||
'''Resizes an image'''
|
||||
type: Literal['resize'] = 'resize'
|
||||
```
|
||||
|
||||
That's great.
|
||||
@@ -87,10 +62,8 @@ our Invocation takes.
|
||||
|
||||
### **Inputs**
|
||||
|
||||
Every Invocation input must be defined using the `InputField` function. This is
|
||||
a wrapper around the pydantic `Field` function, which handles a few extra things
|
||||
and provides type hints. Like everything else, this should be strictly typed and
|
||||
defined.
|
||||
Every Invocation input is a pydantic `Field` and like everything else should be
|
||||
strictly typed and defined.
|
||||
|
||||
So let us create these inputs for our Invocation. First up, the `image` input we
|
||||
need. Generally, we can use standard variable types in Python but InvokeAI
|
||||
@@ -103,50 +76,55 @@ create your own custom field types later in this guide. For now, let's go ahead
|
||||
and use it.
|
||||
|
||||
```python
|
||||
from invokeai.app.invocations.baseinvocation import BaseInvocation, InputField, invocation
|
||||
from invokeai.app.invocations.primitives import ImageField
|
||||
from typing import Literal, Union
|
||||
from pydantic import Field
|
||||
|
||||
from .baseinvocation import BaseInvocation
|
||||
from ..models.image import ImageField
|
||||
|
||||
@invocation('resize')
|
||||
class ResizeInvocation(BaseInvocation):
|
||||
'''Resizes an image'''
|
||||
type: Literal['resize'] = 'resize'
|
||||
|
||||
# Inputs
|
||||
image: ImageField = InputField(description="The input image")
|
||||
image: Union[ImageField, None] = Field(description="The input image", default=None)
|
||||
```
|
||||
|
||||
Let us break down our input code.
|
||||
|
||||
```python
|
||||
image: ImageField = InputField(description="The input image")
|
||||
image: Union[ImageField, None] = Field(description="The input image", default=None)
|
||||
```
|
||||
|
||||
| Part | Value | Description |
|
||||
| --------- | ------------------------------------------- | ------------------------------------------------------------------------------- |
|
||||
| Name | `image` | The variable that will hold our image |
|
||||
| Type Hint | `ImageField` | The types for our field. Indicates that the image must be an `ImageField` type. |
|
||||
| Field | `InputField(description="The input image")` | The image variable is an `InputField` which needs a description. |
|
||||
| Part | Value | Description |
|
||||
| --------- | ---------------------------------------------------- | -------------------------------------------------------------------------------------------------- |
|
||||
| Name | `image` | The variable that will hold our image |
|
||||
| Type Hint | `Union[ImageField, None]` | The types for our field. Indicates that the image can either be an `ImageField` type or `None` |
|
||||
| Field | `Field(description="The input image", default=None)` | The image variable is a field which needs a description and a default value that we set to `None`. |
|
||||
|
||||
Great. Now let us create our other inputs for `width` and `height`
|
||||
|
||||
```python
|
||||
from invokeai.app.invocations.baseinvocation import BaseInvocation, InputField, invocation
|
||||
from invokeai.app.invocations.primitives import ImageField
|
||||
from typing import Literal, Union
|
||||
from pydantic import Field
|
||||
|
||||
from .baseinvocation import BaseInvocation
|
||||
from ..models.image import ImageField
|
||||
|
||||
@invocation('resize')
|
||||
class ResizeInvocation(BaseInvocation):
|
||||
'''Resizes an image'''
|
||||
type: Literal['resize'] = 'resize'
|
||||
|
||||
image: ImageField = InputField(description="The input image")
|
||||
width: int = InputField(default=512, ge=64, le=2048, description="Width of the new image")
|
||||
height: int = InputField(default=512, ge=64, le=2048, description="Height of the new image")
|
||||
# Inputs
|
||||
image: Union[ImageField, None] = Field(description="The input image", default=None)
|
||||
width: int = Field(default=512, ge=64, le=2048, description="Width of the new image")
|
||||
height: int = Field(default=512, ge=64, le=2048, description="Height of the new image")
|
||||
```
|
||||
|
||||
As you might have noticed, we added two new arguments to the `InputField`
|
||||
definition for `width` and `height`, called `ge` and `le`. They stand for
|
||||
_greater than or equal to_ and _less than or equal to_.
|
||||
|
||||
These impose constraints on those fields, and will raise an exception if the
|
||||
values do not meet the constraints. Field constraints are provided by
|
||||
**pydantic**, so anything you see in the **pydantic docs** will work.
|
||||
As you might have noticed, we added two new parameters to the field type for
|
||||
`width` and `height` called `ge` and `le`. These basically stand for _greater
|
||||
than or equal to_ and _less than or equal to_. There are various other param
|
||||
types for field that you can find on the **pydantic** documentation.
|
||||
|
||||
**Note:** _Any time it is possible to define constraints for our field, we
|
||||
should do it so the frontend has more information on how to parse this field._
|
||||
@@ -163,16 +141,20 @@ that are provided by it by InvokeAI.
|
||||
Let us create this function first.
|
||||
|
||||
```python
|
||||
from invokeai.app.invocations.baseinvocation import BaseInvocation, InputField, invocation, InvocationContext
|
||||
from invokeai.app.invocations.primitives import ImageField
|
||||
from typing import Literal, Union
|
||||
from pydantic import Field
|
||||
|
||||
from .baseinvocation import BaseInvocation, InvocationContext
|
||||
from ..models.image import ImageField
|
||||
|
||||
@invocation('resize')
|
||||
class ResizeInvocation(BaseInvocation):
|
||||
'''Resizes an image'''
|
||||
type: Literal['resize'] = 'resize'
|
||||
|
||||
image: ImageField = InputField(description="The input image")
|
||||
width: int = InputField(default=512, ge=64, le=2048, description="Width of the new image")
|
||||
height: int = InputField(default=512, ge=64, le=2048, description="Height of the new image")
|
||||
# Inputs
|
||||
image: Union[ImageField, None] = Field(description="The input image", default=None)
|
||||
width: int = Field(default=512, ge=64, le=2048, description="Width of the new image")
|
||||
height: int = Field(default=512, ge=64, le=2048, description="Height of the new image")
|
||||
|
||||
def invoke(self, context: InvocationContext):
|
||||
pass
|
||||
@@ -191,17 +173,21 @@ all the necessary info related to image outputs. So let us use that.
|
||||
We will cover how to create your own output types later in this guide.
|
||||
|
||||
```python
|
||||
from invokeai.app.invocations.baseinvocation import BaseInvocation, InputField, invocation, InvocationContext
|
||||
from invokeai.app.invocations.primitives import ImageField
|
||||
from invokeai.app.invocations.image import ImageOutput
|
||||
from typing import Literal, Union
|
||||
from pydantic import Field
|
||||
|
||||
from .baseinvocation import BaseInvocation, InvocationContext
|
||||
from ..models.image import ImageField
|
||||
from .image import ImageOutput
|
||||
|
||||
@invocation('resize')
|
||||
class ResizeInvocation(BaseInvocation):
|
||||
'''Resizes an image'''
|
||||
type: Literal['resize'] = 'resize'
|
||||
|
||||
image: ImageField = InputField(description="The input image")
|
||||
width: int = InputField(default=512, ge=64, le=2048, description="Width of the new image")
|
||||
height: int = InputField(default=512, ge=64, le=2048, description="Height of the new image")
|
||||
# Inputs
|
||||
image: Union[ImageField, None] = Field(description="The input image", default=None)
|
||||
width: int = Field(default=512, ge=64, le=2048, description="Width of the new image")
|
||||
height: int = Field(default=512, ge=64, le=2048, description="Height of the new image")
|
||||
|
||||
def invoke(self, context: InvocationContext) -> ImageOutput:
|
||||
pass
|
||||
@@ -209,38 +195,57 @@ class ResizeInvocation(BaseInvocation):
|
||||
|
||||
Perfect. Now that we have our Invocation setup, let us do what we want to do.
|
||||
|
||||
- We will first load the image using one of the services provided by InvokeAI to
|
||||
load the image.
|
||||
- We will first load the image. Generally we do this using the `PIL` library but
|
||||
we can use one of the services provided by InvokeAI to load the image.
|
||||
- We will resize the image using `PIL` to our input data.
|
||||
- We will output this image in the format we set above.
|
||||
|
||||
So let's do that.
|
||||
|
||||
```python
|
||||
from invokeai.app.invocations.baseinvocation import BaseInvocation, InputField, invocation, InvocationContext
|
||||
from invokeai.app.invocations.primitives import ImageField
|
||||
from invokeai.app.invocations.image import ImageOutput, ResourceOrigin, ImageCategory
|
||||
from typing import Literal, Union
|
||||
from pydantic import Field
|
||||
|
||||
from .baseinvocation import BaseInvocation, InvocationContext
|
||||
from ..models.image import ImageField, ResourceOrigin, ImageCategory
|
||||
from .image import ImageOutput
|
||||
|
||||
@invocation("resize")
|
||||
class ResizeInvocation(BaseInvocation):
|
||||
"""Resizes an image"""
|
||||
'''Resizes an image'''
|
||||
type: Literal['resize'] = 'resize'
|
||||
|
||||
image: ImageField = InputField(description="The input image")
|
||||
width: int = InputField(default=512, ge=64, le=2048, description="Width of the new image")
|
||||
height: int = InputField(default=512, ge=64, le=2048, description="Height of the new image")
|
||||
# Inputs
|
||||
image: Union[ImageField, None] = Field(description="The input image", default=None)
|
||||
width: int = Field(default=512, ge=64, le=2048, description="Width of the new image")
|
||||
height: int = Field(default=512, ge=64, le=2048, description="Height of the new image")
|
||||
|
||||
def invoke(self, context: InvocationContext) -> ImageOutput:
|
||||
# Load the input image as a PIL image
|
||||
image = context.images.get_pil(self.image.image_name)
|
||||
# Load the image using InvokeAI's predefined Image Service.
|
||||
image = context.services.images.get_pil_image(self.image.image_origin, self.image.image_name)
|
||||
|
||||
# Resize the image
|
||||
# Resizing the image
|
||||
# Because we used the above service, we already have a PIL image. So we can simply resize.
|
||||
resized_image = image.resize((self.width, self.height))
|
||||
|
||||
# Save the image
|
||||
image_dto = context.images.save(image=resized_image)
|
||||
# Preparing the image for output using InvokeAI's predefined Image Service.
|
||||
output_image = context.services.images.create(
|
||||
image=resized_image,
|
||||
image_origin=ResourceOrigin.INTERNAL,
|
||||
image_category=ImageCategory.GENERAL,
|
||||
node_id=self.id,
|
||||
session_id=context.graph_execution_state_id,
|
||||
is_intermediate=self.is_intermediate,
|
||||
)
|
||||
|
||||
# Return an ImageOutput
|
||||
return ImageOutput.build(image_dto)
|
||||
# Returning the Image
|
||||
return ImageOutput(
|
||||
image=ImageField(
|
||||
image_name=output_image.image_name,
|
||||
image_origin=output_image.image_origin,
|
||||
),
|
||||
width=output_image.width,
|
||||
height=output_image.height,
|
||||
)
|
||||
```
|
||||
|
||||
**Note:** Do not be overwhelmed by the `ImageOutput` process. InvokeAI has a
|
||||
@@ -248,24 +253,6 @@ certain way that the images need to be dispatched in order to be stored and read
|
||||
correctly. In 99% of the cases when dealing with an image output, you can simply
|
||||
copy-paste the template above.
|
||||
|
||||
### Customization
|
||||
|
||||
We can use the `@invocation` decorator to provide some additional info to the
|
||||
UI, like a custom title, tags and category.
|
||||
|
||||
We also encourage providing a version. This must be a
|
||||
[semver](https://semver.org/) version string ("$MAJOR.$MINOR.$PATCH"). The UI
|
||||
will let users know if their workflow is using a mismatched version of the node.
|
||||
|
||||
```python
|
||||
@invocation("resize", title="My Resizer", tags=["resize", "image"], category="My Invocations", version="1.0.0")
|
||||
class ResizeInvocation(BaseInvocation):
|
||||
"""Resizes an image"""
|
||||
|
||||
image: ImageField = InputField(description="The input image")
|
||||
...
|
||||
```
|
||||
|
||||
That's it. You made your own **Resize Invocation**.
|
||||
|
||||
## Result
|
||||
@@ -283,73 +270,27 @@ new Invocation ready to be used.
|
||||
|
||||

|
||||
|
||||
## Contributing Nodes
|
||||
# Advanced
|
||||
|
||||
Once you've created a Node, the next step is to share it with the community! The
|
||||
best way to do this is to submit a Pull Request to add the Node to the
|
||||
[Community Nodes](nodes/communityNodes) list. If you're not sure how to do that,
|
||||
take a look at our [contributing nodes overview](contributingNodes).
|
||||
|
||||
## Advanced
|
||||
|
||||
### Custom Output Types
|
||||
|
||||
Like with custom inputs, sometimes you might find yourself needing custom
|
||||
outputs that InvokeAI does not provide. We can easily set one up.
|
||||
|
||||
Now that you are familiar with Invocations and Inputs, let us use that knowledge
|
||||
to create an output that has an `image` field, a `color` field and a `string`
|
||||
field.
|
||||
|
||||
- An invocation output is a class that derives from the parent class of
|
||||
`BaseInvocationOutput`.
|
||||
- All invocation outputs must use the `@invocation_output` decorator to provide
|
||||
their unique output type.
|
||||
- Output fields must use the provided `OutputField` function. This is very
|
||||
similar to the `InputField` function described earlier - it's a wrapper around
|
||||
`pydantic`'s `Field()`.
|
||||
- It is not mandatory but we recommend using names ending with `Output` for
|
||||
output types.
|
||||
- It is not mandatory but we highly recommend adding a `docstring` to describe
|
||||
what your output type is for.
|
||||
|
||||
Now that we know the basic rules for creating a new output type, let us go ahead
|
||||
and make it.
|
||||
|
||||
```python
|
||||
from .baseinvocation import BaseInvocationOutput, OutputField, invocation_output
|
||||
from .primitives import ImageField, ColorField
|
||||
|
||||
@invocation_output('image_color_string_output')
|
||||
class ImageColorStringOutput(BaseInvocationOutput):
|
||||
'''Base class for nodes that output a single image'''
|
||||
|
||||
image: ImageField = OutputField(description="The image")
|
||||
color: ColorField = OutputField(description="The color")
|
||||
text: str = OutputField(description="The string")
|
||||
```
|
||||
|
||||
That's all there is to it.
|
||||
|
||||
### Custom Input Fields
|
||||
## Custom Input Fields
|
||||
|
||||
Now that you know how to create your own Invocations, let us dive into slightly
|
||||
more advanced topics.
|
||||
|
||||
While creating your own Invocations, you might run into a scenario where the
|
||||
existing fields in InvokeAI do not meet your requirements. In such cases, you
|
||||
can create your own fields.
|
||||
existing input types in InvokeAI do not meet your requirements. In such cases,
|
||||
you can create your own input types.
|
||||
|
||||
Let us create one as an example. Let us say we want to create a color input
|
||||
field that represents a color code. But before we start on that here are some
|
||||
general good practices to keep in mind.
|
||||
|
||||
### Best Practices
|
||||
**Good Practices**
|
||||
|
||||
- There is no naming convention for input fields but we highly recommend that
|
||||
you name it something appropriate like `ColorField`.
|
||||
- It is not mandatory but it is heavily recommended to add a relevant
|
||||
`docstring` to describe your field.
|
||||
`docstring` to describe your input field.
|
||||
- Keep your field in the same file as the Invocation that it is made for or in
|
||||
another file where it is relevant.
|
||||
|
||||
@@ -364,13 +305,10 @@ class ColorField(BaseModel):
|
||||
pass
|
||||
```
|
||||
|
||||
Perfect. Now let us create the properties for our field. This is similar to how
|
||||
you created input fields for your Invocation. All the same rules apply. Let us
|
||||
create four fields representing the _red(r)_, _blue(b)_, _green(g)_ and
|
||||
_alpha(a)_ channel of the color.
|
||||
|
||||
> Technically, the properties are _also_ called fields - but in this case, it
|
||||
> refers to a `pydantic` field.
|
||||
Perfect. Now let us create our custom inputs for our field. This is exactly
|
||||
similar to how you created input fields for your Invocation. All the same rules
|
||||
apply. Let us create four fields representing the _red(r)_, _blue(b)_,
|
||||
_green(g)_ and _alpha(a)_ channel of the color.
|
||||
|
||||
```python
|
||||
class ColorField(BaseModel):
|
||||
@@ -385,11 +323,468 @@ That's it. We now have a new input field type that we can use in our Invocations
|
||||
like this.
|
||||
|
||||
```python
|
||||
color: ColorField = InputField(default=ColorField(r=0, g=0, b=0, a=0), description='Background color of an image')
|
||||
color: ColorField = Field(default=ColorField(r=0, g=0, b=0, a=0), description='Background color of an image')
|
||||
```
|
||||
|
||||
### Using the custom field
|
||||
**Extra Config**
|
||||
|
||||
When you start the UI, your custom field will be automatically recognized.
|
||||
All input fields also take an additional `Config` class that you can use to do
|
||||
various advanced things like setting required parameters, etc.
|
||||
|
||||
Custom fields only support connection inputs in the Workflow Editor.
|
||||
Let us do that for our _ColorField_ and enforce all the values because we did
|
||||
not define any defaults for our fields.
|
||||
|
||||
```python
|
||||
class ColorField(BaseModel):
|
||||
'''A field that holds the rgba values of a color'''
|
||||
r: int = Field(ge=0, le=255, description="The red channel")
|
||||
g: int = Field(ge=0, le=255, description="The green channel")
|
||||
b: int = Field(ge=0, le=255, description="The blue channel")
|
||||
a: int = Field(ge=0, le=255, description="The alpha channel")
|
||||
|
||||
class Config:
|
||||
schema_extra = {"required": ["r", "g", "b", "a"]}
|
||||
```
|
||||
|
||||
Now it becomes mandatory for the user to supply all the values required by our
|
||||
input field.
|
||||
|
||||
We will discuss the `Config` class in extra detail later in this guide and how
|
||||
you can use it to make your Invocations more robust.
|
||||
|
||||
## Custom Output Types
|
||||
|
||||
Like with custom inputs, sometimes you might find yourself needing custom
|
||||
outputs that InvokeAI does not provide. We can easily set one up.
|
||||
|
||||
Now that you are familiar with Invocations and Inputs, let us use that knowledge
|
||||
to put together a custom output type for an Invocation that returns _width_,
|
||||
_height_ and _background_color_ that we need to create a blank image.
|
||||
|
||||
- A custom output type is a class that derives from the parent class of
|
||||
`BaseInvocationOutput`.
|
||||
- It is not mandatory but we recommend using names ending with `Output` for
|
||||
output types. So we'll call our class `BlankImageOutput`
|
||||
- It is not mandatory but we highly recommend adding a `docstring` to describe
|
||||
what your output type is for.
|
||||
- Like Invocations, each output type should have a `type` variable that is
|
||||
**unique**
|
||||
|
||||
Now that we know the basic rules for creating a new output type, let us go ahead
|
||||
and make it.
|
||||
|
||||
```python
|
||||
from typing import Literal
|
||||
from pydantic import Field
|
||||
|
||||
from .baseinvocation import BaseInvocationOutput
|
||||
|
||||
class BlankImageOutput(BaseInvocationOutput):
|
||||
'''Base output type for creating a blank image'''
|
||||
type: Literal['blank_image_output'] = 'blank_image_output'
|
||||
|
||||
# Inputs
|
||||
width: int = Field(description='Width of blank image')
|
||||
height: int = Field(description='Height of blank image')
|
||||
bg_color: ColorField = Field(description='Background color of blank image')
|
||||
|
||||
class Config:
|
||||
schema_extra = {"required": ["type", "width", "height", "bg_color"]}
|
||||
```
|
||||
|
||||
All set. We now have an output type that requires what we need to create a
|
||||
blank_image. And if you noticed it, we even used the `Config` class to ensure
|
||||
the fields are required.
|
||||
|
||||
## Custom Configuration
|
||||
|
||||
As you might have noticed when making inputs and outputs, we used a class called
|
||||
`Config` from _pydantic_ to further customize them. Because our inputs and
|
||||
outputs essentially inherit from _pydantic_'s `BaseModel` class, all
|
||||
[configuration options](https://docs.pydantic.dev/latest/usage/schema/#schema-customization)
|
||||
that are valid for _pydantic_ classes are also valid for our inputs and outputs.
|
||||
You can do the same for your Invocations too but InvokeAI makes our life a
|
||||
little bit easier on that end.
|
||||
|
||||
InvokeAI provides a custom configuration class called `InvocationConfig`
|
||||
particularly for configuring Invocations. This is exactly the same as the raw
|
||||
`Config` class from _pydantic_ with some extra stuff on top to help facilitate
|
||||
parsing of the scheme in the frontend UI.
|
||||
|
||||
At the current moment, this `InvocationConfig` class is further improved with
|
||||
the following features related to the `ui`.
|
||||
|
||||
| Config Option | Field Type | Example |
|
||||
| ------------- | ------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------- |
|
||||
| type_hints | `Dict[str, Literal["integer", "float", "boolean", "string", "enum", "image", "latents", "model", "control"]]` | `type_hint: "model"` provides type hints related to the model like displaying a list of available models |
|
||||
| tags | `List[str]` | `tags: ['resize', 'image']` will classify your invocation under the tags of resize and image. |
|
||||
| title | `str` | `title: 'Resize Image'` will rename your invocation to this custom title rather than infer it from the name of the Invocation class. |
|
||||
|
||||
So let us update your `ResizeInvocation` with some extra configuration and see
|
||||
how that works.
|
||||
|
||||
```python
|
||||
from typing import Literal, Union
|
||||
from pydantic import Field
|
||||
|
||||
from .baseinvocation import BaseInvocation, InvocationContext, InvocationConfig
|
||||
from ..models.image import ImageField, ResourceOrigin, ImageCategory
|
||||
from .image import ImageOutput
|
||||
|
||||
class ResizeInvocation(BaseInvocation):
|
||||
'''Resizes an image'''
|
||||
type: Literal['resize'] = 'resize'
|
||||
|
||||
# Inputs
|
||||
image: Union[ImageField, None] = Field(description="The input image", default=None)
|
||||
width: int = Field(default=512, ge=64, le=2048, description="Width of the new image")
|
||||
height: int = Field(default=512, ge=64, le=2048, description="Height of the new image")
|
||||
|
||||
class Config(InvocationConfig):
|
||||
schema_extra: {
|
||||
ui: {
|
||||
tags: ['resize', 'image'],
|
||||
title: ['My Custom Resize']
|
||||
}
|
||||
}
|
||||
|
||||
def invoke(self, context: InvocationContext) -> ImageOutput:
|
||||
# Load the image using InvokeAI's predefined Image Service.
|
||||
image = context.services.images.get_pil_image(self.image.image_origin, self.image.image_name)
|
||||
|
||||
# Resizing the image
|
||||
# Because we used the above service, we already have a PIL image. So we can simply resize.
|
||||
resized_image = image.resize((self.width, self.height))
|
||||
|
||||
# Preparing the image for output using InvokeAI's predefined Image Service.
|
||||
output_image = context.services.images.create(
|
||||
image=resized_image,
|
||||
image_origin=ResourceOrigin.INTERNAL,
|
||||
image_category=ImageCategory.GENERAL,
|
||||
node_id=self.id,
|
||||
session_id=context.graph_execution_state_id,
|
||||
is_intermediate=self.is_intermediate,
|
||||
)
|
||||
|
||||
# Returning the Image
|
||||
return ImageOutput(
|
||||
image=ImageField(
|
||||
image_name=output_image.image_name,
|
||||
image_origin=output_image.image_origin,
|
||||
),
|
||||
width=output_image.width,
|
||||
height=output_image.height,
|
||||
)
|
||||
```
|
||||
|
||||
We now customized our code to let the frontend know that our Invocation falls
|
||||
under `resize` and `image` categories. So when the user searches for these
|
||||
particular words, our Invocation will show up too.
|
||||
|
||||
We also set a custom title for our Invocation. So instead of being called
|
||||
`Resize`, it will be called `My Custom Resize`.
|
||||
|
||||
As simple as that.
|
||||
|
||||
As time goes by, InvokeAI will further improve and add more customizability for
|
||||
Invocation configuration. We will have more documentation regarding this at a
|
||||
later time.
|
||||
|
||||
# **[TODO]**
|
||||
|
||||
## Custom Components For Frontend
|
||||
|
||||
Every backend input type should have a corresponding frontend component so the
|
||||
UI knows what to render when you use a particular field type.
|
||||
|
||||
If you are using existing field types, we already have components for those. So
|
||||
you don't have to worry about creating anything new. But this might not always
|
||||
be the case. Sometimes you might want to create new field types and have the
|
||||
frontend UI deal with it in a different way.
|
||||
|
||||
This is where we venture into the world of React and Javascript and create our
|
||||
own new components for our Invocations. Do not fear the world of JS. It's
|
||||
actually pretty straightforward.
|
||||
|
||||
Let us create a new component for our custom color field we created above. When
|
||||
we use a color field, let us say we want the UI to display a color picker for
|
||||
the user to pick from rather than entering values. That is what we will build
|
||||
now.
|
||||
|
||||
---
|
||||
|
||||
# OLD -- TO BE DELETED OR MOVED LATER
|
||||
|
||||
---
|
||||
|
||||
## Creating a new invocation
|
||||
|
||||
To create a new invocation, either find the appropriate module file in
|
||||
`/ldm/invoke/app/invocations` to add your invocation to, or create a new one in
|
||||
that folder. All invocations in that folder will be discovered and made
|
||||
available to the CLI and API automatically. Invocations make use of
|
||||
[typing](https://docs.python.org/3/library/typing.html) and
|
||||
[pydantic](https://pydantic-docs.helpmanual.io/) for validation and integration
|
||||
into the CLI and API.
|
||||
|
||||
An invocation looks like this:
|
||||
|
||||
```py
|
||||
class UpscaleInvocation(BaseInvocation):
|
||||
"""Upscales an image."""
|
||||
|
||||
# fmt: off
|
||||
type: Literal["upscale"] = "upscale"
|
||||
|
||||
# Inputs
|
||||
image: Union[ImageField, None] = Field(description="The input image", default=None)
|
||||
strength: float = Field(default=0.75, gt=0, le=1, description="The strength")
|
||||
level: Literal[2, 4] = Field(default=2, description="The upscale level")
|
||||
# fmt: on
|
||||
|
||||
# Schema customisation
|
||||
class Config(InvocationConfig):
|
||||
schema_extra = {
|
||||
"ui": {
|
||||
"tags": ["upscaling", "image"],
|
||||
},
|
||||
}
|
||||
|
||||
def invoke(self, context: InvocationContext) -> ImageOutput:
|
||||
image = context.services.images.get_pil_image(
|
||||
self.image.image_origin, self.image.image_name
|
||||
)
|
||||
results = context.services.restoration.upscale_and_reconstruct(
|
||||
image_list=[[image, 0]],
|
||||
upscale=(self.level, self.strength),
|
||||
strength=0.0, # GFPGAN strength
|
||||
save_original=False,
|
||||
image_callback=None,
|
||||
)
|
||||
|
||||
# Results are image and seed, unwrap for now
|
||||
# TODO: can this return multiple results?
|
||||
image_dto = context.services.images.create(
|
||||
image=results[0][0],
|
||||
image_origin=ResourceOrigin.INTERNAL,
|
||||
image_category=ImageCategory.GENERAL,
|
||||
node_id=self.id,
|
||||
session_id=context.graph_execution_state_id,
|
||||
is_intermediate=self.is_intermediate,
|
||||
)
|
||||
|
||||
return ImageOutput(
|
||||
image=ImageField(
|
||||
image_name=image_dto.image_name,
|
||||
image_origin=image_dto.image_origin,
|
||||
),
|
||||
width=image_dto.width,
|
||||
height=image_dto.height,
|
||||
)
|
||||
|
||||
```
|
||||
|
||||
Each portion is important to implement correctly.
|
||||
|
||||
### Class definition and type
|
||||
|
||||
```py
|
||||
class UpscaleInvocation(BaseInvocation):
|
||||
"""Upscales an image."""
|
||||
type: Literal['upscale'] = 'upscale'
|
||||
```
|
||||
|
||||
All invocations must derive from `BaseInvocation`. They should have a docstring
|
||||
that declares what they do in a single, short line. They should also have a
|
||||
`type` with a type hint that's `Literal["command_name"]`, where `command_name`
|
||||
is what the user will type on the CLI or use in the API to create this
|
||||
invocation. The `command_name` must be unique. The `type` must be assigned to
|
||||
the value of the literal in the type hint.
|
||||
|
||||
### Inputs
|
||||
|
||||
```py
|
||||
# Inputs
|
||||
image: Union[ImageField,None] = Field(description="The input image")
|
||||
strength: float = Field(default=0.75, gt=0, le=1, description="The strength")
|
||||
level: Literal[2,4] = Field(default=2, description="The upscale level")
|
||||
```
|
||||
|
||||
Inputs consist of three parts: a name, a type hint, and a `Field` with default,
|
||||
description, and validation information. For example:
|
||||
|
||||
| Part | Value | Description |
|
||||
| --------- | ------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------- |
|
||||
| Name | `strength` | This field is referred to as `strength` |
|
||||
| Type Hint | `float` | This field must be of type `float` |
|
||||
| Field | `Field(default=0.75, gt=0, le=1, description="The strength")` | The default value is `0.75`, the value must be in the range (0,1], and help text will show "The strength" for this field. |
|
||||
|
||||
Notice that `image` has type `Union[ImageField,None]`. The `Union` allows this
|
||||
field to be parsed with `None` as a value, which enables linking to previous
|
||||
invocations. All fields should either provide a default value or allow `None` as
|
||||
a value, so that they can be overwritten with a linked output from another
|
||||
invocation.
|
||||
|
||||
The special type `ImageField` is also used here. All images are passed as
|
||||
`ImageField`, which protects them from pydantic validation errors (since images
|
||||
only ever come from links).
|
||||
|
||||
Finally, note that for all linking, the `type` of the linked fields must match.
|
||||
If the `name` also matches, then the field can be **automatically linked** to a
|
||||
previous invocation by name and matching.
|
||||
|
||||
### Config
|
||||
|
||||
```py
|
||||
# Schema customisation
|
||||
class Config(InvocationConfig):
|
||||
schema_extra = {
|
||||
"ui": {
|
||||
"tags": ["upscaling", "image"],
|
||||
},
|
||||
}
|
||||
```
|
||||
|
||||
This is an optional configuration for the invocation. It inherits from
|
||||
pydantic's model `Config` class, and it is used primarily to customize the
|
||||
autogenerated OpenAPI schema.
|
||||
|
||||
The UI relies on the OpenAPI schema in two ways:
|
||||
|
||||
- An API client & Typescript types are generated from it. This happens at build
|
||||
time.
|
||||
- The node editor parses the schema into a template used by the UI to create the
|
||||
node editor UI. This parsing happens at runtime.
|
||||
|
||||
In this example, a `ui` key has been added to the `schema_extra` dict to provide
|
||||
some tags for the UI, to facilitate filtering nodes.
|
||||
|
||||
See the Schema Generation section below for more information.
|
||||
|
||||
### Invoke Function
|
||||
|
||||
```py
|
||||
def invoke(self, context: InvocationContext) -> ImageOutput:
|
||||
image = context.services.images.get_pil_image(
|
||||
self.image.image_origin, self.image.image_name
|
||||
)
|
||||
results = context.services.restoration.upscale_and_reconstruct(
|
||||
image_list=[[image, 0]],
|
||||
upscale=(self.level, self.strength),
|
||||
strength=0.0, # GFPGAN strength
|
||||
save_original=False,
|
||||
image_callback=None,
|
||||
)
|
||||
|
||||
# Results are image and seed, unwrap for now
|
||||
# TODO: can this return multiple results?
|
||||
image_dto = context.services.images.create(
|
||||
image=results[0][0],
|
||||
image_origin=ResourceOrigin.INTERNAL,
|
||||
image_category=ImageCategory.GENERAL,
|
||||
node_id=self.id,
|
||||
session_id=context.graph_execution_state_id,
|
||||
is_intermediate=self.is_intermediate,
|
||||
)
|
||||
|
||||
return ImageOutput(
|
||||
image=ImageField(
|
||||
image_name=image_dto.image_name,
|
||||
image_origin=image_dto.image_origin,
|
||||
),
|
||||
width=image_dto.width,
|
||||
height=image_dto.height,
|
||||
)
|
||||
```
|
||||
|
||||
The `invoke` function is the last portion of an invocation. It is provided an
|
||||
`InvocationContext` which contains services to perform work as well as a
|
||||
`session_id` for use as needed. It should return a class with output values that
|
||||
derives from `BaseInvocationOutput`.
|
||||
|
||||
Before being called, the invocation will have all of its fields set from
|
||||
defaults, inputs, and finally links (overriding in that order).
|
||||
|
||||
Assume that this invocation may be running simultaneously with other
|
||||
invocations, may be running on another machine, or in other interesting
|
||||
scenarios. If you need functionality, please provide it as a service in the
|
||||
`InvocationServices` class, and make sure it can be overridden.
|
||||
|
||||
### Outputs
|
||||
|
||||
```py
|
||||
class ImageOutput(BaseInvocationOutput):
|
||||
"""Base class for invocations that output an image"""
|
||||
|
||||
# fmt: off
|
||||
type: Literal["image_output"] = "image_output"
|
||||
image: ImageField = Field(default=None, description="The output image")
|
||||
width: int = Field(description="The width of the image in pixels")
|
||||
height: int = Field(description="The height of the image in pixels")
|
||||
# fmt: on
|
||||
|
||||
class Config:
|
||||
schema_extra = {"required": ["type", "image", "width", "height"]}
|
||||
```
|
||||
|
||||
Output classes look like an invocation class without the invoke method. Prefer
|
||||
to use an existing output class if available, and prefer to name inputs the same
|
||||
as outputs when possible, to promote automatic invocation linking.
|
||||
|
||||
## Schema Generation
|
||||
|
||||
Invocation, output and related classes are used to generate an OpenAPI schema.
|
||||
|
||||
### Required Properties
|
||||
|
||||
The schema generation treats all properties with default values as optional. This
|
||||
makes sense internally, but when using these classes via the generated
|
||||
schema, we end up with e.g. the `ImageOutput` class having its `image` property
|
||||
marked as optional.
|
||||
|
||||
We know that this property will always be present, so the additional logic
|
||||
needed to always check if the property exists adds a lot of extraneous cruft.
|
||||
|
||||
To fix this, we can leverage `pydantic`'s
|
||||
[schema customisation](https://docs.pydantic.dev/usage/schema/#schema-customization)
|
||||
to mark properties that we know will always be present as required.
|
||||
|
||||
Here's that `ImageOutput` class, without the needed schema customisation:
|
||||
|
||||
```python
|
||||
class ImageOutput(BaseInvocationOutput):
|
||||
"""Base class for invocations that output an image"""
|
||||
|
||||
# fmt: off
|
||||
type: Literal["image_output"] = "image_output"
|
||||
image: ImageField = Field(default=None, description="The output image")
|
||||
width: int = Field(description="The width of the image in pixels")
|
||||
height: int = Field(description="The height of the image in pixels")
|
||||
# fmt: on
|
||||
```
|
||||
|
||||
The OpenAPI schema that results from this `ImageOutput` will have the `type`,
|
||||
`image`, `width` and `height` properties marked as optional, even though we know
|
||||
they will always have a value.
|
||||
|
||||
```python
|
||||
class ImageOutput(BaseInvocationOutput):
|
||||
"""Base class for invocations that output an image"""
|
||||
|
||||
# fmt: off
|
||||
type: Literal["image_output"] = "image_output"
|
||||
image: ImageField = Field(default=None, description="The output image")
|
||||
width: int = Field(description="The width of the image in pixels")
|
||||
height: int = Field(description="The height of the image in pixels")
|
||||
# fmt: on
|
||||
|
||||
# Add schema customization
|
||||
class Config:
|
||||
schema_extra = {"required": ["type", "image", "width", "height"]}
|
||||
```
|
||||
|
||||
With the customization in place, the schema will now show these properties as
|
||||
required, obviating the need for extensive null checks in client code.
|
||||
|
||||
See this `pydantic` issue for discussion on this solution:
|
||||
<https://github.com/pydantic/pydantic/discussions/4577>
|
||||
|
||||
@@ -1,10 +1,21 @@
|
||||
# Local Development
|
||||
|
||||
If you want to contribute, you will need to set up a [local development environment](./dev-environment.md).
|
||||
If you are looking to contribute you will need to have a local development
|
||||
environment. See the
|
||||
[Developer Install](../installation/020_INSTALL_MANUAL.md#developer-install) for
|
||||
full details.
|
||||
|
||||
Broadly this involves cloning the repository, installing the pre-reqs, and
|
||||
InvokeAI (in editable form). Assuming this is working, choose your area of
|
||||
focus.
|
||||
|
||||
## Documentation
|
||||
|
||||
We use [mkdocs](https://www.mkdocs.org) for our documentation with the [material theme](https://squidfunk.github.io/mkdocs-material/). Documentation is written in markdown files under the `./docs` folder and then built into a static website for hosting with GitHub Pages at [invoke-ai.github.io/InvokeAI](https://invoke-ai.github.io/InvokeAI).
|
||||
We use [mkdocs](https://www.mkdocs.org) for our documentation with the
|
||||
[material theme](https://squidfunk.github.io/mkdocs-material/). Documentation is
|
||||
written in markdown files under the `./docs` folder and then built into a static
|
||||
website for hosting with GitHub Pages at
|
||||
[invoke-ai.github.io/InvokeAI](https://invoke-ai.github.io/InvokeAI).
|
||||
|
||||
To contribute to the documentation you'll need to install the dependencies. Note
|
||||
the use of `"`.
|
||||
@@ -24,34 +35,46 @@ access.
|
||||
|
||||
## Backend
|
||||
|
||||
The backend is contained within the `./invokeai/backend` and `./invokeai/app` directories.
|
||||
To get started please install the development dependencies.
|
||||
The backend is contained within the `./invokeai/backend` folder structure. To
|
||||
get started however please install the development dependencies.
|
||||
|
||||
From the root of the repository run the following command. Note the use of `"`.
|
||||
|
||||
```zsh
|
||||
pip install ".[dev,test]"
|
||||
pip install ".[test]"
|
||||
```
|
||||
|
||||
These are optional groups of packages which are defined within the `pyproject.toml`
|
||||
and will be required for testing the changes you make to the code.
|
||||
This is an optional group of packages which is defined within the
|
||||
`pyproject.toml` and will be required for testing the changes you make to the
|
||||
code.
|
||||
|
||||
### Tests
|
||||
### Running Tests
|
||||
|
||||
See the [tests documentation](./TESTS.md) for information about running and writing tests.
|
||||
We use [pytest](https://docs.pytest.org/en/7.2.x/) for our test suite. Tests can
|
||||
be found under the `./tests` folder and can be run with a single `pytest`
|
||||
command. Optionally, to review test coverage you can append `--cov`.
|
||||
|
||||
### Reloading Changes
|
||||
```zsh
|
||||
pytest --cov
|
||||
```
|
||||
|
||||
Experimenting with changes to the Python source code is a drag if you have to re-start the server —
|
||||
and re-load those multi-gigabyte models —
|
||||
after every change.
|
||||
Test outcomes and coverage will be reported in the terminal. In addition a more
|
||||
detailed report is created in both XML and HTML format in the `./coverage`
|
||||
folder. The HTML one in particular can help identify missing statements
|
||||
requiring tests to ensure coverage. This can be run by opening
|
||||
`./coverage/html/index.html`.
|
||||
|
||||
For a faster development workflow, add the `--dev_reload` flag when starting the server.
|
||||
The server will watch for changes to all the Python files in the `invokeai` directory and apply those changes to the
|
||||
running server on the fly.
|
||||
For example.
|
||||
|
||||
This will allow you to avoid restarting the server (and reloading models) in most cases, but there are some caveats; see
|
||||
the [jurigged documentation](https://github.com/breuleux/jurigged#caveats) for details.
|
||||
```zsh
|
||||
pytest --cov; open ./coverage/html/index.html
|
||||
```
|
||||
|
||||
??? info "HTML coverage report output"
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||
## Front End
|
||||
|
||||
@@ -131,23 +154,6 @@ and so you'll have access to the same python environment as the InvokeAI app.
|
||||
|
||||
This is _super_ handy.
|
||||
|
||||
#### Enabling Type-Checking with Pylance
|
||||
|
||||
We use python's typing system in InvokeAI. PR reviews will include checking that types are present and correct. We don't enforce types with `mypy` at this time, but that is on the horizon.
|
||||
|
||||
Using a code analysis tool to automatically type check your code (and types) is very important when writing with types. These tools provide immediate feedback in your editor when types are incorrect, and following their suggestions leads to fewer runtime bugs.
|
||||
|
||||
Pylance, installed at the beginning of this guide, is the de-facto python LSP (language server protocol). It provides type checking in the editor (among many other features). Once installed, you do need to enable type checking manually:
|
||||
|
||||
- Open a python file
|
||||
- Look along the status bar in VSCode for `{ } Python`
|
||||
- Click the `{ }`
|
||||
- Turn type checking on - basic is fine
|
||||
|
||||
You'll now see red squiggly lines where type issues are detected. Hover your cursor over the indicated symbols to see what's wrong.
|
||||
|
||||
In 99% of cases when the type checker says there is a problem, there really is a problem, and you should take some time to understand and resolve what it is pointing out.
|
||||
|
||||
#### Debugging configs with `launch.json`
|
||||
|
||||
Debugging configs are managed in a `launch.json` file. Like most VSCode configs,
|
||||
|
||||
@@ -1,89 +0,0 @@
|
||||
# InvokeAI Backend Tests
|
||||
|
||||
We use `pytest` to run the backend python tests. (See [pyproject.toml](/pyproject.toml) for the default `pytest` options.)
|
||||
|
||||
## Fast vs. Slow
|
||||
All tests are categorized as either 'fast' (no test annotation) or 'slow' (annotated with the `@pytest.mark.slow` decorator).
|
||||
|
||||
'Fast' tests are run to validate every PR, and are fast enough that they can be run routinely during development.
|
||||
|
||||
'Slow' tests are currently only run manually on an ad-hoc basis. In the future, they may be automated to run nightly. Most developers are only expected to run the 'slow' tests that directly relate to the feature(s) that they are working on.
|
||||
|
||||
As a rule of thumb, tests should be marked as 'slow' if there is a chance that they take >1s (e.g. on a CPU-only machine with slow internet connection). Common examples of slow tests are tests that depend on downloading a model, or running model inference.
|
||||
|
||||
## Running Tests
|
||||
|
||||
Below are some common test commands:
|
||||
```bash
|
||||
# Run the fast tests. (This implicitly uses the configured default option: `-m "not slow"`.)
|
||||
pytest tests/
|
||||
|
||||
# Equivalent command to run the fast tests.
|
||||
pytest tests/ -m "not slow"
|
||||
|
||||
# Run the slow tests.
|
||||
pytest tests/ -m "slow"
|
||||
|
||||
# Run the slow tests from a specific file.
|
||||
pytest tests/path/to/slow_test.py -m "slow"
|
||||
|
||||
# Run all tests (fast and slow).
|
||||
pytest tests -m ""
|
||||
```
|
||||
|
||||
## Test Organization
|
||||
|
||||
All backend tests are in the [`tests/`](/tests/) directory. This directory mirrors the organization of the `invokeai/` directory. For example, tests for `invokeai/model_management/model_manager.py` would be found in `tests/model_management/test_model_manager.py`.
|
||||
|
||||
TODO: The above statement is aspirational. A re-organization of legacy tests is required to make it true.
|
||||
|
||||
## Tests that depend on models
|
||||
|
||||
There are a few things to keep in mind when adding tests that depend on models.
|
||||
|
||||
1. If a required model is not already present, it should automatically be downloaded as part of the test setup.
|
||||
2. If a model is already downloaded, it should not be re-downloaded unnecessarily.
|
||||
3. Take reasonable care to keep the total number of models required for the tests low. Whenever possible, re-use models that are already required for other tests. If you are adding a new model, consider including a comment to explain why it is required/unique.
|
||||
|
||||
There are several utilities to help with model setup for tests. Here is a sample test that depends on a model:
|
||||
```python
|
||||
import pytest
|
||||
import torch
|
||||
|
||||
from invokeai.backend.model_management.models.base import BaseModelType, ModelType
|
||||
from invokeai.backend.util.test_utils import install_and_load_model
|
||||
|
||||
@pytest.mark.slow
|
||||
def test_model(model_installer, torch_device):
|
||||
model_info = install_and_load_model(
|
||||
model_installer=model_installer,
|
||||
model_path_id_or_url="HF/dummy_model_id",
|
||||
model_name="dummy_model",
|
||||
base_model=BaseModelType.StableDiffusion1,
|
||||
model_type=ModelType.Dummy,
|
||||
)
|
||||
|
||||
dummy_input = build_dummy_input(torch_device)
|
||||
|
||||
with torch.no_grad(), model_info as model:
|
||||
model.to(torch_device, dtype=torch.float32)
|
||||
output = model(dummy_input)
|
||||
|
||||
# Validate output...
|
||||
|
||||
```
|
||||
|
||||
## Test Coverage
|
||||
|
||||
To review test coverage, append `--cov` to your pytest command:
|
||||
```bash
|
||||
pytest tests/ --cov
|
||||
```
|
||||
|
||||
Test outcomes and coverage will be reported in the terminal. In addition, a more detailed report is created in both XML and HTML format in the `./coverage` folder. The HTML output is particularly helpful in identifying untested statements where coverage should be improved. The HTML report can be viewed by opening `./coverage/html/index.html`.
|
||||
|
||||
??? info "HTML coverage report output"
|
||||
|
||||

|
||||
|
||||

|
||||
@@ -4,21 +4,14 @@
|
||||
|
||||
If you are looking to help with a code contribution, InvokeAI uses several different technologies under the hood: Python (Pydantic, FastAPI, diffusers) and Typescript (React, Redux Toolkit, ChakraUI, Mantine, Konva). Familiarity with StableDiffusion and image generation concepts is helpful, but not essential.
|
||||
|
||||
|
||||
## **Get Started**
|
||||
|
||||
To get started, take a look at our [new contributors checklist](newContributorChecklist.md)
|
||||
|
||||
Once you're setup, for more information, you can review the documentation specific to your area of interest:
|
||||
For more information, please review our area specific documentation:
|
||||
|
||||
* #### [InvokeAI Architecture](../ARCHITECTURE.md)
|
||||
* #### [Frontend Documentation](https://github.com/invoke-ai/InvokeAI/tree/main/invokeai/frontend/web)
|
||||
* #### [Frontend Documentation](development_guides/contributingToFrontend.md)
|
||||
* #### [Node Documentation](../INVOCATIONS.md)
|
||||
* #### [Local Development](../LOCAL_DEVELOPMENT.md)
|
||||
|
||||
|
||||
|
||||
If you don't feel ready to make a code contribution yet, no problem! You can also help out in other ways, such as [documentation](documentation.md), [translation](translation.md) or helping support other users and triage issues as they're reported in GitHub.
|
||||
If you don't feel ready to make a code contribution yet, no problem! You can also help out in other ways, such as [documentation](documentation.md) or [translation](translation.md).
|
||||
|
||||
There are two paths to making a development contribution:
|
||||
|
||||
@@ -30,20 +23,69 @@ There are two paths to making a development contribution:
|
||||
|
||||
## Best Practices:
|
||||
* Keep your pull requests small. Smaller pull requests are more likely to be accepted and merged
|
||||
* Comments! Commenting your code helps reviewers easily understand your contribution
|
||||
* Comments! Commenting your code helps reviewers easily understand your contribution
|
||||
* Use Python and Typescript’s typing systems, and consider using an editor with [LSP](https://microsoft.github.io/language-server-protocol/) support to streamline development
|
||||
* Make all communications public. This ensures knowledge is shared with the whole community
|
||||
|
||||
## **How do I make a contribution?**
|
||||
|
||||
Never made an open source contribution before? Wondering how contributions work in our project? Here's a quick rundown!
|
||||
|
||||
Before starting these steps, ensure you have your local environment [configured for development](../LOCAL_DEVELOPMENT.md).
|
||||
|
||||
1. Find a [good first issue](https://github.com/invoke-ai/InvokeAI/contribute) that you are interested in addressing or a feature that you would like to add. Then, reach out to our team in the [#dev-chat](https://discord.com/channels/1020123559063990373/1049495067846524939) channel of the Discord to ensure you are setup for success.
|
||||
2. Fork the [InvokeAI](https://github.com/invoke-ai/InvokeAI) repository to your GitHub profile. This means that you will have a copy of the repository under **your-GitHub-username/InvokeAI**.
|
||||
3. Clone the repository to your local machine using:
|
||||
|
||||
```bash
|
||||
git clone https://github.com/your-GitHub-username/InvokeAI.git
|
||||
```
|
||||
|
||||
If you're unfamiliar with using Git through the commandline, [GitHub Desktop](https://desktop.github.com) is an easy-to-use alternative with a UI. You can do all the same steps listed here, but through the interface.
|
||||
|
||||
4. Create a new branch for your fix using:
|
||||
|
||||
```bash
|
||||
git checkout -b branch-name-here
|
||||
```
|
||||
|
||||
5. Make the appropriate changes for the issue you are trying to address or the feature that you want to add.
|
||||
6. Add the file contents of the changed files to the "snapshot" git uses to manage the state of the project, also known as the index:
|
||||
|
||||
```bash
|
||||
git add insert-paths-of-changed-files-here
|
||||
```
|
||||
|
||||
7. Store the contents of the index with a descriptive message.
|
||||
|
||||
```bash
|
||||
git commit -m "Insert a short message of the changes made here"
|
||||
```
|
||||
|
||||
8. Push the changes to the remote repository using
|
||||
|
||||
```bash
|
||||
git push origin branch-name-here
|
||||
```
|
||||
|
||||
9. Submit a pull request to the **main** branch of the InvokeAI repository.
|
||||
10. Title the pull request with a short description of the changes made and the issue or bug number associated with your change. For example, you can title an issue like so "Added more log outputting to resolve #1234".
|
||||
11. In the description of the pull request, explain the changes that you made, any issues you think exist with the pull request you made, and any questions you have for the maintainer. It's OK if your pull request is not perfect (no pull request is), the reviewer will be able to help you fix any problems and improve it!
|
||||
12. Wait for the pull request to be reviewed by other collaborators.
|
||||
13. Make changes to the pull request if the reviewer(s) recommend them.
|
||||
14. Celebrate your success after your pull request is merged!
|
||||
|
||||
If you’d like to learn more about contributing to Open Source projects, here is a [Getting Started Guide](https://opensource.com/article/19/7/create-pull-request-github).
|
||||
|
||||
## **Where can I go for help?**
|
||||
|
||||
If you need help, you can ask questions in the [#dev-chat](https://discord.com/channels/1020123559063990373/1049495067846524939) channel of the Discord.
|
||||
|
||||
For frontend related work, **@psychedelicious** is the best person to reach out to.
|
||||
|
||||
For backend related work, please reach out to **@blessedcoolant**, **@lstein**, **@StAlKeR7779** or **@psychedelicious**.
|
||||
For frontend related work, **@psychedelicious** is the best person to reach out to.
|
||||
|
||||
For backend related work, please reach out to **@blessedcoolant**, **@lstein**, **@StAlKeR7779** or **@psychedelicious**.
|
||||
|
||||
## **What does the Code of Conduct mean for me?**
|
||||
|
||||
Our [Code of Conduct](../../CODE_OF_CONDUCT.md) means that you are responsible for treating everyone on the project with respect and courtesy regardless of their identity. If you are the victim of any inappropriate behavior or comments as described in our Code of Conduct, we are here for you and will do the best to ensure that the abuser is reprimanded appropriately, per our code.
|
||||
Our [Code of Conduct](CODE_OF_CONDUCT.md) means that you are responsible for treating everyone on the project with respect and courtesy regardless of their identity. If you are the victim of any inappropriate behavior or comments as described in our Code of Conduct, we are here for you and will do the best to ensure that the abuser is reprimanded appropriately, per our code.
|
||||
|
||||
|
||||
@@ -0,0 +1,75 @@
|
||||
# Contributing to the Frontend
|
||||
|
||||
# InvokeAI Web UI
|
||||
|
||||
- [InvokeAI Web UI](https://github.com/invoke-ai/InvokeAI/tree/main/invokeai/frontend/web/docs#invokeai-web-ui)
|
||||
- [Stack](https://github.com/invoke-ai/InvokeAI/tree/main/invokeai/frontend/web/docs#stack)
|
||||
- [Contributing](https://github.com/invoke-ai/InvokeAI/tree/main/invokeai/frontend/web/docs#contributing)
|
||||
- [Dev Environment](https://github.com/invoke-ai/InvokeAI/tree/main/invokeai/frontend/web/docs#dev-environment)
|
||||
- [Production builds](https://github.com/invoke-ai/InvokeAI/tree/main/invokeai/frontend/web/docs#production-builds)
|
||||
|
||||
The UI is a fairly straightforward Typescript React app, with the Unified Canvas being more complex.
|
||||
|
||||
Code is located in `invokeai/frontend/web/` for review.
|
||||
|
||||
## Stack
|
||||
|
||||
State management is Redux via [Redux Toolkit](https://github.com/reduxjs/redux-toolkit). We lean heavily on RTK:
|
||||
|
||||
- `createAsyncThunk` for HTTP requests
|
||||
- `createEntityAdapter` for fetching images and models
|
||||
- `createListenerMiddleware` for workflows
|
||||
|
||||
The API client and associated types are generated from the OpenAPI schema. See API_CLIENT.md.
|
||||
|
||||
Communication with server is a mix of HTTP and [socket.io](https://github.com/socketio/socket.io-client) (with a simple socket.io redux middleware to help).
|
||||
|
||||
[Chakra-UI](https://github.com/chakra-ui/chakra-ui) & [Mantine](https://github.com/mantinedev/mantine) for components and styling.
|
||||
|
||||
[Konva](https://github.com/konvajs/react-konva) for the canvas, but we are pushing the limits of what is feasible with it (and HTML canvas in general). We plan to rebuild it with [PixiJS](https://github.com/pixijs/pixijs) to take advantage of WebGL's improved raster handling.
|
||||
|
||||
[Vite](https://vitejs.dev/) for bundling.
|
||||
|
||||
Localisation is via [i18next](https://github.com/i18next/react-i18next), but translation happens on our [Weblate](https://hosted.weblate.org/engage/invokeai/) project. Only the English source strings should be changed on this repo.
|
||||
|
||||
## Contributing
|
||||
|
||||
Thanks for your interest in contributing to the InvokeAI Web UI!
|
||||
|
||||
We encourage you to ping @psychedelicious and @blessedcoolant on [Discord](https://discord.gg/ZmtBAhwWhy) if you want to contribute, just to touch base and ensure your work doesn't conflict with anything else going on. The project is very active.
|
||||
|
||||
### Dev Environment
|
||||
|
||||
**Setup**
|
||||
|
||||
1. Install [node](https://nodejs.org/en/download/). You can confirm node is installed with:
|
||||
```bash
|
||||
node --version
|
||||
```
|
||||
2. Install [yarn classic](https://classic.yarnpkg.com/lang/en/) and confirm it is installed by running this:
|
||||
```bash
|
||||
npm install --global yarn
|
||||
yarn --version
|
||||
```
|
||||
|
||||
From `invokeai/frontend/web/` run `yarn install` to get everything set up.
|
||||
|
||||
Start everything in dev mode:
|
||||
1. Ensure your virtual environment is running
|
||||
2. Start the dev server: `yarn dev`
|
||||
3. Start the InvokeAI Nodes backend: `python scripts/invokeai-web.py # run from the repo root`
|
||||
4. Point your browser to the dev server address e.g. [http://localhost:5173/](http://localhost:5173/)
|
||||
|
||||
### VSCode Remote Dev
|
||||
|
||||
We've noticed an intermittent issue with the VSCode Remote Dev port forwarding. If you use this feature of VSCode, you may intermittently click the Invoke button and then get nothing until the request times out. Suggest disabling the IDE's port forwarding feature and doing it manually via SSH:
|
||||
|
||||
`ssh -L 9090:localhost:9090 -L 5173:localhost:5173 user@host`
|
||||
|
||||
### Production builds
|
||||
|
||||
For a number of technical and logistical reasons, we need to commit UI build artefacts to the repo.
|
||||
|
||||
If you submit a PR, there is a good chance we will ask you to include a separate commit with a build of the app.
|
||||
|
||||
To build for production, run `yarn build`.
|
||||
@@ -1,13 +1,13 @@
|
||||
# Documentation
|
||||
|
||||
Documentation is an important part of any open source project. It provides a clear and concise way to communicate how the software works, how to use it, and how to troubleshoot issues. Without proper documentation, it can be difficult for users to understand the purpose and functionality of the project.
|
||||
Documentation is an important part of any open source project. It provides a clear and concise way to communicate how the software works, how to use it, and how to troubleshoot issues. Without proper documentation, it can be difficult for users to understand the purpose and functionality of the project.
|
||||
|
||||
## Contributing
|
||||
|
||||
All documentation is maintained in our [GitHub repository](https://github.com/invoke-ai/InvokeAI). If you come across documentation that is out of date or incorrect, please submit a pull request with the necessary changes.
|
||||
All documentation is maintained in the InvokeAI GitHub repository. If you come across documentation that is out of date or incorrect, please submit a pull request with the necessary changes.
|
||||
|
||||
When updating or creating documentation, please keep in mind Invoke is a tool for everyone, not just those who have familiarity with generative art.
|
||||
When updating or creating documentation, please keep in mind InvokeAI is a tool for everyone, not just those who have familiarity with generative art.
|
||||
|
||||
## Help & Questions
|
||||
|
||||
Please ping @hipsterusername on [Discord](https://discord.gg/ZmtBAhwWhy) if you have any questions.
|
||||
Please ping @imic1 or @hipsterusername in the [Discord](https://discord.com/channels/1020123559063990373/1049495067846524939) if you have any questions.
|
||||
@@ -1,77 +0,0 @@
|
||||
# New Contributor Guide
|
||||
|
||||
If you're a new contributor to InvokeAI or Open Source Projects, this is the guide for you.
|
||||
|
||||
## New Contributor Checklist
|
||||
|
||||
- [x] Set up your local development environment & fork of InvokeAI by following [the steps outlined here](../dev-environment.md)
|
||||
- [x] Set up your local tooling with [this guide](InvokeAI/contributing/LOCAL_DEVELOPMENT/#developing-invokeai-in-vscode). Feel free to skip this step if you already have tooling you're comfortable with.
|
||||
- [x] Familiarize yourself with [Git](https://www.atlassian.com/git) & our project structure by reading through the [development documentation](development.md)
|
||||
- [x] Join the [#dev-chat](https://discord.com/channels/1020123559063990373/1049495067846524939) channel of the Discord
|
||||
- [x] Choose an issue to work on! This can be achieved by asking in the #dev-chat channel, tackling a [good first issue](https://github.com/invoke-ai/InvokeAI/contribute) or finding an item on the [roadmap](https://github.com/orgs/invoke-ai/projects/7). If nothing in any of those places catches your eye, feel free to work on something of interest to you!
|
||||
- [x] Make your first Pull Request with the guide below
|
||||
- [x] Happy development! Don't be afraid to ask for help - we're happy to help you contribute!
|
||||
|
||||
## How do I make a contribution?
|
||||
|
||||
Never made an open source contribution before? Wondering how contributions work in our project? Here's a quick rundown!
|
||||
|
||||
Before starting these steps, ensure you have your local environment [configured for development](../LOCAL_DEVELOPMENT.md).
|
||||
|
||||
1. Find a [good first issue](https://github.com/invoke-ai/InvokeAI/contribute) that you are interested in addressing or a feature that you would like to add. Then, reach out to our team in the [#dev-chat](https://discord.com/channels/1020123559063990373/1049495067846524939) channel of the Discord to ensure you are setup for success.
|
||||
2. Fork the [InvokeAI](https://github.com/invoke-ai/InvokeAI) repository to your GitHub profile. This means that you will have a copy of the repository under **your-GitHub-username/InvokeAI**.
|
||||
3. Clone the repository to your local machine using:
|
||||
|
||||
```bash
|
||||
git clone https://github.com/your-GitHub-username/InvokeAI.git
|
||||
```
|
||||
|
||||
If you're unfamiliar with using Git through the commandline, [GitHub Desktop](https://desktop.github.com) is an easy-to-use alternative with a UI. You can do all the same steps listed here, but through the interface. 4. Create a new branch for your fix using:
|
||||
|
||||
```bash
|
||||
git checkout -b branch-name-here
|
||||
```
|
||||
|
||||
5. Make the appropriate changes for the issue you are trying to address or the feature that you want to add.
|
||||
6. Add the file contents of the changed files to the "snapshot" git uses to manage the state of the project, also known as the index:
|
||||
|
||||
```bash
|
||||
git add -A
|
||||
```
|
||||
|
||||
7. Store the contents of the index with a descriptive message.
|
||||
|
||||
```bash
|
||||
git commit -m "Insert a short message of the changes made here"
|
||||
```
|
||||
|
||||
8. Push the changes to the remote repository using
|
||||
|
||||
```bash
|
||||
git push origin branch-name-here
|
||||
```
|
||||
|
||||
9. Submit a pull request to the **main** branch of the InvokeAI repository. If you're not sure how to, [follow this guide](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request)
|
||||
10. Title the pull request with a short description of the changes made and the issue or bug number associated with your change. For example, you can title an issue like so "Added more log outputting to resolve #1234".
|
||||
11. In the description of the pull request, explain the changes that you made, any issues you think exist with the pull request you made, and any questions you have for the maintainer. It's OK if your pull request is not perfect (no pull request is), the reviewer will be able to help you fix any problems and improve it!
|
||||
12. Wait for the pull request to be reviewed by other collaborators.
|
||||
13. Make changes to the pull request if the reviewer(s) recommend them.
|
||||
14. Celebrate your success after your pull request is merged!
|
||||
|
||||
If you’d like to learn more about contributing to Open Source projects, here is a [Getting Started Guide](https://opensource.com/article/19/7/create-pull-request-github).
|
||||
|
||||
## Best Practices
|
||||
|
||||
- Keep your pull requests small. Smaller pull requests are more likely to be accepted and merged
|
||||
|
||||
- Comments! Commenting your code helps reviewers easily understand your contribution
|
||||
- Use Python and Typescript’s typing systems, and consider using an editor with [LSP](https://microsoft.github.io/language-server-protocol/) support to streamline development
|
||||
- Make all communications public. This ensures knowledge is shared with the whole community
|
||||
|
||||
## **Where can I go for help?**
|
||||
|
||||
If you need help, you can ask questions in the [#dev-chat](https://discord.com/channels/1020123559063990373/1049495067846524939) channel of the Discord.
|
||||
|
||||
For frontend related work, **@psychedelicious** is the best person to reach out to.
|
||||
|
||||
For backend related work, please reach out to **@blessedcoolant**, **@lstein**, **@StAlKeR7779** or **@psychedelicious**.
|
||||
@@ -16,4 +16,4 @@ Please check Weblate's [documentation](https://docs.weblate.org/en/latest/index
|
||||
|
||||
## Thanks
|
||||
|
||||
Thanks to the InvokeAI community for their efforts to translate the project!
|
||||
Thanks to the InvokeAI community for their efforts to translate the project!
|
||||
@@ -1,54 +0,0 @@
|
||||
---
|
||||
title: Contributors
|
||||
---
|
||||
|
||||
We thank [all contributors](https://github.com/invoke-ai/InvokeAI/graphs/contributors) for their time and hard work!
|
||||
|
||||
## **Original Author**
|
||||
|
||||
- [Lincoln D. Stein](mailto:lincoln.stein@gmail.com)
|
||||
|
||||
## **Current Core Team**
|
||||
|
||||
- @lstein (Lincoln Stein) - Co-maintainer
|
||||
- @blessedcoolant - Co-maintainer
|
||||
- @hipsterusername (Kent Keirsey) - Co-maintainer, CEO, Positive Vibes
|
||||
- @psychedelicious (Spencer Mabrito) - Web Team Leader
|
||||
- @joshistoast (Josh Corbett) - Web Development
|
||||
- @cheerio (Mary Rogers) - Lead Engineer & Web App Development
|
||||
- @ebr (Eugene Brodsky) - Cloud/DevOps/Software engineer; your friendly neighbourhood cluster-autoscaler
|
||||
- @sunija - Standalone version
|
||||
- @brandon (Brandon Rising) - Platform, Infrastructure, Backend Systems
|
||||
- @ryanjdick (Ryan Dick) - Machine Learning & Training
|
||||
- @JPPhoto - Core image generation nodes
|
||||
- @dunkeroni - Image generation backend
|
||||
- @SkunkWorxDark - Image generation backend
|
||||
- @glimmerleaf (Devon Hopkins) - Community Wizard
|
||||
- @gogurt enjoyer - Discord moderator and end user support
|
||||
- @whosawhatsis - Discord moderator and end user support
|
||||
- @dwringer - Discord moderator and end user support
|
||||
- @526christian - Discord moderator and end user support
|
||||
- @harvester62 - Discord moderator and end user support
|
||||
|
||||
## **Honored Team Alumni**
|
||||
|
||||
- @StAlKeR7779 (Sergey Borisov) - Torch stack, ONNX, model management, optimization
|
||||
- @damian0815 - Attention Systems and Compel Maintainer
|
||||
- @netsvetaev (Artur) - Localization support
|
||||
- @Kyle0654 (Kyle Schouviller) - Node Architect and General Backend Wizard
|
||||
- @tildebyte - Installation and configuration
|
||||
- @mauwii (Matthias Wilde) - Installation, release, continuous integration
|
||||
- @chainchompa (Jennifer Player) - Web Development & Chain-Chomping
|
||||
- @millu (Millun Atluri) - Community Wizard, Documentation, Node-wrangler
|
||||
- @genomancer (Gregg Helt) - Controlnet support
|
||||
- @keturn (Kevin Turner) - Diffusers
|
||||
|
||||
## **Original CompVis (Stable Diffusion) Authors**
|
||||
|
||||
- [Robin Rombach](https://github.com/rromb)
|
||||
- [Patrick von Platen](https://github.com/patrickvonplaten)
|
||||
- [ablattmann](https://github.com/ablattmann)
|
||||
- [Patrick Esser](https://github.com/pesser)
|
||||
- [owenvincent](https://github.com/owenvincent)
|
||||
- [apolinario](https://github.com/apolinario)
|
||||
- [Charles Packer](https://github.com/cpacker)
|
||||
@@ -1,99 +0,0 @@
|
||||
# Dev Environment
|
||||
|
||||
To make changes to Invoke's backend, frontend, or documentation, you'll need to set up a dev environment.
|
||||
|
||||
If you just want to use Invoke, you should use the [installer][installer link].
|
||||
|
||||
!!! info "Why do I need the frontend toolchain?"
|
||||
|
||||
The repo doesn't contain a build of the frontend. You'll be responsible for rebuilding it every time you pull in new changes, or run it in dev mode (which incurs a substantial performance penalty).
|
||||
|
||||
!!! warning
|
||||
|
||||
Invoke uses a SQLite database. When you run the application as a dev install, you accept responsibility for your database. This means making regular backups (especially before pulling) and/or fixing it yourself in the event that a PR introduces a schema change.
|
||||
|
||||
If you don't need to persist your db, you can use an ephemeral in-memory database by setting `use_memory_db: true` in your `invokeai.yaml` file. You'll also want to set `scan_models_on_startup: true` so that your models are registered on startup.
|
||||
|
||||
## Setup
|
||||
|
||||
1. Run through the [requirements][requirements link].
|
||||
1. [Fork and clone][forking link] the [InvokeAI repo][repo link].
|
||||
1. Create a directory for user data (images, models, db, etc). This is typically at `~/invokeai`, but if you already have a non-dev install, you may want to create a separate directory for the dev install.
|
||||
1. Create a python virtual environment inside the directory you just created:
|
||||
|
||||
```sh
|
||||
python3 -m venv .venv --prompt InvokeAI-Dev
|
||||
```
|
||||
|
||||
1. Activate the venv (you'll need to do this every time you want to run the app):
|
||||
|
||||
```sh
|
||||
source .venv/bin/activate
|
||||
```
|
||||
|
||||
1. Install the repo as an [editable install][editable install link]:
|
||||
|
||||
```sh
|
||||
pip install -e ".[dev,test,xformers]" --use-pep517 --extra-index-url https://download.pytorch.org/whl/cu121
|
||||
```
|
||||
|
||||
Refer to the [manual installation][manual install link] instructions for help determining the correct install options. `xformers` is optional, but `dev` and `test` are not.
|
||||
|
||||
1. Install the frontend dev toolchain:
|
||||
|
||||
- [`nodejs`](https://nodejs.org/) (recommend v20 LTS)
|
||||
- [`pnpm`](https://pnpm.io/installation#installing-a-specific-version) (must be v8 - not v9!)
|
||||
|
||||
1. Do a production build of the frontend:
|
||||
|
||||
```sh
|
||||
pnpm build
|
||||
```
|
||||
|
||||
1. Start the application:
|
||||
|
||||
```sh
|
||||
python scripts/invokeai-web.py
|
||||
```
|
||||
|
||||
1. Access the UI at `localhost:9090`.
|
||||
|
||||
## Updating the UI
|
||||
|
||||
You'll need to run `pnpm build` every time you pull in new changes. Another option is to skip the build and instead run the app in dev mode:
|
||||
|
||||
```sh
|
||||
pnpm dev
|
||||
```
|
||||
|
||||
This starts a dev server at `localhost:5173`, which you will use instead of `localhost:9090`.
|
||||
|
||||
The dev mode is substantially slower than the production build but may be more convenient if you just need to test things out.
|
||||
|
||||
## Documentation
|
||||
|
||||
The documentation is built with `mkdocs`. To preview it locally, you need an additional set of packages installed.
|
||||
|
||||
```sh
|
||||
# after activating the venv
|
||||
pip install -e ".[docs]"
|
||||
```
|
||||
|
||||
Then, you can start a live docs dev server, which will auto-refresh when you edit the docs:
|
||||
|
||||
```sh
|
||||
mkdocs serve
|
||||
```
|
||||
|
||||
On macOS and Linux, there is a `make` target for this:
|
||||
|
||||
```sh
|
||||
make docs
|
||||
```
|
||||
|
||||
[installer link]: ../installation/installer.md
|
||||
[forking link]: https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/fork-a-repo
|
||||
[requirements link]: ../installation/requirements.md
|
||||
[repo link]: https://github.com/invoke-ai/InvokeAI
|
||||
[manual install link]: ../installation/manual.md
|
||||
[editable install link]: https://pip.pypa.io/en/latest/cli/pip_install/#cmdoption-e
|
||||
@@ -1,128 +0,0 @@
|
||||
# Invoke UI
|
||||
|
||||
Invoke's UI is made possible by many contributors and open-source libraries. Thank you!
|
||||
|
||||
## Dev environment
|
||||
|
||||
Follow the [dev environment](../dev-environment.md) guide to get set up. Run the UI using `pnpm dev`.
|
||||
|
||||
## Package scripts
|
||||
|
||||
- `dev`: run the frontend in dev mode, enabling hot reloading
|
||||
- `build`: run all checks (dpdm, eslint, prettier, tsc, knip) and then build the frontend
|
||||
- `lint:dpdm`: check circular dependencies
|
||||
- `lint:eslint`: check code quality
|
||||
- `lint:prettier`: check code formatting
|
||||
- `lint:tsc`: check type issues
|
||||
- `lint:knip`: check for unused exports or objects
|
||||
- `lint`: run all checks concurrently
|
||||
- `fix`: run `eslint` and `prettier`, fixing fixable issues
|
||||
- `test:ui`: run `vitest` with the fancy web UI
|
||||
|
||||
## Type generation
|
||||
|
||||
We use [openapi-typescript] to generate types from the app's OpenAPI schema. The generated types are committed to the repo in [schema.ts].
|
||||
|
||||
If you make backend changes, it's important to regenerate the frontend types:
|
||||
|
||||
```sh
|
||||
cd invokeai/frontend/web && python ../../../scripts/generate_openapi_schema.py | pnpm typegen
|
||||
```
|
||||
|
||||
On macOS and Linux, you can run `make frontend-typegen` as a shortcut for the above snippet.
|
||||
|
||||
## Localization
|
||||
|
||||
We use [i18next] for localization, but translation to languages other than English happens on our [Weblate] project.
|
||||
|
||||
Only the English source strings (i.e. `en.json`) should be changed on this repo.
|
||||
|
||||
## VSCode
|
||||
|
||||
### Example debugger config
|
||||
|
||||
```jsonc
|
||||
{
|
||||
"version": "0.2.0",
|
||||
"configurations": [
|
||||
{
|
||||
"type": "chrome",
|
||||
"request": "launch",
|
||||
"name": "Invoke UI",
|
||||
"url": "http://localhost:5173",
|
||||
"webRoot": "${workspaceFolder}/invokeai/frontend/web"
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
### Remote dev
|
||||
|
||||
We've noticed an intermittent timeout issue with the VSCode remote dev port forwarding.
|
||||
|
||||
We suggest disabling the editor's port forwarding feature and doing it manually via SSH:
|
||||
|
||||
```sh
|
||||
ssh -L 9090:localhost:9090 -L 5173:localhost:5173 user@host
|
||||
```
|
||||
|
||||
## Contributing Guidelines
|
||||
|
||||
Thanks for your interest in contributing to the Invoke Web UI!
|
||||
|
||||
Please follow these guidelines when contributing.
|
||||
|
||||
## Check in before investing your time
|
||||
|
||||
Please check in before you invest your time on anything besides a trivial fix, in case it conflicts with ongoing work or isn't aligned with the vision for the app.
|
||||
|
||||
If a feature request or issue doesn't already exist for the thing you want to work on, please create one.
|
||||
|
||||
Ping `@psychedelicious` on [discord] in the `#frontend-dev` channel or in the feature request / issue you want to work on - we're happy to chat.
|
||||
|
||||
## Code conventions
|
||||
|
||||
- This is a fairly complex app with a deep component tree. Please use memoization (`useCallback`, `useMemo`, `memo`) with enthusiasm.
|
||||
- If you need to add some global, ephemeral state, please use [nanostores] if possible.
|
||||
- Be careful with your redux selectors. If they need to be parameterized, consider creating them inside a `useMemo`.
|
||||
- Feel free to use `lodash` (via `lodash-es`) to make the intent of your code clear.
|
||||
- Please add comments describing the "why", not the "how" (unless it is really arcane).
|
||||
|
||||
## Commit format
|
||||
|
||||
Please use the [conventional commits] spec for the web UI, with a scope of "ui":
|
||||
|
||||
- `chore(ui): bump deps`
|
||||
- `chore(ui): lint`
|
||||
- `feat(ui): add some cool new feature`
|
||||
- `fix(ui): fix some bug`
|
||||
|
||||
## Tests
|
||||
|
||||
We don't do any UI testing at this time, but consider adding tests for sensitive logic.
|
||||
|
||||
We use `vitest`, and tests should be next to the file they are testing. If the logic is in `something.ts`, the tests should be in `something.test.ts`.
|
||||
|
||||
In some situations, we may want to test types. For example, if you use `zod` to create a schema that should match a generated type, it's best to add a test to confirm that the types match. Use `tsafe`'s assert for this.
|
||||
|
||||
## Submitting a PR
|
||||
|
||||
- Ensure your branch is tidy. Use an interactive rebase to clean up the commit history and reword the commit messages if they are not descriptive.
|
||||
- Run `pnpm lint`. Some issues are auto-fixable with `pnpm fix`.
|
||||
- Fill out the PR form when creating the PR.
|
||||
- It doesn't need to be super detailed, but a screenshot or video is nice if you changed something visually.
|
||||
- If a section isn't relevant, delete it.
|
||||
|
||||
## Other docs
|
||||
|
||||
- [Workflows - Design and Implementation]
|
||||
- [State Management]
|
||||
|
||||
[discord]: https://discord.gg/ZmtBAhwWhy
|
||||
[i18next]: https://github.com/i18next/react-i18next
|
||||
[Weblate]: https://hosted.weblate.org/engage/invokeai/
|
||||
[openapi-typescript]: https://github.com/openapi-ts/openapi-typescript
|
||||
[schema.ts]: https://github.com/invoke-ai/InvokeAI/blob/main/invokeai/frontend/web/src/services/api/schema.ts
|
||||
[conventional commits]: https://www.conventionalcommits.org/en/v1.0.0/
|
||||
[Workflows - Design and Implementation]: ./workflows.md
|
||||
[State Management]: ./state-management.md
|
||||
@@ -1,38 +0,0 @@
|
||||
# State Management
|
||||
|
||||
The app makes heavy use of Redux Toolkit, its Query library, and `nanostores`.
|
||||
|
||||
## Redux
|
||||
|
||||
We use RTK extensively - slices, entity adapters, queries, reselect, the whole 9 yards. Their [docs](https://redux-toolkit.js.org/) are excellent.
|
||||
|
||||
## `nanostores`
|
||||
|
||||
[nanostores] is a tiny state management library. It provides both imperative and declarative APIs.
|
||||
|
||||
### Example
|
||||
|
||||
```ts
|
||||
export const $myStringOption = atom<string | null>(null);
|
||||
|
||||
// Outside a component, or within a callback for performance-critical logic
|
||||
$myStringOption.get();
|
||||
$myStringOption.set('new value');
|
||||
|
||||
// Inside a component
|
||||
const myStringOption = useStore($myStringOption);
|
||||
```
|
||||
|
||||
### Where to put nanostores
|
||||
|
||||
- For global application state, export your stores from `invokeai/frontend/web/src/app/store/nanostores/`.
|
||||
- For feature state, create a file for the stores next to the redux slice definition (e.g. `invokeai/frontend/web/src/features/myFeature/myFeatureNanostores.ts`).
|
||||
- For hooks with global state, export the store from the same file the hook is in, or put it next to the hook.
|
||||
|
||||
### When to use nanostores
|
||||
|
||||
- For non-serializable data that needs to be available throughout the app, use `nanostores` instead of a global.
|
||||
- For ephemeral global state (i.e. state that does not need to be persisted), use `nanostores` instead of redux.
|
||||
- For performance-critical code and in callbacks, redux selectors can be problematic due to the declarative reactivity system. Consider refactoring to use `nanostores` if there's a **measurable** performance issue.
|
||||
|
||||
[nanostores]: https://github.com/nanostores/nanostores/
|
||||
@@ -1,314 +0,0 @@
|
||||
# Workflows - Design and Implementation
|
||||
|
||||
> This document describes, at a high level, the design and implementation of workflows in the InvokeAI frontend. There are a substantial number of implementation details not included, but which are hopefully clear from the code.
|
||||
|
||||
InvokeAI's backend uses graphs, composed of **nodes** and **edges**, to process data and generate images.
|
||||
|
||||
Nodes have any number of **input fields** and **output fields**. Edges connect nodes together via their inputs and outputs. Fields have data types which dictate how they may be connected.
|
||||
|
||||
During execution, a node's outputs may be passed along to any number of other nodes' inputs.
|
||||
|
||||
Workflows are an enriched abstraction over a graph.
|
||||
|
||||
## Design
|
||||
|
||||
InvokeAI provides two ways to build graphs in the frontend: the [Linear UI](#linear-ui) and [Workflow Editor](#workflow-editor).
|
||||
|
||||
To better understand the use case and challenges related to workflows, we will review both of these modes.
|
||||
|
||||
### Linear UI
|
||||
|
||||
This includes the **Text to Image**, **Image to Image** and **Unified Canvas** tabs.
|
||||
|
||||
The user-managed parameters on these tabs are stored as simple objects in the application state. When the user invokes, adding a generation to the queue, we internally build a graph from these parameters.
|
||||
|
||||
This logic can be fairly complex due to the range of features available and their interactions. Depending on the parameters selected, the graph may be very different. Building graphs in code can be challenging - you are trying to construct a non-linear structure in a linear context.
|
||||
|
||||
The simplest graph building logic is for **Text to Image** with a SD1.5 model: [buildLinearTextToImageGraph.ts]
|
||||
|
||||
There are many other graph builders in the same directory for different tabs or base models (e.g. SDXL). Some are pretty hairy.
|
||||
|
||||
In the Linear UI, we go straight from **simple application state** to **graph** via these builders.
|
||||
|
||||
### Workflow Editor
|
||||
|
||||
The Workflow Editor is a visual graph editor, allowing users to draw edges from node to node to construct a graph. This is a _far_ more approachable way to create complex graphs.
|
||||
|
||||
InvokeAI uses the [reactflow] library to power the Workflow Editor. It provides both a graph editor UI and manages its own internal graph state.
|
||||
|
||||
#### Workflows
|
||||
|
||||
A workflow is a representation of a graph plus additional metadata:
|
||||
|
||||
- Name
|
||||
- Description
|
||||
- Version
|
||||
- Notes
|
||||
- [Exposed fields](#workflow-linear-view)
|
||||
- Author, tags, category, etc.
|
||||
|
||||
Workflows should have other qualities:
|
||||
|
||||
- Portable: you should be able to load a workflow created by another person.
|
||||
- Resilient: you should be able to "upgrade" a workflow as the application changes.
|
||||
- Abstract: as much as is possible, workflows should not be married to the specific implementation details of the application.
|
||||
|
||||
To support these qualities, workflows are serializable, have versioned schemas, and represent graphs as minimally as possible. Fortunately, the reactflow state for nodes and edges works perfectly for this.
|
||||
|
||||
##### Workflow -> reactflow state -> InvokeAI graph
|
||||
|
||||
Given a workflow, we need to be able to derive reactflow state and/or an InvokeAI graph from it.
|
||||
|
||||
The first step - workflow to reactflow state - is very simple. The logic is in [nodesSlice.ts], in the `workflowLoaded` reducer.
|
||||
|
||||
The reactflow state is, however, structurally incompatible with our backend's graph structure. When a user invokes on a Workflow, we need to convert the reactflow state into an InvokeAI graph. This is far simpler than the graph building logic from the Linear UI:
|
||||
[buildNodesGraph.ts]
|
||||
|
||||
##### Nodes vs Invocations
|
||||
|
||||
We often use the terms "node" and "invocation" interchangeably, but they may refer to different things in the frontend.
|
||||
|
||||
reactflow [has its own definitions][reactflow-concepts] of "node", "edge" and "handle" which are closely related to InvokeAI graph concepts.
|
||||
|
||||
- A reactflow node is related to an InvokeAI invocation. It has a "data" property, which holds the InvokeAI-specific invocation data.
|
||||
- A reactflow edge is roughly equivalent to an InvokeAI edge.
|
||||
- A reactflow handle is roughly equivalent to an InvokeAI input or output field.
|
||||
|
||||
##### Workflow Linear View
|
||||
|
||||
Graphs are very capable data structures, but not everyone wants to work with them all the time.
|
||||
|
||||
To allow less technical users - or anyone who wants a less visually noisy workspace - to benefit from the power of nodes, InvokeAI has a workflow feature called the Linear View.
|
||||
|
||||
A workflow input field can be added to this Linear View, and its input component can be presented similarly to the Linear UI tabs. Internally, we add the field to the workflow's list of exposed fields.
|
||||
|
||||
#### OpenAPI Schema
|
||||
|
||||
OpenAPI is a schema specification that can represent complex data structures and relationships. The backend is capable of generating an OpenAPI schema for all invocations.
|
||||
|
||||
When the UI connects, it requests this schema and parses each invocation into an **invocation template**. Invocation templates have a number of properties, like title, description and type, but the most important ones are their input and output **field templates**.
|
||||
|
||||
Invocation and field templates are the "source of truth" for graphs, because they indicate what the backend is able to process.
|
||||
|
||||
When a user adds a new node to their workflow, these templates are used to instantiate a node with fields instantiated from the input and output field templates.
|
||||
|
||||
##### Field Instances and Templates
|
||||
|
||||
Field templates consist of:
|
||||
|
||||
- Name: the identifier of the field, its variable name in python
|
||||
- Type: derived from the field's type annotation in python (e.g. IntegerField, ImageField, MainModelField)
|
||||
- Constraints: derived from the field's creation args in python (e.g. minimum value for an integer)
|
||||
- Default value: optionally provided in the field's creation args (e.g. 42 for an integer)
|
||||
|
||||
Field instances are created from the templates and have name, type and optionally a value.
|
||||
|
||||
The type of the field determines the UI components that are rendered for it.
|
||||
|
||||
A field instance's name associates it with its template.
|
||||
|
||||
##### Stateful vs Stateless Fields
|
||||
|
||||
**Stateful** fields store their value in the frontend graph. Think primitives, model identifiers, images, etc. Fields are only stateful if the frontend allows the user to directly input a value for them.
|
||||
|
||||
Many field types, however, are **stateless**. An example is a `UNetField`, which contains some data describing a UNet. Users cannot directly provide this data - it is created and consumed in the backend.
|
||||
|
||||
Stateless fields do not store their value in the node, so their field instances do not have values.
|
||||
|
||||
"Custom" fields will always be treated as stateless fields.
|
||||
|
||||
##### Single and Collection Fields
|
||||
|
||||
Field types have a name and cardinality property which may identify it as a **SINGLE**, **COLLECTION** or **SINGLE_OR_COLLECTION** field.
|
||||
|
||||
- If a field is annotated in python as a singular value or class, its field type is parsed as a **SINGLE** type (e.g. `int`, `ImageField`, `str`).
|
||||
- If a field is annotated in python as a list, its field type is parsed as a **COLLECTION** type (e.g. `list[int]`).
|
||||
- If it is annotated as a union of a type and list, the type will be parsed as a **SINGLE_OR_COLLECTION** type (e.g. `Union[int, list[int]]`). Fields may not be unions of different types (e.g. `Union[int, list[str]]` and `Union[int, str]` are not allowed).
|
||||
|
||||
## Implementation
|
||||
|
||||
The majority of data structures in the backend are [pydantic] models. Pydantic provides OpenAPI schemas for all models and we then generate TypeScript types from those.
|
||||
|
||||
The OpenAPI schema is parsed at runtime into our invocation templates.
|
||||
|
||||
Workflows and all related data are modeled in the frontend using [zod]. Related types are inferred from the zod schemas.
|
||||
|
||||
> In python, invocations are pydantic models with fields. These fields become node inputs. The invocation's `invoke()` function returns a pydantic model - its output. Like the invocation itself, the output model has any number of fields, which become node outputs.
|
||||
|
||||
### zod Schemas and Types
|
||||
|
||||
The zod schemas, inferred types, and type guards are in [types/].
|
||||
|
||||
Roughly ordered from lowest-level to highest:
|
||||
|
||||
- `common.ts`: stateful field data, and a couple of other misc types
|
||||
- `field.ts`: fields - types, values, instances, templates
|
||||
- `invocation.ts`: invocations and other node types
|
||||
- `workflow.ts`: workflows and constituents
|
||||
|
||||
We customize the OpenAPI schema to include additional properties on invocation and field schemas. To facilitate parsing this schema into templates, we modify/wrap the types from [openapi-types] in `openapi.ts`.
|
||||
|
||||
### OpenAPI Schema Parsing
|
||||
|
||||
The entrypoint for OpenAPI schema parsing is [parseSchema.ts].
|
||||
|
||||
General logic flow:
|
||||
|
||||
- Iterate over all invocation schema objects
|
||||
- Extract relevant invocation-level attributes (e.g. title, type, version, etc)
|
||||
- Iterate over the invocation's input fields
|
||||
- [Parse each field's type](#parsing-field-types)
|
||||
- [Build a field input template](#building-field-input-templates) from the type - either a stateful template or "generic" stateless template
|
||||
- Iterate over the invocation's output fields
|
||||
- Parse the field's type (same as inputs)
|
||||
- [Build a field output template](#building-field-output-templates)
|
||||
- Assemble the attributes and fields into an invocation template
|
||||
|
||||
Most of these involve very straightforward `reduce`s, but the less intuitive steps are detailed below.
|
||||
|
||||
#### Parsing Field Types
|
||||
|
||||
Field types are represented as structured objects:
|
||||
|
||||
```ts
|
||||
type FieldType = {
|
||||
name: string;
|
||||
cardinality: 'SINGLE' | 'COLLECTION' | 'SINGLE_OR_COLLECTION';
|
||||
};
|
||||
```
|
||||
|
||||
The parsing logic is in `parseFieldType.ts`.
|
||||
|
||||
There are 4 general cases for field type parsing.
|
||||
|
||||
##### Primitive Types
|
||||
|
||||
When a field is annotated as a primitive value (e.g. `int`, `str`, `float`), the field type parsing is fairly straightforward. The field is represented by a simple OpenAPI **schema object**, which has a `type` property.
|
||||
|
||||
We create a field type name from this `type` string (e.g. `string` -> `StringField`). The cardinality is `"SINGLE"`.
|
||||
|
||||
##### Complex Types
|
||||
|
||||
When a field is annotated as a pydantic model (e.g. `ImageField`, `MainModelField`, `ControlField`), it is represented as a **reference object**. Reference objects are pointers to another schema or reference object within the schema.
|
||||
|
||||
We need to **dereference** the schema to pull these out. Dereferencing may require recursion. We use the reference object's name directly for the field type name.
|
||||
|
||||
> Unfortunately, at this time, we've had limited success using external libraries to dereference at runtime, so we do this ourselves.
|
||||
|
||||
##### Collection Types
|
||||
|
||||
When a field is annotated as a list of a single type, the schema object has an `items` property. They may be a schema object or reference object and must be parsed to determine the item type.
|
||||
|
||||
We use the item type for the field type name. The cardinality is `"COLLECTION"`.
|
||||
|
||||
##### Single or Collection Types
|
||||
|
||||
When a field is annotated as a union of a type and list of that type, the schema object has an `anyOf` property, which holds a list of valid types for the union.
|
||||
|
||||
After verifying that the union has two members (a type and list of the same type), we use the type for the field type name, with cardinality `"SINGLE_OR_COLLECTION"`.
|
||||
|
||||
##### Optional Fields
|
||||
|
||||
In OpenAPI v3.1, when an object is optional, it is put into an `anyOf` along with a primitive schema object with `type: 'null'`.
|
||||
|
||||
Handling this adds a fair bit of complexity, as we now must filter out the `'null'` types and work with the remaining types as described above.
|
||||
|
||||
If there is a single remaining schema object, we must recursively call `parseFieldType()` to parse it.
|
||||
|
||||
#### Building Field Input Templates
|
||||
|
||||
Now that we have a field type, we can build an input template for the field.
|
||||
|
||||
Stateful fields all get a function to build their template, while stateless fields are constructed directly. This is possible because stateless fields have no default value or constraints.
|
||||
|
||||
See [buildFieldInputTemplate.ts].
|
||||
|
||||
#### Building Field Output Templates
|
||||
|
||||
Field outputs are similar to stateless fields - they do not have any value in the frontend. When building their templates, we don't need a special function for each field type.
|
||||
|
||||
See [buildFieldOutputTemplate.ts].
|
||||
|
||||
### Managing reactflow State
|
||||
|
||||
As described above, the workflow editor state is essentially the reactflow state, plus some extra metadata.
|
||||
|
||||
We provide reactflow with an array of nodes and edges via redux, and a number of [event handlers][reactflow-events]. These handlers dispatch redux actions, managing nodes and edges.
|
||||
|
||||
The pieces of redux state relevant to workflows are:
|
||||
|
||||
- `state.nodes.nodes`: the reactflow nodes state
|
||||
- `state.nodes.edges`: the reactflow edges state
|
||||
- `state.nodes.workflow`: the workflow metadata
|
||||
|
||||
#### Building Nodes and Edges
|
||||
|
||||
A reactflow node has a few important top-level properties:
|
||||
|
||||
- `id`: unique identifier
|
||||
- `type`: a string that maps to a react component to render the node
|
||||
- `position`: XY coordinates
|
||||
- `data`: arbitrary data
|
||||
|
||||
When the user adds a node, we build **invocation node data**, storing it in `data`. Invocation properties (e.g. type, version, label, etc.) are copied from the invocation template. Inputs and outputs are built from the invocation template's field templates.
|
||||
|
||||
See [buildInvocationNode.ts].
|
||||
|
||||
Edges are managed by reactflow, but briefly, they consist of:
|
||||
|
||||
- `source`: id of the source node
|
||||
- `sourceHandle`: id of the source node handle (output field)
|
||||
- `target`: id of the target node
|
||||
- `targetHandle`: id of the target node handle (input field)
|
||||
|
||||
> Edge creation is gated behind validation logic. This validation compares the input and output field types and overall graph state.
|
||||
|
||||
#### Building a Workflow
|
||||
|
||||
Building a workflow entity is as simple as dropping the nodes, edges and metadata into an object.
|
||||
|
||||
Each node and edge is parsed with a zod schema, which serves to strip out any unneeded data.
|
||||
|
||||
See [buildWorkflow.ts].
|
||||
|
||||
#### Loading a Workflow
|
||||
|
||||
Workflows may be loaded from external sources or the user's local instance. In all cases, the workflow needs to be handled with care, as an untrusted object.
|
||||
|
||||
Loading has a few stages which may throw or warn if there are problems:
|
||||
|
||||
- Parsing the workflow data structure itself, [migrating](#workflow-migrations) it if necessary (throws)
|
||||
- Check for a template for each node (warns)
|
||||
- Check each node's version against its template (warns)
|
||||
- Validate the source and target of each edge (warns)
|
||||
|
||||
This validation occurs in [validateWorkflow.ts].
|
||||
|
||||
If there are no fatal errors, the workflow is then stored in redux state.
|
||||
|
||||
### Workflow Migrations
|
||||
|
||||
When the workflow schema changes, we may need to perform some data migrations. This occurs as workflows are loaded. zod schemas for each workflow schema version are retained to facilitate migrations.
|
||||
|
||||
Previous schemas are in folders in `invokeai/frontend/web/src/features/nodes/types/`, eg `v1/`.
|
||||
|
||||
Migration logic is in [migrations.ts].
|
||||
|
||||
<!-- links -->
|
||||
|
||||
[pydantic]: https://github.com/pydantic/pydantic 'pydantic'
|
||||
[zod]: https://github.com/colinhacks/zod 'zod'
|
||||
[openapi-types]: https://github.com/kogosoftwarellc/open-api/tree/main/packages/openapi-types 'openapi-types'
|
||||
[reactflow]: https://github.com/xyflow/xyflow 'reactflow'
|
||||
[reactflow-concepts]: https://reactflow.dev/learn/concepts/terms-and-definitions
|
||||
[reactflow-events]: https://reactflow.dev/api-reference/react-flow#event-handlers
|
||||
[buildWorkflow.ts]: https://github.com/invoke-ai/InvokeAI/blob/main/invokeai/frontend/web/src/features/nodes/util/workflow/buildWorkflow.ts
|
||||
[nodesSlice.ts]: https://github.com/invoke-ai/InvokeAI/blob/main/invokeai/frontend/web/src/features/nodes/store/nodesSlice.ts
|
||||
[buildLinearTextToImageGraph.ts]: https://github.com/invoke-ai/InvokeAI/blob/main/invokeai/frontend/web/src/features/nodes/util/graph/buildLinearTextToImageGraph.ts
|
||||
[buildNodesGraph.ts]: https://github.com/invoke-ai/InvokeAI/blob/main/invokeai/frontend/web/src/features/nodes/util/graph/buildNodesGraph.ts
|
||||
[buildInvocationNode.ts]: https://github.com/invoke-ai/InvokeAI/blob/main/invokeai/frontend/web/src/features/nodes/util/node/buildInvocationNode.ts
|
||||
[validateWorkflow.ts]: https://github.com/invoke-ai/InvokeAI/blob/main/invokeai/frontend/web/src/features/nodes/util/workflow/validateWorkflow.ts
|
||||
[migrations.ts]: https://github.com/invoke-ai/InvokeAI/blob/main/invokeai/frontend/web/src/features/nodes/util/workflow/migrations.ts
|
||||
[parseSchema.ts]: https://github.com/invoke-ai/InvokeAI/blob/main/invokeai/frontend/web/src/features/nodes/util/schema/parseSchema.ts
|
||||
[buildFieldInputTemplate.ts]: https://github.com/invoke-ai/InvokeAI/blob/main/invokeai/frontend/web/src/features/nodes/util/schema/buildFieldInputTemplate.ts
|
||||
[buildFieldOutputTemplate.ts]: https://github.com/invoke-ai/InvokeAI/blob/main/invokeai/frontend/web/src/features/nodes/util/schema/buildFieldOutputTemplate.ts
|
||||
@@ -1,52 +0,0 @@
|
||||
# Contributing
|
||||
|
||||
Invoke originated as a project built by the community, and that vision carries forward today as we aim to build the best pro-grade tools available. We work together to incorporate the latest in AI/ML research, making these tools available in over 20 languages to artists and creatives around the world as part of our fully permissive OSS project designed for individual users to self-host and use.
|
||||
|
||||
We welcome contributions, whether features, bug fixes, code cleanup, testing, code reviews, documentation or translation. Please check in with us before diving in to code to ensure your work aligns with our vision.
|
||||
|
||||
## Development
|
||||
|
||||
If you’d like to help with development, please see our [development guide](contribution_guides/development.md).
|
||||
|
||||
**New Contributors:** If you’re unfamiliar with contributing to open source projects, take a look at our [new contributor guide](contribution_guides/newContributorChecklist.md).
|
||||
|
||||
## Nodes
|
||||
|
||||
If you’d like to add a Node, please see our [nodes contribution guide](../nodes/contributingNodes.md).
|
||||
|
||||
## Support and Triaging
|
||||
|
||||
Helping support other users in [Discord](https://discord.gg/ZmtBAhwWhy) and on GitHub are valuable forms of contribution that we greatly appreciate.
|
||||
|
||||
We receive many issues and requests for help from users. We're limited in bandwidth relative to our user base, so providing answers to questions or helping identify causes of issues is very helpful. By doing this, you enable us to spend time on the highest priority work.
|
||||
|
||||
## Documentation
|
||||
|
||||
If you’d like to help with documentation, please see our [documentation guide](contribution_guides/documentation.md).
|
||||
|
||||
## Translation
|
||||
|
||||
If you'd like to help with translation, please see our [translation guide](contribution_guides/translation.md).
|
||||
|
||||
## Tutorials
|
||||
|
||||
Please reach out to @hipsterusername on [Discord](https://discord.gg/ZmtBAhwWhy) to help create tutorials for InvokeAI.
|
||||
|
||||
## Contributors
|
||||
|
||||
This project is a combined effort of dedicated people from across the world. [Check out the list of all these amazing people](https://invoke-ai.github.io/InvokeAI/other/CONTRIBUTORS/). We thank them for their time, hard work and effort.
|
||||
|
||||
## Code of Conduct
|
||||
|
||||
The InvokeAI community is a welcoming place, and we want your help in maintaining that. Please review our [Code of Conduct](https://github.com/invoke-ai/InvokeAI/blob/main/CODE_OF_CONDUCT.md) to learn more - it's essential to maintaining a respectful and inclusive environment.
|
||||
|
||||
By making a contribution to this project, you certify that:
|
||||
|
||||
1. The contribution was created in whole or in part by you and you have the right to submit it under the open-source license indicated in this project’s GitHub repository; or
|
||||
2. The contribution is based upon previous work that, to the best of your knowledge, is covered under an appropriate open-source license and you have the right under that license to submit that work with modifications, whether created in whole or in part by you, under the same open-source license (unless you are permitted to submit under a different license); or
|
||||
3. The contribution was provided directly to you by some other person who certified (1) or (2) and you have not modified it; or
|
||||
4. You understand and agree that this project and the contribution are public and that a record of the contribution (including all personal information you submit with it, including your sign-off) is maintained indefinitely and may be redistributed consistent with this project or the open-source license(s) involved.
|
||||
|
||||
This disclaimer is not a license and does not grant any rights or permissions. You must obtain necessary permissions and licenses, including from third parties, before contributing to this project.
|
||||
|
||||
This disclaimer is provided "as is" without warranty of any kind, whether expressed or implied, including but not limited to the warranties of merchantability, fitness for a particular purpose, or non-infringement. In no event shall the authors or copyright holders be liable for any claim, damages, or other liability, whether in an action of contract, tort, or otherwise, arising from, out of, or in connection with the contribution or the use or other dealings in the contribution.
|
||||
589
docs/deprecated/CLI.md
Normal file
@@ -0,0 +1,589 @@
|
||||
---
|
||||
title: Command-Line Interface
|
||||
---
|
||||
|
||||
# :material-bash: CLI
|
||||
|
||||
## **Interactive Command Line Interface**
|
||||
|
||||
The InvokeAI command line interface (CLI) provides scriptable access
|
||||
to InvokeAI's features. Some advanced features are only available
|
||||
through the CLI, though they eventually find their way into the WebUI.
|
||||
|
||||
The CLI is accessible from the `invoke.sh`/`invoke.bat` launcher by
|
||||
selecting option (1). Alternatively, it can be launched directly from
|
||||
the command line by activating the InvokeAI environment and giving the
|
||||
command:
|
||||
|
||||
```bash
|
||||
invokeai
|
||||
```
|
||||
|
||||
After some startup messages, you will be presented with the `invoke> `
|
||||
prompt. Here you can type prompts to generate images and issue other
|
||||
commands to load and manipulate generative models. The CLI has a large
|
||||
number of command-line options that control its behavior. To get a
|
||||
concise summary of the options, call `invokeai` with the `--help` argument:
|
||||
|
||||
```bash
|
||||
invokeai --help
|
||||
```
|
||||
|
||||
The script uses the readline library to allow for in-line editing, command
|
||||
history (++up++ and ++down++), autocompletion, and more. To help keep track of
|
||||
which prompts generated which images, the script writes a log file of image
|
||||
names and prompts to the selected output directory.
|
||||
|
||||
Here is a typical session
|
||||
|
||||
```bash
|
||||
PS1:C:\Users\fred> invokeai
|
||||
* Initializing, be patient...
|
||||
* Initializing, be patient...
|
||||
>> Initialization file /home/lstein/invokeai/invokeai.init found. Loading...
|
||||
>> Internet connectivity is True
|
||||
>> InvokeAI, version 2.3.0-rc5
|
||||
>> InvokeAI runtime directory is "/home/lstein/invokeai"
|
||||
>> GFPGAN Initialized
|
||||
>> CodeFormer Initialized
|
||||
>> ESRGAN Initialized
|
||||
>> Using device_type cuda
|
||||
>> xformers memory-efficient attention is available and enabled
|
||||
(...more initialization messages...)
|
||||
* Initialization done! Awaiting your command (-h for help, 'q' to quit)
|
||||
invoke> ashley judd riding a camel -n2 -s150
|
||||
Outputs:
|
||||
outputs/img-samples/00009.png: "ashley judd riding a camel" -n2 -s150 -S 416354203
|
||||
outputs/img-samples/00010.png: "ashley judd riding a camel" -n2 -s150 -S 1362479620
|
||||
|
||||
invoke> "there's a fly in my soup" -n6 -g
|
||||
outputs/img-samples/00011.png: "there's a fly in my soup" -n6 -g -S 2685670268
|
||||
seeds for individual rows: [2685670268, 1216708065, 2335773498, 822223658, 714542046, 3395302430]
|
||||
invoke> q
|
||||
```
|
||||
|
||||

|
||||
|
||||
## Arguments
|
||||
|
||||
The script recognizes a series of command-line switches that will
|
||||
change important global defaults, such as the directory for image
|
||||
outputs and the location of the model weight files.
|
||||
|
||||
### List of arguments recognized at the command line
|
||||
|
||||
These command-line arguments can be passed to `invoke.py` when you first run it
|
||||
from the Windows, Mac or Linux command line. Some set defaults that can be
|
||||
overridden on a per-prompt basis (see
|
||||
[List of prompt arguments](#list-of-prompt-arguments)). Others can only be set at startup.
|
||||
|
||||
| Argument <img width="240" align="right"/> | Shortcut <img width="100" align="right"/> | Default <img width="320" align="right"/> | Description |
|
||||
| ----------------------------------------- | ----------------------------------------- | ---------------------------------------------- | ---------------------------------------------------------------------------------------------------- |
|
||||
| `--help` | `-h` | | Print a concise help message. |
|
||||
| `--outdir <path>` | `-o<path>` | `outputs/img_samples` | Location for generated images. |
|
||||
| `--prompt_as_dir` | `-p` | `False` | Name output directories using the prompt text. |
|
||||
| `--from_file <path>` | | `None` | Read list of prompts from a file. Use `-` to read from standard input |
|
||||
| `--model <modelname>` | | `stable-diffusion-1.5` | Loads the initial model specified in configs/models.yaml. |
|
||||
| `--ckpt_convert ` | | `False` | If provided both .ckpt and .safetensors files will be auto-converted into diffusers format in memory |
|
||||
| `--autoconvert <path>` | | `None` | On startup, scan the indicated directory for new .ckpt/.safetensors files and automatically convert and import them |
|
||||
| `--precision` | | `fp16` | Provide `fp32` for full precision mode, `fp16` for half-precision. `fp32` needed for Macintoshes and some NVidia cards. |
|
||||
| `--png_compression <0-9>` | `-z<0-9>` | `6` | Select level of compression for output files, from 0 (no compression) to 9 (max compression) |
|
||||
| `--safety-checker` | | `False` | Activate safety checker for NSFW and other potentially disturbing imagery |
|
||||
| `--patchmatch`, `--no-patchmatch` | | `--patchmatch` | Load/Don't load the PatchMatch inpainting extension |
|
||||
| `--xformers`, `--no-xformers` | | `--xformers` | Load/Don't load the Xformers memory-efficient attention module (CUDA only) |
|
||||
| `--web` | | `False` | Start in web server mode |
|
||||
| `--host <ip addr>` | | `localhost` | Which network interface web server should listen on. Set to 0.0.0.0 to listen on any. |
|
||||
| `--port <port>` | | `9090` | Which port web server should listen for requests on. |
|
||||
| `--config <path>` | | `configs/models.yaml` | Configuration file for models and their weights. |
|
||||
| `--iterations <int>` | `-n<int>` | `1` | How many images to generate per prompt. |
|
||||
| `--width <int>` | `-W<int>` | `512` | Width of generated image |
|
||||
| `--height <int>` | `-H<int>` | `512` | Height of generated image |
| `--steps <int>` | `-s<int>` | `50` | How many steps of refinement to apply |
|
||||
| `--strength <float>` | `-f<float>` | `0.75` | For img2img: how hard to try to match the prompt to the initial image. Ranges from 0.0-0.99, with higher values replacing the initial image completely. |
|
||||
| `--fit` | `-F` | `False` | For img2img: scale the init image to fit into the specified -H and -W dimensions |
|
||||
| `--grid` | `-g` | `False` | Save all image series as a grid rather than individually. |
|
||||
| `--sampler <sampler>` | `-A<sampler>` | `k_lms` | Sampler to use. Use `-h` to get list of available samplers. |
|
||||
| `--seamless` | | `False` | Create interesting effects by tiling elements of the image. |
|
||||
| `--embedding_path <path>` | | `None` | Path to pre-trained embedding manager checkpoints, for custom models |
|
||||
| `--gfpgan_model_path` | | `experiments/pretrained_models/GFPGANv1.4.pth` | Path to GFPGAN model file. |
|
||||
| `--free_gpu_mem` | | `False` | Free GPU memory after sampling, to allow image decoding and saving in low VRAM conditions |
|
||||
| `--precision` | | `auto` | Set model precision, default is selected by device. Options: auto, float32, float16, autocast |
|
||||
|
||||
!!! warning "These arguments are deprecated but still work"
|
||||
|
||||
<div align="center" markdown>
|
||||
|
||||
| Argument | Shortcut | Default | Description |
|
||||
|--------------------|------------|---------------------|--------------|
|
||||
| `--full_precision` | | `False` | Same as `--precision=fp32`|
|
||||
| `--weights <path>` | | `None` | Path to weights file; use `--model stable-diffusion-1.4` instead |
|
||||
| `--laion400m` | `-l` | `False` | Use older LAION400m weights; use `--model=laion400m` instead |
|
||||
|
||||
</div>
|
||||
|
||||
!!! tip
|
||||
|
||||
On Windows systems, you may run into
|
||||
problems when passing the invoke script standard backslashed path
|
||||
names because the Python interpreter treats "\" as an escape.
|
||||
You can either double your slashes (ick): `C:\\path\\to\\my\\file`, or
|
||||
use Linux/Mac style forward slashes (better): `C:/path/to/my/file`.
|
||||
|
||||
## The .invokeai initialization file
|
||||
|
||||
To start up invoke.py with your preferred settings, place your desired
|
||||
startup options in a file in your home directory named `.invokeai`. The
|
||||
file should contain the startup options as you would type them on the
|
||||
command line (`--steps=10 --grid`), one argument per line, or a
|
||||
mixture of both using any of the accepted command switch formats:
|
||||
|
||||
!!! example "my unmodified initialization file"
|
||||
|
||||
```bash title="~/.invokeai" linenums="1"
|
||||
# InvokeAI initialization file
|
||||
# This is the InvokeAI initialization file, which contains command-line default values.
|
||||
# Feel free to edit. If anything goes wrong, you can re-initialize this file by deleting
|
||||
# or renaming it and then running invokeai-configure again.
|
||||
|
||||
# The --root option below points to the folder in which InvokeAI stores its models, configs and outputs.
|
||||
--root="/Users/mauwii/invokeai"
|
||||
|
||||
# the --outdir option controls the default location of image files.
|
||||
--outdir="/Users/mauwii/invokeai/outputs"
|
||||
|
||||
# You may place other frequently-used startup commands here, one or more per line.
|
||||
# Examples:
|
||||
# --web --host=0.0.0.0
|
||||
# --steps=20
|
||||
# -Ak_euler_a -C10.0
|
||||
```
|
||||
|
||||
!!! note
|
||||
|
||||
The initialization file only accepts the command line arguments.
|
||||
There are additional arguments that you can provide on the `invoke>` command
|
||||
line (such as `-n` or `--iterations`) that cannot be entered into this file.
|
||||
Also be alert for empty blank lines at the end of the file, which will cause
|
||||
an arguments error at startup time.
|
||||
|
||||
## List of prompt arguments
|
||||
|
||||
After the invoke.py script initializes, it will present you with an `invoke>`
|
||||
prompt. Here you can enter information to generate images from text
|
||||
([txt2img](#txt2img)), to embellish an existing image or sketch
|
||||
([img2img](#img2img)), or to selectively alter chosen regions of the image
|
||||
([inpainting](#inpainting)).
|
||||
|
||||
### txt2img
|
||||
|
||||
!!! example ""
|
||||
|
||||
```bash
|
||||
invoke> waterfall and rainbow -W640 -H480
|
||||
```
|
||||
|
||||
This will create the requested image with the dimensions 640 (width)
|
||||
and 480 (height).
|
||||
|
||||
Here are the `invoke>` commands that apply to txt2img:
|
||||
|
||||
| Argument <img width="680" align="right"/> | Shortcut <img width="420" align="right"/> | Default <img width="480" align="right"/> | Description |
|
||||
| ----------------------------------------- | ----------------------------------------- | ---------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
|
||||
| "my prompt" | | | Text prompt to use. The quotation marks are optional. |
|
||||
| `--width <int>` | `-W<int>` | `512` | Width of generated image |
|
||||
| `--height <int>` | `-H<int>` | `512` | Height of generated image |
|
||||
| `--iterations <int>` | `-n<int>` | `1` | How many images to generate from this prompt |
|
||||
| `--steps <int>` | `-s<int>` | `50` | How many steps of refinement to apply |
|
||||
| `--cfg_scale <float>` | `-C<float>` | `7.5` | How hard to try to match the prompt to the generated image; any number greater than 1.0 works, but the useful range is roughly 5.0 to 20.0 |
|
||||
| `--seed <int>` | `-S<int>` | `None` | Set the random seed for the next series of images. This can be used to recreate an image generated previously. |
|
||||
| `--sampler <sampler>` | `-A<sampler>` | `k_lms` | Sampler to use. Use -h to get list of available samplers. |
|
||||
| `--karras_max <int>` | | `29` | When using k\_\* samplers, set the maximum number of steps before shifting from using the Karras noise schedule (good for low step counts) to the LatentDiffusion noise schedule (good for high step counts) This value is sticky. [29] |
|
||||
| `--hires_fix` | | | Larger images often have duplication artefacts. This option suppresses duplicates by generating the image at low res, and then using img2img to increase the resolution |
|
||||
| `--png_compression <0-9>` | `-z<0-9>` | `6` | Select level of compression for output files, from 0 (no compression) to 9 (max compression) |
|
||||
| `--grid` | `-g` | `False` | Turn on grid mode to return a single image combining all the images generated by this prompt |
|
||||
| `--individual` | `-i` | `True` | Turn off grid mode (deprecated; leave off --grid instead) |
|
||||
| `--outdir <path>` | `-o<path>` | `outputs/img_samples` | Temporarily change the location of these images |
|
||||
| `--seamless` | | `False` | Activate seamless tiling for interesting effects |
|
||||
| `--seamless_axes` | | `x,y` | Specify which axes to use circular convolution on. |
|
||||
| `--log_tokenization` | `-t` | `False` | Display a color-coded list of the parsed tokens derived from the prompt |
|
||||
| `--skip_normalization` | `-x` | `False` | Weighted subprompts will not be normalized. See [Weighted Prompts](../features/OTHER.md#weighted-prompts) |
|
||||
| `--upscale <int> <float>` | `-U <int> <float>` | `-U 1 0.75` | Upscale image by magnification factor (2, 4), and set strength of upscaling (0.0-1.0). If strength not set, will default to 0.75. |
|
||||
| `--facetool_strength <float>` | `-G <float> ` | `-G0` | Fix faces (defaults to using the GFPGAN algorithm); argument indicates how hard the algorithm should try (0.0-1.0) |
|
||||
| `--facetool <name>` | `-ft <name>` | `-ft gfpgan` | Select face restoration algorithm to use: gfpgan, codeformer |
|
||||
| `--codeformer_fidelity` | `-cf <float>` | `0.75` | Used along with CodeFormer. Takes values between 0 and 1. 0 produces high quality but low accuracy. 1 produces high accuracy but low quality |
|
||||
| `--save_original` | `-save_orig` | `False` | When upscaling or fixing faces, this will cause the original image to be saved rather than replaced. |
|
||||
| `--variation <float>` | `-v<float>` | `0.0` | Add a bit of noise (0.0=none, 1.0=high) to the image in order to generate a series of variations. Usually used in combination with `-S<seed>` and `-n<int>` to generate a series of riffs on a starting image. See [Variations](../features/VARIATIONS.md). |
|
||||
| `--with_variations <pattern>` | | `None` | Combine two or more variations. See [Variations](../features/VARIATIONS.md) for how to use this. |
|
||||
| `--save_intermediates <n>` | | `None` | Save the image from every nth step into an "intermediates" folder inside the output directory |
|
||||
| `--h_symmetry_time_pct <float>` | | `None` | Create symmetry along the X axis at the desired percent complete of the generation process. (Must be between 0.0 and 1.0; set to a very small number like 0.0001 for just after the first step of generation.) |
|
||||
| `--v_symmetry_time_pct <float>` | | `None` | Create symmetry along the Y axis at the desired percent complete of the generation process. (Must be between 0.0 and 1.0; set to a very small number like 0.0001 for just after the first step of generation.) |
|
||||
|
||||
!!! note
|
||||
|
||||
the width and height of the image must be multiples of 64. You can
|
||||
provide different values, but they will be rounded down to the nearest multiple
|
||||
of 64.
|
||||
|
||||
!!! example "This is an example of img2img"
|
||||
|
||||
```bash
|
||||
invoke> waterfall and rainbow -I./vacation-photo.png -W640 -H480 --fit
|
||||
```
|
||||
|
||||
This will modify the indicated vacation photograph by making it more like the
|
||||
prompt. Results will vary greatly depending on what is in the image. We also ask
|
||||
to --fit the image into a box no bigger than 640x480. Otherwise the image size
|
||||
will be identical to the provided photo and you may run out of memory if it is
|
||||
large.
|
||||
|
||||
In addition to the command-line options recognized by txt2img, img2img accepts
|
||||
additional options:
|
||||
|
||||
| Argument <img width="160" align="right"/> | Shortcut | Default | Description |
|
||||
| ----------------------------------------- | ----------- | ------- | ------------------------------------------------------------------------------------------------------------------------------------------ |
|
||||
| `--init_img <path>` | `-I<path>` | `None` | Path to the initialization image |
|
||||
| `--fit` | `-F` | `False` | Scale the image to fit into the specified -H and -W dimensions |
|
||||
| `--strength <float>` | `-f<float>` | `0.75` | How hard to try to match the prompt to the initial image. Ranges from 0.0-0.99, with higher values replacing the initial image completely. |
|
||||
|
||||
### inpainting
|
||||
|
||||
!!! example ""
|
||||
|
||||
```bash
|
||||
invoke> waterfall and rainbow -I./vacation-photo.png -M./vacation-mask.png -W640 -H480 --fit
|
||||
```
|
||||
|
||||
This will do the same thing as img2img, but image alterations will
|
||||
only occur within transparent areas defined by the mask file specified
|
||||
by `-M`. You may also supply just a single initial image with the areas
|
||||
to overpaint made transparent, but you must be careful not to destroy
|
||||
the pixels underneath when you create the transparent areas. See
|
||||
[Inpainting](INPAINTING.md) for details.
|
||||
|
||||
inpainting accepts all the arguments used for txt2img and img2img, as well as
|
||||
the --mask (-M) and --text_mask (-tm) arguments:
|
||||
|
||||
| Argument <img width="100" align="right"/> | Shortcut | Default | Description |
|
||||
| ----------------------------------------- | ------------------------ | ------- | ------------------------------------------------------------------------------------------------ |
|
||||
| `--init_mask <path>` | `-M<path>` | `None` | Path to an image the same size as the initial_image, with areas for inpainting made transparent. |
|
||||
| `--invert_mask ` | | False | If true, invert the mask so that transparent areas are opaque and vice versa. |
|
||||
| `--text_mask <prompt> [<float>]` | `-tm <prompt> [<float>]` | `None` | Create a mask from a text prompt describing part of the image |
|
||||
|
||||
The mask may either be an image with transparent areas, in which case the
|
||||
inpainting will occur in the transparent areas only, or a black and white image,
|
||||
in which case all black areas will be painted into.
|
||||
|
||||
`--text_mask` (short form `-tm`) is a way to generate a mask using a text
|
||||
description of the part of the image to replace. For example, if you have an
|
||||
image of a breakfast plate with a bagel, toast and scrambled eggs, you can
|
||||
selectively mask the bagel and replace it with a piece of cake this way:
|
||||
|
||||
```bash
|
||||
invoke> a piece of cake -I /path/to/breakfast.png -tm bagel
|
||||
```
|
||||
|
||||
The algorithm uses <a
|
||||
href="https://github.com/timojl/clipseg">clipseg</a> to classify different
|
||||
regions of the image. The classifier puts out a confidence score for each region
|
||||
it identifies. Generally regions that score above 0.5 are reliable, but if you
|
||||
are getting too much or too little masking you can adjust the threshold down (to
|
||||
get more mask), or up (to get less). In this example, by passing `-tm` a higher
|
||||
value, we are insisting on a more stringent classification.
|
||||
|
||||
```bash
|
||||
invoke> a piece of cake -I /path/to/breakfast.png -tm bagel 0.6
|
||||
```
|
||||
|
||||
### Custom Styles and Subjects
|
||||
|
||||
You can load and use hundreds of community-contributed Textual
|
||||
Inversion models just by typing the appropriate trigger phrase. Please
|
||||
see [Concepts Library](../features/CONCEPTS.md) for more details.
|
||||
|
||||
## Other Commands
|
||||
|
||||
The CLI offers a number of commands that begin with "!".
|
||||
|
||||
### Postprocessing images
|
||||
|
||||
To postprocess a file using face restoration or upscaling, use the `!fix`
|
||||
command.
|
||||
|
||||
#### `!fix`
|
||||
|
||||
This command runs a post-processor on a previously-generated image. It takes a
|
||||
PNG filename or path and applies your choice of the `-U`, `-G`, or `--embiggen`
|
||||
switches in order to fix faces or upscale. If you provide a filename, the script
|
||||
will look for it in the current output directory. Otherwise you can provide a
|
||||
full or partial path to the desired file.
|
||||
|
||||
Some examples:
|
||||
|
||||
!!! example "Upscale to 4X its original size and fix faces using codeformer"
|
||||
|
||||
```bash
|
||||
invoke> !fix 0000045.4829112.png -G1 -U4 -ft codeformer
|
||||
```
|
||||
|
||||
!!! example "Use the GFPGAN algorithm to fix faces, then upscale to 3X using --embiggen"
|
||||
|
||||
```bash
|
||||
invoke> !fix 0000045.4829112.png -G0.8 -ft gfpgan
|
||||
>> fixing outputs/img-samples/0000045.4829112.png
|
||||
>> retrieved seed 4829112 and prompt "boy enjoying a banana split"
|
||||
>> GFPGAN - Restoring Faces for image seed:4829112
|
||||
Outputs:
|
||||
[1] outputs/img-samples/000017.4829112.gfpgan-00.png: !fix "outputs/img-samples/0000045.4829112.png" -s 50 -S -W 512 -H 512 -C 7.5 -A k_lms -G 0.8
|
||||
```
|
||||
|
||||
#### `!mask`
|
||||
|
||||
This command takes an image, a text prompt, and uses the `clipseg` algorithm to
|
||||
automatically generate a mask of the area that matches the text prompt. It is
|
||||
useful for debugging the text masking process prior to inpainting with the
|
||||
`--text_mask` argument. See [Inpainting](INPAINTING.md) for details.
|
||||
|
||||
### Model selection and importation
|
||||
|
||||
The CLI allows you to add new models on the fly, as well as to switch
|
||||
among them rapidly without leaving the script. There are several
|
||||
different model formats, each described in the [Model Installation
|
||||
Guide](../installation/050_INSTALLING_MODELS.md).
|
||||
|
||||
#### `!models`
|
||||
|
||||
This prints out a list of the models defined in `config/models.yaml`. The active
|
||||
model is bold-faced.
|
||||
|
||||
Example:
|
||||
|
||||
<pre>
|
||||
inpainting-1.5 not loaded Stable Diffusion inpainting model
|
||||
<b>stable-diffusion-1.5 active Stable Diffusion v1.5</b>
|
||||
waifu-diffusion not loaded Waifu Diffusion v1.4
|
||||
</pre>
|
||||
|
||||
#### `!switch <model>`
|
||||
|
||||
This quickly switches from one model to another without leaving the CLI script.
|
||||
`invoke.py` uses a memory caching system; once a model has been loaded,
|
||||
switching back and forth is quick. The following example shows this in action.
|
||||
Note how the second column of the `!models` table changes to `cached` after a
|
||||
model is first loaded, and that the long initialization step is not needed when
|
||||
loading a cached model.
|
||||
|
||||
#### `!import_model <hugging_face_repo_ID>`
|
||||
|
||||
This imports and installs a `diffusers`-style model that is stored on
|
||||
the [HuggingFace Web Site](https://huggingface.co). You can look up
|
||||
any [Stable Diffusion diffusers
|
||||
model](https://huggingface.co/models?library=diffusers) and install it
|
||||
with a command like the following:
|
||||
|
||||
```bash
|
||||
!import_model prompthero/openjourney
|
||||
```
|
||||
|
||||
#### `!import_model <path/to/diffusers/directory>`
|
||||
|
||||
If you have a copy of a `diffusers`-style model saved to disk, you can
|
||||
import it by passing the path to the model's top-level directory.
|
||||
|
||||
#### `!import_model <url>`
|
||||
|
||||
For a `.ckpt` or `.safetensors` file, if you have a direct download
|
||||
URL for the file, you can provide it to `!import_model` and the file
|
||||
will be downloaded and installed for you.
|
||||
|
||||
#### `!import_model <path/to/model/weights.ckpt>`
|
||||
|
||||
This command imports a new model weights file into InvokeAI, makes it available
|
||||
for image generation within the script, and writes out the configuration for the
|
||||
model into `config/models.yaml` for use in subsequent sessions.
|
||||
|
||||
Provide `!import_model` with the path to a weights file ending in `.ckpt`. If
|
||||
you type a partial path and press tab, the CLI will autocomplete. Although it
|
||||
will also autocomplete to `.vae` files, these are not currently supported (but
|
||||
will be soon).
|
||||
|
||||
When you hit return, the CLI will prompt you to fill in additional information
|
||||
about the model, including the short name you wish to use for it with the
|
||||
`!switch` command, a brief description of the model, the default image width and
|
||||
height to use with this model, and the model's configuration file. The latter
|
||||
three fields are automatically filled with reasonable defaults. In the example
|
||||
below, the bold-faced text shows what the user typed in with the exception of
|
||||
the width, height and configuration file paths, which were filled in
|
||||
automatically.
|
||||
|
||||
#### `!import_model <path/to/directory_of_models>`
|
||||
|
||||
If you provide the path of a directory that contains one or more
|
||||
`.ckpt` or `.safetensors` files, the CLI will scan the directory and
|
||||
interactively offer to import the models it finds there. Also see the
|
||||
`--autoconvert` command-line option.
|
||||
|
||||
#### `!edit_model <name_of_model>`
|
||||
|
||||
The `!edit_model` command can be used to modify a model that is already defined
|
||||
in `config/models.yaml`. Call it with the short name of the model you wish to
|
||||
modify, and it will allow you to modify the model's `description`, `weights` and
|
||||
other fields.
|
||||
|
||||
Example:
|
||||
|
||||
<pre>
|
||||
invoke> <b>!edit_model waifu-diffusion</b>
|
||||
>> Editing model waifu-diffusion from configuration file ./configs/models.yaml
|
||||
description: <b>Waifu diffusion v1.4beta</b>
|
||||
weights: models/ldm/stable-diffusion-v1/<b>model-epoch10-float16.ckpt</b>
|
||||
config: configs/stable-diffusion/v1-inference.yaml
|
||||
width: 512
|
||||
height: 512
|
||||
|
||||
>> New configuration:
|
||||
waifu-diffusion:
|
||||
config: configs/stable-diffusion/v1-inference.yaml
|
||||
description: Waifu diffusion v1.4beta
|
||||
weights: models/ldm/stable-diffusion-v1/model-epoch10-float16.ckpt
|
||||
height: 512
|
||||
width: 512
|
||||
|
||||
OK to import [n]? y
|
||||
>> Caching model stable-diffusion-1.4 in system RAM
|
||||
>> Loading waifu-diffusion from models/ldm/stable-diffusion-v1/model-epoch10-float16.ckpt
|
||||
...
|
||||
</pre>
|
||||
|
||||
### History processing
|
||||
|
||||
The CLI provides a series of convenient commands for reviewing previous actions,
|
||||
retrieving them, modifying them, and re-running them.
|
||||
|
||||
#### `!history`
|
||||
|
||||
The invoke script keeps track of all the commands you issue during a session,
|
||||
allowing you to re-run them. On Mac and Linux systems, it also writes the
|
||||
command-line history out to disk, giving you access to the most recent 1000
|
||||
commands issued.
|
||||
|
||||
The `!history` command will return a numbered list of all the commands issued
|
||||
during the session (Windows), or the most recent 1000 commands (Mac|Linux). You
|
||||
can then repeat a command by using the command `!NNN`, where "NNN" is the
|
||||
history line number. For example:
|
||||
|
||||
!!! example ""
|
||||
|
||||
```bash
|
||||
invoke> !history
|
||||
...
|
||||
[14] happy woman sitting under tree wearing broad hat and flowing garment
|
||||
[15] beautiful woman sitting under tree wearing broad hat and flowing garment
|
||||
[18] beautiful woman sitting under tree wearing broad hat and flowing garment -v0.2 -n6
|
||||
[20] watercolor of beautiful woman sitting under tree wearing broad hat and flowing garment -v0.2 -n6 -S2878767194
|
||||
[21] surrealist painting of beautiful woman sitting under tree wearing broad hat and flowing garment -v0.2 -n6 -S2878767194
|
||||
...
|
||||
invoke> !20
|
||||
invoke> watercolor of beautiful woman sitting under tree wearing broad hat and flowing garment -v0.2 -n6 -S2878767194
|
||||
```
|
||||
|
||||
#### `!fetch`
|
||||
|
||||
This command retrieves the generation parameters from a previously generated
|
||||
image and either loads them into the command line (Linux|Mac), or prints them
|
||||
out in a comment for copy-and-paste (Windows). You may provide either the name
|
||||
of a file in the current output directory, or a full file path. Specify path to
|
||||
a folder with image png files, and wildcard \*.png to retrieve the dream command
|
||||
used to generate the images, and save them to a file commands.txt for further
|
||||
processing.
|
||||
|
||||
!!! example "load the generation command for a single png file"
|
||||
|
||||
```bash
|
||||
invoke> !fetch 0000015.8929913.png
|
||||
# the script returns the next line, ready for editing and running:
|
||||
invoke> a fantastic alien landscape -W 576 -H 512 -s 60 -A plms -C 7.5
|
||||
```
|
||||
|
||||
!!! example "fetch the generation commands from a batch of files and store them into `selected.txt`"
|
||||
|
||||
```bash
|
||||
invoke> !fetch outputs\selected-imgs\*.png selected.txt
|
||||
```
|
||||
|
||||
#### `!replay`
|
||||
|
||||
This command replays a text file generated by `!fetch` or created manually.
|
||||
|
||||
!!! example
|
||||
|
||||
```bash
|
||||
invoke> !replay outputs\selected-imgs\selected.txt
|
||||
```
|
||||
|
||||
!!! note
|
||||
|
||||
These commands may behave unexpectedly if given a PNG file that was
|
||||
not generated by InvokeAI.
|
||||
|
||||
#### `!search <search string>`
|
||||
|
||||
This is similar to !history but it only returns lines that contain
|
||||
`search string`. For example:
|
||||
|
||||
```bash
|
||||
invoke> !search surreal
|
||||
[21] surrealist painting of beautiful woman sitting under tree wearing broad hat and flowing garment -v0.2 -n6 -S2878767194
|
||||
```
|
||||
|
||||
#### `!clear`
|
||||
|
||||
This clears the search history from memory and disk. Be advised that this
|
||||
operation is irreversible and does not issue any warnings!
|
||||
|
||||
## Command-line editing and completion
|
||||
|
||||
The command-line offers convenient history tracking, editing, and command
|
||||
completion.
|
||||
|
||||
- To scroll through previous commands and potentially edit/reuse them, use the
|
||||
++up++ and ++down++ keys.
|
||||
- To edit the current command, use the ++left++ and ++right++ keys to position
|
||||
the cursor, and then ++backspace++, ++delete++ or insert characters.
|
||||
- To move to the very beginning of the command, type ++ctrl+a++ (or
|
||||
++command+a++ on the Mac)
|
||||
- To move to the end of the command, type ++ctrl+e++.
|
||||
- To cut a section of the command, position the cursor where you want to start
|
||||
cutting and type ++ctrl+k++
|
||||
- To paste a cut section back in, position the cursor where you want to paste,
|
||||
and type ++ctrl+y++
|
||||
|
||||
Windows users can get similar, but more limited, functionality if they launch
|
||||
`invoke.py` with the `winpty` program and have the `pyreadline3` library
|
||||
installed:
|
||||
|
||||
```batch
|
||||
> winpty python scripts\invoke.py
|
||||
```
|
||||
|
||||
On the Mac and Linux platforms, when you exit invoke.py, the last 1000 lines of
|
||||
your command-line history will be saved. When you restart `invoke.py`, you can
|
||||
access the saved history using the ++up++ key.
|
||||
|
||||
In addition, limited command-line completion is installed. In various contexts,
|
||||
you can start typing your command and press ++tab++. A list of potential
|
||||
completions will be presented to you. You can then type a little more, hit
|
||||
++tab++ again, and eventually autocomplete what you want.
|
||||
|
||||
When specifying file paths using the one-letter shortcuts, the CLI will attempt
|
||||
to complete pathnames for you. This is most handy for the `-I` (init image) and
|
||||
`-M` (init mask) paths. To initiate completion, start the path with a slash
|
||||
(`/`) or `./`. For example:
|
||||
|
||||
```bash
|
||||
invoke> zebra with a mustache -I./test-pictures<TAB>
|
||||
-I./test-pictures/Lincoln-and-Parrot.png -I./test-pictures/zebra.jpg -I./test-pictures/madonna.png
|
||||
-I./test-pictures/bad-sketch.png -I./test-pictures/man_with_eagle/
|
||||
```
|
||||
|
||||
You can then type ++z++, hit ++tab++ again, and it will autofill to `zebra.jpg`.
|
||||
|
||||
More text completion features (such as autocompleting seeds) are on their way.
|
||||
167
docs/deprecated/EMBIGGEN.md
Normal file
@@ -0,0 +1,167 @@
|
||||
---
|
||||
title: Embiggen
|
||||
---
|
||||
|
||||
# :material-loupe: Embiggen
|
||||
|
||||
**upscale your images on limited memory machines**
|
||||
|
||||
GFPGAN and Real-ESRGAN are both memory intensive. In order to avoid
|
||||
crashes and memory overloads during the Stable Diffusion process,
|
||||
these effects are applied after Stable Diffusion has completed its
|
||||
work.
|
||||
|
||||
In single image generations, you will see the output right away but
|
||||
when you are using multiple iterations, the images will first be
|
||||
generated and then upscaled and face restored after that process is
|
||||
complete. While the image generation is taking place, you will still
|
||||
be able to preview the base images.
|
||||
|
||||
If you wish to stop during the image generation but want to upscale or
|
||||
face restore a particular generated image, pass it again with the same
|
||||
prompt and generated seed along with the `-U` and `-G` prompt
|
||||
arguments to perform those actions.
|
||||
|
||||
## Embiggen
|
||||
|
||||
If you wanted to be able to do more (pixels) without running out of VRAM,
|
||||
or you want to upscale with details that couldn't possibly appear
|
||||
without the context of a prompt, this is the feature to try out.
|
||||
|
||||
Embiggen automates the process of taking an init image, upscaling it,
|
||||
cutting it into smaller tiles that slightly overlap, running all the
|
||||
tiles through img2img to refine details with respect to the prompt,
|
||||
and "stitching" the tiles back together into a cohesive image.
|
||||
|
||||
It automatically computes how many tiles are needed, and so it can be fed
|
||||
*ANY* size init image and perform Img2Img on it (though it will be run only
|
||||
one tile at a time, which can cause problems, see the Note at the end).
|
||||
|
||||
If you're familiar with "GoBig" (ala [progrock-stable](https://github.com/lowfuel/progrock-stable))
|
||||
it's similar to that, except it can work up to an arbitrarily large size
|
||||
(instead of just 2x), with tile overlaps configurable as a ratio, and
|
||||
has extra logic to re-run any number of the tile sub-sections of the image
|
||||
if for example a small part of a huge run got messed up.
|
||||
|
||||
### Usage
|
||||
|
||||
`-embiggen <scaling_factor> <esrgan_strength> <overlap_ratio OR overlap_pixels>`
|
||||
|
||||
Takes a scaling factor relative to the size of the `--init_img` (`-I`), followed by
|
||||
ESRGAN upscaling strength (0 - 1.0), followed by minimum amount of overlap
|
||||
between tiles as a decimal ratio (0 - 1.0) *OR* a number of pixels.
|
||||
|
||||
The scaling factor is how much larger than the `--init_img` the output
|
||||
should be, and will multiply both x and y axis, so an image that is a
|
||||
scaling factor of 3.0 has 3*3= 9 times as many pixels, and will take
|
||||
(at least) 9 times as long (see overlap for why it might be
|
||||
longer). If the `--init_img` is already the right size, use `-embiggen 1`,
|
||||
and it can also be less than one if the init_img is too big.
|
||||
|
||||
Esrgan_strength defaults to 0.75, and the overlap_ratio defaults to
|
||||
0.25, both are optional.
|
||||
|
||||
Unlike Img2Img, the `--width` (`-W`) and `--height` (`-H`) arguments
|
||||
do not control the size of the image as a whole, but the size of the
|
||||
tiles used to Embiggen the image.
|
||||
|
||||
ESRGAN is used to upscale the `--init_img` prior to cutting it into
|
||||
tiles/pieces to run through img2img and then stitch back
|
||||
together. Embiggen can be run without ESRGAN; just set the strength to
|
||||
zero (e.g. `-embiggen 1.75 0`). The output of Embiggen can also be
|
||||
upscaled after it's finished (`-U`).
|
||||
|
||||
The overlap is the minimum that tiles will overlap with adjacent
|
||||
tiles, specified as either a ratio or a number of pixels. How much the
|
||||
tiles overlap determines the likelihood the tiling will be noticeable;
|
||||
really small overlaps (e.g. a couple of pixels) may produce noticeable
|
||||
grid-like fuzzy distortions in the final stitched image. Though, as
|
||||
the overlapping space doesn't contribute to making the image bigger,
|
||||
the larger the overlap the more tiles (and the more time) it will
|
||||
take to finish.
|
||||
|
||||
Because the overlapping parts of tiles don't "contribute" to
|
||||
increasing size, every tile after the first in a row or column
|
||||
effectively only covers an extra `1 - overlap_ratio` on each axis. If
|
||||
the input/`--init_img` is same size as a tile, the ideal (for time)
|
||||
scaling factors with the default overlap (0.25) are 1.75, 2.5, 3.25,
|
||||
4.0, etc.
|
||||
|
||||
`-embiggen_tiles <spaced list of tiles>`
|
||||
|
||||
An advanced usage useful if you only want to alter parts of the image
|
||||
while running Embiggen. It takes a list of tiles by number to run and
|
||||
replace onto the initial image e.g. `1 3 5`. It's useful for either
|
||||
fixing problem spots from a previous Embiggen run, or selectively
|
||||
altering the prompt for sections of an image - for creative or
|
||||
coherency reasons.
|
||||
|
||||
Tiles are numbered starting with one, and left-to-right,
|
||||
top-to-bottom. So, if you are generating a 3x3 tiled image, the
|
||||
middle row would be `4 5 6`.
|
||||
|
||||
`-embiggen_strength <strength>`
|
||||
|
||||
Another advanced option if you want to experiment with the strength parameter
|
||||
that embiggen uses when it calls Img2Img. Values range from 0.0 to 1.0
|
||||
and lower values preserve more of the character of the initial image.
|
||||
Values that are too high will result in a completely different end image,
|
||||
while values that are too low will result in an image not dissimilar to one
|
||||
you would get with ESRGAN upscaling alone. The default value is 0.4.
|
||||
|
||||
### Examples
|
||||
|
||||
!!! example ""
|
||||
|
||||
Running Embiggen with 512x512 tiles on an existing image, scaling up by a factor of 2.5x;
|
||||
and doing the same again (default ESRGAN strength is 0.75, default overlap between tiles is 0.25):
|
||||
|
||||
```bash
|
||||
invoke > a photo of a forest at sunset -s 100 -W 512 -H 512 -I outputs/forest.png -f 0.4 -embiggen 2.5
|
||||
invoke > a photo of a forest at sunset -s 100 -W 512 -H 512 -I outputs/forest.png -f 0.4 -embiggen 2.5 0.75 0.25
|
||||
```
|
||||
|
||||
If your starting image was also 512x512 this should have taken 9 tiles.
|
||||
|
||||
!!! example ""
|
||||
|
||||
If there weren't enough clouds in the sky of that forest you just made
|
||||
(and that image is about 1280 pixels (512*2.5) wide A.K.A. three
|
||||
512x512 tiles with 0.25 overlaps wide) we can replace that top row of
|
||||
tiles:
|
||||
|
||||
```bash
|
||||
invoke> a photo of puffy clouds over a forest at sunset -s 100 -W 512 -H 512 -I outputs/000002.seed.png -f 0.5 -embiggen_tiles 1 2 3
|
||||
```
|
||||
|
||||
## Fixing Previously-Generated Images
|
||||
|
||||
It is easy to apply embiggen to any previously-generated file without having to
|
||||
look up the original prompt and provide an initial image. Just use the
|
||||
syntax `!fix path/to/file.png <embiggen>`. For example, you can rewrite the
|
||||
previous command to look like this:
|
||||
|
||||
```bash
|
||||
invoke> !fix ./outputs/000002.seed.png -embiggen_tiles 1 2 3
|
||||
```
|
||||
|
||||
A new file named `000002.seed.fixed.png` will be created in the output directory. Note that
|
||||
the `!fix` command does not replace the original file, unlike the behavior at generate time.
|
||||
You do not need to provide the prompt, and `!fix` automatically selects a good strength for
|
||||
embiggen-ing.
|
||||
|
||||
!!! note
|
||||
|
||||
Because the same prompt is used on all the tiled images, and the model
|
||||
doesn't have the context of anything outside the tile being run - it
|
||||
    can end up creating repeated patterns (also called 'motifs') across all
|
||||
the tiles based on that prompt. The best way to combat this is
|
||||
lowering the `--strength` (`-f`) to stay more true to the init image,
|
||||
and increasing the number of steps so there is more compute-time to
|
||||
create the detail. Anecdotally `--strength` 0.35-0.45 works pretty
|
||||
well on most things. It may also work great in some examples even with
|
||||
the `--strength` set high for patterns, landscapes, or subjects that
|
||||
are more abstract. Because this is (relatively) fast, you can also
|
||||
preserve the best parts from each.
|
||||
|
||||
Author: [Travco](https://github.com/travco)
|
||||
310
docs/deprecated/INPAINTING.md
Normal file
@@ -0,0 +1,310 @@
|
||||
---
|
||||
title: Inpainting
|
||||
---
|
||||
|
||||
# :octicons-paintbrush-16: Inpainting
|
||||
|
||||
## **Creating Transparent Regions for Inpainting**
|
||||
|
||||
Inpainting is really cool. To do it, you start with an initial image and use a
|
||||
photoeditor to make one or more regions transparent (i.e. they have a "hole" in
|
||||
them). You then provide the path to this image at the dream> command line using
|
||||
the `-I` switch. Stable Diffusion will only paint within the transparent region.
|
||||
|
||||
There's a catch. In the current implementation, you have to prepare the initial
|
||||
image correctly so that the underlying colors are preserved under the
|
||||
transparent area. Many image editing applications will by default erase the
|
||||
color information under the transparent pixels and replace them with white or
|
||||
black, which will lead to suboptimal inpainting. It often helps to apply
|
||||
incomplete transparency, such as any value between 1 and 99%.
|
||||
|
||||
You also must take care to export the PNG file in such a way that the color
|
||||
information is preserved. There is often an option in the export dialog that
|
||||
lets you specify this.
|
||||
|
||||
If your photoeditor is erasing the underlying color information, `dream.py` will
|
||||
give you a big fat warning. If you can't find a way to coax your photoeditor to
|
||||
retain color values under transparent areas, then you can combine the `-I` and
|
||||
`-M` switches to provide both the original unedited image and the masked
|
||||
(partially transparent) image:
|
||||
|
||||
```bash
|
||||
invoke> "man with cat on shoulder" -I./images/man.png -M./images/man-transparent.png
|
||||
```
|
||||
|
||||
## **Masking using Text**
|
||||
|
||||
You can also create a mask using a text prompt to select the part of the image
|
||||
you want to alter, using the [clipseg](https://github.com/timojl/clipseg)
|
||||
algorithm. This works on any image, not just ones generated by InvokeAI.
|
||||
|
||||
The `--text_mask` (short form `-tm`) option takes two arguments. The first
|
||||
argument is a text description of the part of the image you wish to mask (paint
|
||||
over). If the text description contains a space, you must surround it with
|
||||
quotation marks. The optional second argument is the minimum threshold for the
|
||||
mask classifier's confidence score, described in more detail below.
|
||||
|
||||
To see how this works in practice, here's an image of a still life painting that
|
||||
I got off the web.
|
||||
|
||||
<figure markdown>
|
||||

|
||||
</figure>
|
||||
|
||||
You can selectively mask out the orange and replace it with a baseball in this
|
||||
way:
|
||||
|
||||
```bash
|
||||
invoke> a baseball -I /path/to/still_life.png -tm orange
|
||||
```
|
||||
|
||||
<figure markdown>
|
||||

|
||||
</figure>
|
||||
|
||||
The clipseg classifier produces a confidence score for each region it
|
||||
identifies. Generally regions that score above 0.5 are reliable, but if you are
|
||||
getting too much or too little masking you can adjust the threshold down (to get
|
||||
more mask), or up (to get less). In this example, by passing `-tm` a higher
|
||||
value, we are insisting on a tighter mask. However, if you make it too high, the
|
||||
orange may not be picked up at all!
|
||||
|
||||
```bash
|
||||
invoke> a baseball -I /path/to/breakfast.png -tm orange 0.6
|
||||
```
|
||||
|
||||
The `!mask` command may be useful for debugging problems with the text2mask
|
||||
feature. The syntax is `!mask /path/to/image.png -tm <text> <threshold>`
|
||||
|
||||
It will generate three files:
|
||||
|
||||
- The image with the selected area highlighted.
|
||||
- it will be named `XXXXX.<imagename>.<prompt>.selected.png`
|
||||
- The image with the un-selected area highlighted.
|
||||
- it will be named `XXXXX.<imagename>.<prompt>.deselected.png`
|
||||
- The image with the selected area converted into a black and white image
|
||||
according to the threshold level
|
||||
- it will be named `XXXXX.<imagename>.<prompt>.masked.png`
|
||||
|
||||
The `.masked.png` file can then be directly passed to the `invoke>` prompt in
|
||||
the CLI via the `-M` argument. Do not attempt this with the `selected.png` or
|
||||
`deselected.png` files, as they contain some transparency throughout the image
|
||||
and will not produce the desired results.
|
||||
|
||||
Here is an example of how `!mask` works:
|
||||
|
||||
```bash
|
||||
invoke> !mask ./test-pictures/curly.png -tm hair 0.5
|
||||
>> generating masks from ./test-pictures/curly.png
|
||||
>> Initializing clipseg model for text to mask inference
|
||||
Outputs:
|
||||
[941.1] outputs/img-samples/000019.curly.hair.deselected.png: !mask ./test-pictures/curly.png -tm hair 0.5
|
||||
[941.2] outputs/img-samples/000019.curly.hair.selected.png: !mask ./test-pictures/curly.png -tm hair 0.5
|
||||
[941.3] outputs/img-samples/000019.curly.hair.masked.png: !mask ./test-pictures/curly.png -tm hair 0.5
|
||||
```
|
||||
|
||||
<figure markdown>
|
||||

|
||||
<figcaption>Original image "curly.png"</figcaption>
|
||||
</figure>
|
||||
|
||||
<figure markdown>
|
||||

|
||||
<figcaption>000019.curly.hair.selected.png</figcaption>
|
||||
</figure>
|
||||
|
||||
<figure markdown>
|
||||

|
||||
<figcaption>000019.curly.hair.deselected.png</figcaption>
|
||||
</figure>
|
||||
|
||||
<figure markdown>
|
||||

|
||||
<figcaption>000019.curly.hair.masked.png</figcaption>
|
||||
</figure>
|
||||
|
||||
It looks like we selected the hair pretty well at the 0.5 threshold (which is
|
||||
the default, so we didn't actually have to specify it), so let's have some fun:
|
||||
|
||||
```bash
|
||||
invoke> medusa with cobras -I ./test-pictures/curly.png -M 000019.curly.hair.masked.png -C20
|
||||
>> loaded input image of size 512x512 from ./test-pictures/curly.png
|
||||
...
|
||||
Outputs:
|
||||
[946] outputs/img-samples/000024.801380492.png: "medusa with cobras" -s 50 -S 801380492 -W 512 -H 512 -C 20.0 -I ./test-pictures/curly.png -A k_lms -f 0.75
|
||||
```
|
||||
|
||||
<figure markdown>
|
||||

|
||||
</figure>
|
||||
|
||||
You can also skip the `!mask` creation step and just select the masked
|
||||
|
||||
region directly:
|
||||
|
||||
```bash
|
||||
invoke> medusa with cobras -I ./test-pictures/curly.png -tm hair -C20
|
||||
```
|
||||
|
||||
## Using the RunwayML inpainting model
|
||||
|
||||
The
|
||||
[RunwayML Inpainting Model v1.5](https://huggingface.co/runwayml/stable-diffusion-inpainting)
|
||||
is a specialized version of
|
||||
[Stable Diffusion v1.5](https://huggingface.co/spaces/runwayml/stable-diffusion-v1-5)
|
||||
that contains extra channels specifically designed to enhance inpainting and
|
||||
outpainting. While it can do regular `txt2img` and `img2img`, it really shines
|
||||
when filling in missing regions. It has an almost uncanny ability to blend the
|
||||
new regions with existing ones in a semantically coherent way.
|
||||
|
||||
To install the inpainting model, follow the
|
||||
[instructions](../installation/050_INSTALLING_MODELS.md) for installing a new model.
|
||||
You may use either the CLI (`invoke.py` script) or directly edit the
|
||||
`configs/models.yaml` configuration file to do this. The main thing to watch out
|
||||
for is that the model `config` option must be set up to use
|
||||
`v1-inpainting-inference.yaml` rather than the `v1-inference.yaml` file that is
|
||||
used by Stable Diffusion 1.4 and 1.5.
|
||||
|
||||
After installation, your `models.yaml` should contain an entry that looks like
|
||||
this one:
|
||||
|
||||
```yml
|
||||
inpainting-1.5:
|
||||
weights: models/ldm/stable-diffusion-v1/sd-v1-5-inpainting.ckpt
|
||||
description: SD inpainting v1.5
|
||||
config: configs/stable-diffusion/v1-inpainting-inference.yaml
|
||||
vae: models/ldm/stable-diffusion-v1/vae-ft-mse-840000-ema-pruned.ckpt
|
||||
width: 512
|
||||
height: 512
|
||||
```
|
||||
|
||||
As shown in the example, you may include a VAE fine-tuning weights file as well.
|
||||
This is strongly recommended.
|
||||
|
||||
To use the custom inpainting model, launch `invoke.py` with the argument
|
||||
`--model inpainting-1.5` or alternatively from within the script use the
|
||||
`!switch inpainting-1.5` command to load and switch to the inpainting model.
|
||||
|
||||
You can now do inpainting and outpainting exactly as described above, but there
|
||||
will (likely) be a noticeable improvement in coherence. Txt2img and Img2img will
|
||||
work as well.
|
||||
|
||||
There are a few caveats to be aware of:
|
||||
|
||||
1. The inpainting model is larger than the standard model, and will use nearly 4
|
||||
GB of GPU VRAM. This makes it unlikely to run on a 4 GB graphics card.
|
||||
|
||||
2. When operating in Img2img mode, the inpainting model is much less steerable
|
||||
than the standard model. It is great for making small changes, such as
|
||||
changing the pattern of a fabric, or slightly changing a subject's expression
|
||||
or hair, but the model will resist making the dramatic alterations that the
|
||||
standard model lets you do.
|
||||
|
||||
3. While the `--hires` option works fine with the inpainting model, some special
|
||||
features, such as `--embiggen` are disabled.
|
||||
|
||||
4. Prompt weighting (`banana++ sushi`) and merging work well with the inpainting
|
||||
model, but prompt swapping
|
||||
(`a ("fluffy cat").swap("smiling dog") eating a hotdog`) will not have any
|
||||
effect due to the way the model is set up. You may use text masking (with
|
||||
`-tm thing-to-mask`) as an effective replacement.
|
||||
|
||||
5. The model tends to oversharpen images if you use high step or CFG values. If
|
||||
you need to do large steps, use the standard model.
|
||||
|
||||
6. The `--strength` (`-f`) option has no effect on the inpainting model due to
|
||||
its fundamental differences with the standard model. It will always take the
|
||||
full number of steps you specify.
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
Here are some troubleshooting tips for inpainting and outpainting.
|
||||
|
||||
## Inpainting is not changing the masked region enough!
|
||||
|
||||
One of the things to understand about how inpainting works is that it is
|
||||
equivalent to running img2img on just the masked (transparent) area. img2img
|
||||
builds on top of the existing image data, and therefore will attempt to preserve
|
||||
colors, shapes and textures to the best of its ability. Unfortunately this means
|
||||
that if you want to make a dramatic change in the inpainted region, for example
|
||||
replacing a red wall with a blue one, the algorithm will fight you.
|
||||
|
||||
You have a couple of options. The first is to increase the values of the
|
||||
requested steps (`-sXXX`), strength (`-f0.XX`), and/or classifier-free guidance
|
||||
(`-CXX.X`). If this is not working for you, a more extreme step is to provide
|
||||
the `--inpaint_replace 0.X` (`-r0.X`) option. This value ranges from 0.0 to 1.0.
|
||||
The higher it is the less attention the algorithm will pay to the data
|
||||
underneath the masked region. At high values this will enable you to replace
|
||||
colored regions entirely, but beware that the masked region may not blend in
|
||||
with the surrounding unmasked regions as well.
|
||||
|
||||
---
|
||||
|
||||
## Recipe for GIMP
|
||||
|
||||
[GIMP](https://www.gimp.org/) is a popular Linux photoediting tool.
|
||||
|
||||
1. Open image in GIMP.
|
||||
2. Layer->Transparency->Add Alpha Channel
|
||||
3. Use lasso tool to select region to mask
|
||||
4. Choose Select -> Float to create a floating selection
|
||||
5. Open the Layers toolbar (^L) and select "Floating Selection"
|
||||
6. Set opacity to a value between 0% and 99%
|
||||
7. Export as PNG
|
||||
8. In the export dialogue, Make sure the "Save colour values from transparent
|
||||
pixels" checkbox is selected.
|
||||
|
||||
---
|
||||
|
||||
## Recipe for Adobe Photoshop
|
||||
|
||||
1. Open image in Photoshop
|
||||
|
||||
<figure markdown>
|
||||

|
||||
</figure>
|
||||
|
||||
2. Use any of the selection tools (Marquee, Lasso, or Wand) to select the area
|
||||
you desire to inpaint.
|
||||
|
||||
<figure markdown>
|
||||

|
||||
</figure>
|
||||
|
||||
3. Because we'll be applying a mask over the area we want to preserve, you
|
||||
should now select the inverse by using the ++shift+ctrl+i++ shortcut, or
|
||||
right clicking and using the "Select Inverse" option.
|
||||
|
||||
4. You'll now create a mask by selecting the image layer, and Masking the
|
||||
selection. Make sure that you don't delete any of the underlying image, or
|
||||
your inpainting results will be dramatically impacted.
|
||||
|
||||
<figure markdown>
|
||||

|
||||
</figure>
|
||||
|
||||
5. Make sure to hide any background layers that are present. You should see the
|
||||
mask applied to your image layer, and the image on your canvas should display
|
||||
the checkered background.
|
||||
|
||||
<figure markdown>
|
||||

|
||||
</figure>
|
||||
|
||||
6. Save the image as a transparent PNG by using `File`-->`Save a Copy` from the
|
||||
menu bar, or by using the keyboard shortcut ++alt+ctrl+s++
|
||||
|
||||
<figure markdown>
|
||||

|
||||
</figure>
|
||||
|
||||
7. After following the inpainting instructions above (either through the CLI or
|
||||
the Web UI), marvel at your newfound ability to selectively invoke. Lookin'
|
||||
good!
|
||||
|
||||
<figure markdown>
|
||||

|
||||
</figure>
|
||||
|
||||
8. In the export dialogue, Make sure the "Save colour values from transparent
|
||||
pixels" checkbox is selected.
|
||||
171
docs/deprecated/OUTPAINTING.md
Normal file
@@ -0,0 +1,171 @@
|
||||
---
|
||||
title: Outpainting
|
||||
---
|
||||
|
||||
# :octicons-paintbrush-16: Outpainting
|
||||
|
||||
## Outpainting and outcropping
|
||||
|
||||
Outpainting is a process by which the AI generates parts of the image that are
|
||||
outside its original frame. It can be used to fix up images in which the subject
|
||||
is off center, or when some detail (often the top of someone's head!) is cut
|
||||
off.
|
||||
|
||||
InvokeAI supports two versions of outpainting, one called "outpaint" and the
|
||||
other "outcrop." They work slightly differently and each has its advantages and
|
||||
drawbacks.
|
||||
|
||||
### Outpainting
|
||||
|
||||
Outpainting is the same as inpainting, except that the painting occurs in the
|
||||
regions outside of the original image. To outpaint using the `invoke.py` command
|
||||
line script, prepare an image in which the borders to be extended are pure
|
||||
black. Add an alpha channel (if there isn't one already), and make the borders
|
||||
completely transparent and the interior completely opaque. If you wish to modify
|
||||
the interior as well, you may create transparent holes in the transparency
|
||||
layer, which `img2img` will paint into as usual.
|
||||
|
||||
Pass the image as the argument to the `-I` switch as you would for regular
|
||||
inpainting:
|
||||
|
||||
```bash
|
||||
invoke> a stream by a river -I /path/to/transparent_img.png
|
||||
```
|
||||
|
||||
You'll likely be delighted by the results.
|
||||
|
||||
### Tips
|
||||
|
||||
1. Do not try to expand the image too much at once. Generally it is best to
|
||||
expand the margins in 64-pixel increments. 128 pixels often works, but your
|
||||
mileage may vary depending on the nature of the image you are trying to
|
||||
outpaint into.
|
||||
|
||||
2. There are a series of switches that can be used to adjust how the inpainting
|
||||
algorithm operates. In particular, you can use these to minimize the seam
|
||||
that sometimes appears between the original image and the extended part.
|
||||
These switches are:
|
||||
|
||||
| switch | default | description |
|
||||
| -------------------------- | ------- | ---------------------------------------------------------------------- |
|
||||
| `--seam_size SEAM_SIZE ` | `0` | Size of the mask around the seam between original and outpainted image |
|
||||
| `--seam_blur SEAM_BLUR` | `0` | The amount to blur the seam inwards |
|
||||
| `--seam_strength STRENGTH` | `0.7` | The img2img strength to use when filling the seam |
|
||||
| `--seam_steps SEAM_STEPS` | `10` | The number of steps to use to fill the seam. |
|
||||
| `--tile_size TILE_SIZE` | `32` | The tile size to use for filling outpaint areas |
|
||||
|
||||
### Outcrop
|
||||
|
||||
The `outcrop` extension gives you a convenient `!fix` postprocessing command
|
||||
that allows you to extend a previously-generated image in 64 pixel increments in
|
||||
any direction. You can apply the module to any image previously-generated by
|
||||
InvokeAI. Note that it works with arbitrary PNG photographs, but not currently
|
||||
with JPG or other formats. Outcropping is particularly effective when combined
|
||||
with the
|
||||
[runwayML custom inpainting model](INPAINTING.md#using-the-runwayml-inpainting-model).
|
||||
|
||||
Consider this image:
|
||||
|
||||
<figure markdown>
|
||||

|
||||
</figure>
|
||||
|
||||
Pretty nice, but it's annoying that the top of her head is cut off. She's also a
|
||||
bit off center. Let's fix that!
|
||||
|
||||
```bash
|
||||
invoke> !fix images/curly.png --outcrop top 128 right 64 bottom 64
|
||||
```
|
||||
|
||||
This is saying to apply the `outcrop` extension by extending the top of the
|
||||
image by 128 pixels, and the right and bottom of the image by 64 pixels. You can
|
||||
use any combination of top|left|right|bottom, and specify any number of pixels
|
||||
to extend. You can also abbreviate `--outcrop` to `-c`.
|
||||
|
||||
The result looks like this:
|
||||
|
||||
<figure markdown>
|
||||

|
||||
</figure>
|
||||
|
||||
The new image is larger than the original (576x704) because 128 pixels were added
|
||||
to the top and 64 pixels each to the right and bottom sides. You will need enough VRAM to process an image of
|
||||
this size.
|
||||
|
||||
#### Outcropping non-InvokeAI images
|
||||
|
||||
You can outcrop an arbitrary image that was not generated by InvokeAI,
|
||||
but your results will vary. The `inpainting-1.5` model is highly
|
||||
recommended, but if not feasible, then you may be able to improve the
|
||||
output by conditioning the outcropping with a text prompt that
|
||||
describes the scene using the `--new_prompt` argument:
|
||||
|
||||
```bash
|
||||
invoke> !fix images/vacation.png --outcrop top 128 --new_prompt "family vacation"
|
||||
```
|
||||
|
||||
You may also provide a different seed for outcropping to use by passing
|
||||
`-S<seed>`. A negative seed will generate a new random seed.
|
||||
|
||||
A number of caveats:
|
||||
|
||||
1. Although you can specify any pixel values, they will be rounded up to the
|
||||
nearest multiple of 64. Smaller values are better. Larger extensions are more
|
||||
likely to generate artefacts. However, if you wish you can run the !fix
|
||||
command repeatedly to cautiously expand the image.
|
||||
|
||||
2. The extension is stochastic, meaning that each time you run it you'll get a
|
||||
slightly different result. You can run it repeatedly until you get an image
|
||||
you like. Unfortunately `!fix` does not currently respect the `-n`
|
||||
(`--iterations`) argument.
|
||||
|
||||
3. Your results will be _much_ better if you use the `inpainting-1.5` model
|
||||
released by runwayML and installed by default by `invokeai-configure`.
|
||||
This model was trained specifically to harmoniously fill in image gaps. The
|
||||
standard model will work as well, but you may notice color discontinuities at
|
||||
the border.
|
||||
|
||||
4. When using the `inpainting-1.5` model, you may notice subtle changes to the area
|
||||
outside the masked region. This is because the model performs an
|
||||
encoding/decoding on the image as a whole. This does not occur with the
|
||||
standard model.
|
||||
|
||||
## Outpaint
|
||||
|
||||
The `outpaint` extension does the same thing, but with subtle differences.
|
||||
Starting with the same image, here is how we would add an additional 64 pixels
|
||||
to the top of the image:
|
||||
|
||||
```bash
|
||||
invoke> !fix images/curly.png --out_direction top 64
|
||||
```
|
||||
|
||||
(You can abbreviate `--out_direction` as `-D`.)
|
||||
|
||||
The result is shown here:
|
||||
|
||||
<figure markdown>
|
||||

|
||||
</figure>
|
||||
|
||||
Although the effect is similar, there are significant differences from
|
||||
outcropping:
|
||||
|
||||
- You can only specify one direction to extend at a time.
|
||||
- The image is **not** resized. Instead, the image is shifted by the specified
|
||||
number of pixels. If you look carefully, you'll see that less of the lady's
|
||||
torso is visible in the image.
|
||||
- Because the image dimensions remain the same, there's no rounding to multiples
|
||||
of 64.
|
||||
- Attempting to outpaint larger areas will frequently give rise to ugly ghosting
|
||||
effects.
|
||||
- For best results, try increasing the step number.
|
||||
- If you don't specify a pixel value in `-D`, it will default to half of the
|
||||
whole image, which is likely not what you want.
|
||||
|
||||
!!! tip
|
||||
|
||||
Neither `outpaint` nor `outcrop` are perfect, but we continue to tune
|
||||
and improve them. If one doesn't work, try the other. You may also
|
||||
wish to experiment with other `img2img` arguments, such as `-C`, `-f`
|
||||
and `-s`.
|
||||
260
docs/faq.md
@@ -1,260 +0,0 @@
|
||||
# FAQ
|
||||
|
||||
!!! info "How to Reinstall"
|
||||
|
||||
Many issues can be resolved by re-installing the application. You won't lose any data by re-installing. We suggest downloading the [latest release](https://github.com/invoke-ai/InvokeAI/releases/latest) and using it to re-install the application. Consult the [installer guide](./installation/installer.md) for more information.
|
||||
|
||||
When you run the installer, you'll have an option to select the version to install. If you aren't ready to upgrade, you can choose the current version to fix a broken install.
|
||||
|
||||
If the troubleshooting steps on this page don't get you up and running, please either [create an issue] or hop on [discord] for help.
|
||||
|
||||
## How to Install
|
||||
|
||||
You can download the latest installers [here](https://github.com/invoke-ai/InvokeAI/releases).
|
||||
|
||||
Note that any releases marked as _pre-release_ are in a beta state. You may experience some issues, but we appreciate your help testing those! For stable/reliable installations, please install the [latest release].
|
||||
|
||||
## Downloading models and using existing models
|
||||
|
||||
The Model Manager tab in the UI provides a few ways to install models, including using your already-downloaded models. You'll see a popup directing you there on first startup. For more information, see the [model install docs].
|
||||
|
||||
## Missing models after updating to v4
|
||||
|
||||
If you find some models are missing after updating to v4, it's likely they weren't correctly registered before the update and didn't get picked up in the migration.
|
||||
|
||||
You can use the `Scan Folder` tab in the Model Manager UI to fix this. The models will either be in the old, now-unused `autoimport` folder, or your `models` folder.
|
||||
|
||||
- Find and copy your install's old `autoimport` folder path, inside the main install folder.
|
||||
- Go to the Model Manager and click `Scan Folder`.
|
||||
- Paste the path and scan.
|
||||
- IMPORTANT: Uncheck `Inplace install`.
|
||||
- Click `Install All` to install all found models, or just install the models you want.
|
||||
|
||||
Next, find and copy your install's `models` folder path (this could be your custom models folder path, or the `models` folder inside the main install folder).
|
||||
|
||||
Follow the same steps to scan and import the missing models.
|
||||
|
||||
## Slow generation
|
||||
|
||||
- Check the [system requirements] to ensure that your system is capable of generating images.
|
||||
- Check the `ram` setting in `invokeai.yaml`. This setting tells Invoke how much of your system RAM can be used to cache models. Having this too high or too low can slow things down. That said, it's generally safest to not set this at all and instead let Invoke manage it.
|
||||
- Check the `vram` setting in `invokeai.yaml`. This setting tells Invoke how much of your GPU VRAM can be used to cache models. Counter-intuitively, if this setting is too high, Invoke will need to do a lot of shuffling of models as it juggles the VRAM cache and the currently-loaded model. The default value of 0.25 generally works well for GPUs without 16GB or more VRAM. Even on a 24GB card, the default works well.
|
||||
- Check that your generations are happening on your GPU (if you have one). InvokeAI will log what is being used for generation upon startup. If your GPU isn't used, re-install to ensure the correct versions of torch get installed.
|
||||
- If you are on Windows, you may have exceeded your GPU's VRAM capacity and are using slower [shared GPU memory](#shared-gpu-memory-windows). There's a guide to opt out of this behaviour in the linked FAQ entry.
|
||||
|
||||
## Shared GPU Memory (Windows)
|
||||
|
||||
!!! tip "Nvidia GPUs with driver 536.40"
|
||||
|
||||
This only applies to current Nvidia cards with driver 536.40 or later, released in June 2023.
|
||||
|
||||
When the GPU doesn't have enough VRAM for a task, Windows is able to allocate some of its CPU RAM to the GPU. This is much slower than VRAM, but it does allow the system to generate when it otherwise might not have enough VRAM.
|
||||
|
||||
When shared GPU memory is used, generation slows down dramatically - but at least it doesn't crash.
|
||||
|
||||
If you'd like to opt out of this behavior and instead get an error when you exceed your GPU's VRAM, follow [this guide from Nvidia](https://nvidia.custhelp.com/app/answers/detail/a_id/5490).
|
||||
|
||||
Here's how to get the python path required in the linked guide:
|
||||
|
||||
- Run `invoke.bat`.
|
||||
- Select option 2 for developer console.
|
||||
- At least one python path will be printed. Copy the path that includes your invoke installation directory (typically the first).
|
||||
|
||||
## Installer cannot find python (Windows)
|
||||
|
||||
Ensure that you checked **Add python.exe to PATH** when installing Python. This can be found at the bottom of the Python Installer window. If you already have Python installed, you can re-run the python installer, choose the Modify option and check the box.
|
||||
|
||||
## Triton error on startup
|
||||
|
||||
This can be safely ignored. InvokeAI doesn't use Triton, but if you are on Linux and wish to dismiss the error, you can install Triton.
|
||||
|
||||
## Updated to 3.4.0 and xformers can’t load C++/CUDA
|
||||
|
||||
An issue occurred with your PyTorch update. Follow these steps to fix it:
|
||||
|
||||
1. Launch your invoke.bat / invoke.sh and select the option to open the developer console
|
||||
2. Run: `pip install ".[xformers]" --upgrade --force-reinstall --extra-index-url https://download.pytorch.org/whl/cu121`
|
||||
- If you run into an error with `typing_extensions`, re-open the developer console and run: `pip install -U typing-extensions`
|
||||
|
||||
Note that v3.4.0 is an old, unsupported version. Please upgrade to the [latest release].
|
||||
|
||||
## Install failed and says `pip` is out of date
|
||||
|
||||
An out of date `pip` typically won't cause an installation to fail. The cause of the error can likely be found above the message that says `pip` is out of date.
|
||||
|
||||
If you saw that warning but the install went well, don't worry about it (but you can update `pip` afterwards if you'd like).
|
||||
|
||||
## Replicate image found online
|
||||
|
||||
Most example images with prompts that you'll find on the internet have been generated using different software, so you can't expect to get identical results. In order to reproduce an image, you need to replicate the exact settings and processing steps, including (but not limited to) the model, the positive and negative prompts, the seed, the sampler, the exact image size, any upscaling steps, etc.
|
||||
|
||||
## OSErrors on Windows while installing dependencies
|
||||
|
||||
During a zip file installation or an update, installation stops with an error like this:
|
||||
|
||||
{:width="800px"}
|
||||
|
||||
To resolve this, re-install the application as described above.
|
||||
|
||||
## HuggingFace install failed due to invalid access token
|
||||
|
||||
Some HuggingFace models require you to authenticate using an [access token].
|
||||
|
||||
Invoke doesn't manage this token for you, but it's easy to set it up:
|
||||
|
||||
- Follow the instructions in the link above to create an access token. Copy it.
|
||||
- Run the launcher script.
|
||||
- Select option 2 (developer console).
|
||||
- Paste the following command:
|
||||
|
||||
```sh
|
||||
python -c "import huggingface_hub; huggingface_hub.login()"
|
||||
```
|
||||
|
||||
- Paste your access token when prompted and press Enter. You won't see anything when you paste it.
|
||||
- Type `n` if prompted about git credentials.
|
||||
|
||||
If you get an error, try the command again - maybe the token didn't paste correctly.
|
||||
|
||||
Once your token is set, start Invoke and try downloading the model again. The installer will automatically use the access token.
|
||||
|
||||
If the install still fails, you may not have access to the model.
|
||||
|
||||
## Stable Diffusion XL generation fails after trying to load UNet
|
||||
|
||||
InvokeAI is working in other respects, but when trying to generate
|
||||
images with Stable Diffusion XL you get a "Server Error". The text log
|
||||
in the launch window contains this log line above several more lines of
|
||||
error messages:
|
||||
|
||||
`INFO --> Loading model:D:\LONG\PATH\TO\MODEL, type sdxl:main:unet`
|
||||
|
||||
This failure mode occurs when there is a network glitch during
|
||||
downloading the very large SDXL model.
|
||||
|
||||
To address this, first go to the Model Manager and delete the
|
||||
Stable-Diffusion-XL-base-1.X model. Then, click the HuggingFace tab,
|
||||
paste the Repo ID stabilityai/stable-diffusion-xl-base-1.0 and install
|
||||
the model.
|
||||
|
||||
## Package dependency conflicts during installation or update
|
||||
|
||||
If you have previously installed InvokeAI or another Stable Diffusion
|
||||
package, the installer may occasionally pick up outdated libraries and
|
||||
either the installer or `invoke` will fail with complaints about
|
||||
library conflicts.
|
||||
|
||||
To resolve this, re-install the application as described above.
|
||||
|
||||
## Invalid configuration file
|
||||
|
||||
Everything seems to install ok, but you get a `ValidationError` when starting up the app.
|
||||
|
||||
This is caused by an invalid setting in the `invokeai.yaml` configuration file. The error message should tell you what is wrong.
|
||||
|
||||
Check the [configuration docs] for more detail about the settings and how to specify them.
|
||||
|
||||
## `ModuleNotFoundError: No module named 'controlnet_aux'`
|
||||
|
||||
`controlnet_aux` is a dependency of Invoke and appears to have been packaged or distributed strangely. Sometimes, it doesn't install correctly. This is outside our control.
|
||||
|
||||
If you encounter this error, the solution is to remove the package from the `pip` cache and re-run the Invoke installer so a fresh, working version of `controlnet_aux` can be downloaded and installed:
|
||||
|
||||
- Run the Invoke launcher
|
||||
- Choose the developer console option
|
||||
- Run this command: `pip cache remove controlnet_aux`
|
||||
- Close the terminal window
|
||||
- Download and run the [installer][latest release], selecting your current install location
|
||||
|
||||
## Out of Memory Issues
|
||||
|
||||
The models are large, VRAM is expensive, and you may find yourself
|
||||
faced with Out of Memory errors when generating images. Here are some
|
||||
tips to reduce the problem:
|
||||
|
||||
!!! info "Optimizing for GPU VRAM"
|
||||
|
||||
=== "4GB VRAM GPU"
|
||||
|
||||
This should be adequate for 512x512 pixel images using Stable Diffusion 1.5
|
||||
and derived models, provided that you do not use the NSFW checker. It won't be loaded unless you go into the UI settings and turn it on.
|
||||
|
||||
If you are on a CUDA-enabled GPU, we will automatically use xformers or torch-sdp to reduce VRAM requirements, though you can explicitly configure this. See the [configuration docs].
|
||||
|
||||
=== "6GB VRAM GPU"
|
||||
|
||||
This is a border case. Using the SD 1.5 series you should be able to
|
||||
generate images up to 640x640 with the NSFW checker enabled, and up to
|
||||
1024x1024 with it disabled.
|
||||
|
||||
If you run into persistent memory issues there are a series of
|
||||
environment variables that you can set before launching InvokeAI that
|
||||
alter how the PyTorch machine learning library manages memory. See
|
||||
<https://pytorch.org/docs/stable/notes/cuda.html#memory-management> for
|
||||
a list of these tweaks.
|
||||
|
||||
=== "12GB VRAM GPU"
|
||||
|
||||
This should be sufficient to generate larger images up to about 1280x1280.
|
||||
|
||||
## Checkpoint Models Load Slowly or Use Too Much RAM
|
||||
|
||||
The difference between diffusers models (a folder containing multiple
|
||||
subfolders) and checkpoint models (a file ending with .safetensors or
|
||||
.ckpt) is that InvokeAI is able to load diffusers models into memory
|
||||
incrementally, while checkpoint models must be loaded all at
|
||||
once. With very large models, or systems with limited RAM, you may
|
||||
experience slowdowns and other memory-related issues when loading
|
||||
checkpoint models.
|
||||
|
||||
To solve this, go to the Model Manager tab (the cube), select the
|
||||
checkpoint model that's giving you trouble, and press the "Convert"
|
||||
button in the upper right of your browser window. This will convert the
|
||||
checkpoint into a diffusers model, after which loading should be
|
||||
faster and less memory-intensive.
|
||||
|
||||
## Memory Leak (Linux)
|
||||
|
||||
If you notice a memory leak, it could be caused by memory fragmentation as models are loaded and/or moved from CPU to GPU.
|
||||
|
||||
A workaround is to tune memory allocation with an environment variable:
|
||||
|
||||
```bash
|
||||
# Force blocks >1MB to be allocated with `mmap` so that they are released to the system immediately when they are freed.
|
||||
MALLOC_MMAP_THRESHOLD_=1048576
|
||||
```
|
||||
|
||||
!!! warning "Speed vs Memory Tradeoff"
|
||||
|
||||
Your generations may be slower overall when setting this environment variable.
|
||||
|
||||
!!! info "Possibly dependent on `libc` implementation"
|
||||
|
||||
It's not known if this issue occurs with other `libc` implementations such as `musl`.
|
||||
|
||||
If you encounter this issue and your system uses a different implementation, please try this environment variable and let us know if it fixes the issue.
|
||||
|
||||
<h3>Detailed Discussion</h3>
|
||||
|
||||
Python (and PyTorch) relies on the memory allocator from the C Standard Library (`libc`). On linux, with the GNU C Standard Library implementation (`glibc`), our memory access patterns have been observed to cause severe memory fragmentation.
|
||||
|
||||
This fragmentation results in large amounts of memory that has been freed but can't be released back to the OS. Loading models from disk and moving them between CPU/CUDA seem to be the operations that contribute most to the fragmentation.
|
||||
|
||||
This memory fragmentation issue can result in OOM crashes during frequent model switching, even if `ram` (the max RAM cache size) is set to a reasonable value (e.g. an OOM crash with `ram=16` on a system with 32GB of RAM).
|
||||
|
||||
This problem may also exist on other OSes, and other `libc` implementations. But, at the time of writing, it has only been investigated on linux with `glibc`.
|
||||
|
||||
To better understand how the `glibc` memory allocator works, see these references:
|
||||
|
||||
- Basics: <https://www.gnu.org/software/libc/manual/html_node/The-GNU-Allocator.html>
|
||||
- Details: <https://sourceware.org/glibc/wiki/MallocInternals>
|
||||
|
||||
Note the differences between memory allocated as chunks in an arena vs. memory allocated with `mmap`. Under `glibc`'s default configuration, most model tensors get allocated as chunks in an arena making them vulnerable to the problem of fragmentation.
|
||||
|
||||
[model install docs]: ./installation/models.md
|
||||
[system requirements]: ./installation/requirements.md
|
||||
[latest release]: https://github.com/invoke-ai/InvokeAI/releases/latest
|
||||
[create an issue]: https://github.com/invoke-ai/InvokeAI/issues
|
||||
[discord]: https://discord.gg/ZmtBAhwWhy
|
||||
[configuration docs]: ./configuration.md
|
||||
[access token]: https://huggingface.co/docs/hub/security-tokens#how-to-manage-user-access-tokens
|
||||
88
docs/features/CONCEPTS.md
Normal file
@@ -0,0 +1,88 @@
|
||||
---
|
||||
title: Textual Inversion Embeddings and LoRAs
|
||||
---
|
||||
|
||||
# :material-library-shelves: Textual Inversions and LoRAs
|
||||
|
||||
With the advances in research, many new capabilities are available to customize the knowledge and understanding of novel concepts not originally contained in the base model.
|
||||
|
||||
|
||||
## Using Textual Inversion Files
|
||||
|
||||
Textual inversion (TI) files are small models that customize the output of
|
||||
Stable Diffusion image generation. They can augment SD with specialized subjects
|
||||
and artistic styles. They are also known as "embeds" in the machine learning
|
||||
world.
|
||||
|
||||
Each TI file introduces one or more vocabulary terms to the SD model. These are
|
||||
known in InvokeAI as "triggers." Triggers are denoted using angle brackets
|
||||
as in "<trigger-phrase>". The two most common types of
|
||||
TI files that you'll encounter are `.pt` and `.bin` files, which are produced by
|
||||
different TI training packages. InvokeAI supports both formats, but its
|
||||
[built-in TI training system](TRAINING.md) produces `.pt`.
|
||||
|
||||
The [Hugging Face company](https://huggingface.co/sd-concepts-library) has
|
||||
amassed a large library of >800 community-contributed TI files covering a
|
||||
broad range of subjects and styles. You can also install your own or others' TI files
|
||||
by placing them in the designated directory for the compatible model type.
|
||||
|
||||
### An Example
|
||||
|
||||
Here are a few examples to illustrate how it works. All these images were
|
||||
generated using the command-line client and the Stable Diffusion 1.5 model:
|
||||
|
||||
| Japanese gardener | Japanese gardener <ghibli-face> | Japanese gardener <hoi4-leaders> | Japanese gardener <cartoona-animals> |
|
||||
| :--------------------------------: | :-----------------------------------: | :------------------------------------: | :----------------------------------------: |
|
||||
|  |  |  |  |
|
||||
|
||||
You can also combine styles and concepts:
|
||||
|
||||
<figure markdown>
|
||||
| A portrait of <alf> in <cartoona-animal> style |
|
||||
| :--------------------------------------------------------: |
|
||||
|  |
|
||||
</figure>
|
||||
|
||||
|
||||
## Installing your Own TI Files
|
||||
|
||||
You may install any number of `.pt` and `.bin` files simply by copying them into
|
||||
the `embedding` directory of the corresponding InvokeAI models directory (usually `invokeai`
|
||||
in your home directory). For example, you can simply move a Stable Diffusion 1.5 embedding file to
|
||||
the `sd-1/embedding` folder. Be careful not to overwrite one file with another.
|
||||
For example, TI files generated by the Hugging Face toolkit share the name
|
||||
`learned_embedding.bin`. You can rename these, or use subdirectories to keep them distinct.
|
||||
|
||||
At startup time, InvokeAI will scan the various `embedding` directories and load any TI
|
||||
files it finds there for compatible models. At startup you will see a message similar to this one:
|
||||
|
||||
```bash
|
||||
>> Current embedding manager terms: <HOI4-Leader>, <princess-knight>
|
||||
```
|
||||
To use these when generating, simply type the `<` key in your prompt to open the Textual Inversion WebUI and
|
||||
select the embedding you'd like to use. This UI has type-ahead support, so you can easily find supported embeddings.
|
||||
|
||||
## Using LoRAs
|
||||
|
||||
LoRA files are models that customize the output of Stable Diffusion
|
||||
image generation. Larger than embeddings, but much smaller than full
|
||||
models, they augment SD with improved understanding of subjects and
|
||||
artistic styles.
|
||||
|
||||
Unlike TI files, LoRAs do not introduce novel vocabulary into the
|
||||
model's known tokens. Instead, LoRAs augment the model's weights that
|
||||
are applied to generate imagery. LoRAs may be supplied with a
|
||||
"trigger" word that they have been explicitly trained on, or may
|
||||
simply apply their effect without being triggered.
|
||||
|
||||
LoRAs are typically stored in .safetensors files, which are the most
|
||||
secure way to store and transmit these types of weights. You may
|
||||
install any number of `.safetensors` LoRA files simply by copying them
|
||||
into the `autoimport/lora` directory of the corresponding InvokeAI models
|
||||
directory (usually `invokeai` in your home directory).
|
||||
|
||||
To use these when generating, open the LoRA menu item in the options
|
||||
panel, select the LoRAs you want to apply and ensure that they have
|
||||
the appropriate weight recommended by the model provider. Typically,
|
||||
most LoRAs perform best at a weight of .75-1.
|
||||
|
||||
282
docs/features/CONFIGURATION.md
Normal file
@@ -0,0 +1,282 @@
|
||||
---
|
||||
title: Configuration
|
||||
---
|
||||
|
||||
# :material-tune-variant: InvokeAI Configuration
|
||||
|
||||
## Intro
|
||||
|
||||
InvokeAI has numerous runtime settings which can be used to adjust
|
||||
many aspects of its operations, including the location of files and
|
||||
directories, memory usage, and performance. These settings can be
|
||||
viewed and customized in several ways:
|
||||
|
||||
1. By editing settings in the `invokeai.yaml` file.
|
||||
2. By setting environment variables.
|
||||
3. On the command-line, when InvokeAI is launched.
|
||||
|
||||
In addition, the most commonly changed settings are accessible
|
||||
graphically via the `invokeai-configure` script.
|
||||
|
||||
### How the Configuration System Works
|
||||
|
||||
When InvokeAI is launched, the very first thing it needs to do is to
|
||||
find its "root" directory, which contains its configuration files,
|
||||
installed models, its database of images, and the folder(s) of
|
||||
generated images themselves. In this document, the root directory will
|
||||
be referred to as ROOT.
|
||||
|
||||
#### Finding the Root Directory
|
||||
|
||||
To find its root directory, InvokeAI uses the following recipe:
|
||||
|
||||
1. It first looks for the argument `--root <path>` on the command line
|
||||
it was launched from, and uses the indicated path if present.
|
||||
|
||||
2. Next it looks for the environment variable INVOKEAI_ROOT, and uses
|
||||
the directory path found there if present.
|
||||
|
||||
3. If neither of these are present, then InvokeAI looks for the
|
||||
folder containing the `.venv` Python virtual environment directory for
|
||||
the currently active environment. This directory is checked for files
|
||||
expected inside the InvokeAI root before it is used.
|
||||
|
||||
4. Finally, InvokeAI looks for a directory in the current user's home
|
||||
directory named `invokeai`.
|
||||
|
||||
#### Reading the InvokeAI Configuration File
|
||||
|
||||
Once the root directory has been located, InvokeAI looks for a file
|
||||
named `ROOT/invokeai.yaml`, and if present reads configuration values
|
||||
from it. The top of this file looks like this:
|
||||
|
||||
```
|
||||
InvokeAI:
|
||||
Web Server:
|
||||
host: localhost
|
||||
port: 9090
|
||||
allow_origins: []
|
||||
allow_credentials: true
|
||||
allow_methods:
|
||||
- '*'
|
||||
allow_headers:
|
||||
- '*'
|
||||
Features:
|
||||
esrgan: true
|
||||
internet_available: true
|
||||
log_tokenization: false
|
||||
patchmatch: true
|
||||
restore: true
|
||||
...
|
||||
```
|
||||
|
||||
The lines in this file are used to establish default values for
|
||||
Invoke's settings. In the above fragment, the Web Server's listening
|
||||
port is set to 9090 by the `port` setting.
|
||||
|
||||
You can edit this file with a text editor such as "Notepad" (do not
|
||||
use Word or any other word processor). When editing, be careful to
|
||||
maintain the indentation, and do not add extraneous text, as syntax
|
||||
errors will prevent InvokeAI from launching. A basic guide to the
|
||||
format of YAML files can be found
|
||||
[here](https://circleci.com/blog/what-is-yaml-a-beginner-s-guide/).
|
||||
|
||||
You can fix a broken `invokeai.yaml` by deleting it and running the
|
||||
configuration script again -- option [7] in the launcher, "Re-run the
|
||||
configure script".
|
||||
|
||||
#### Reading Environment Variables
|
||||
|
||||
Next InvokeAI looks for defined environment variables in the format
|
||||
`INVOKEAI_<setting_name>`, for example `INVOKEAI_port`. Environment
|
||||
variable values take precedence over configuration file variables. On
|
||||
a Macintosh system, for example, you could change the port that the
|
||||
web server listens on by setting the environment variable this way:
|
||||
|
||||
```
|
||||
export INVOKEAI_port=8000
|
||||
invokeai-web
|
||||
```
|
||||
|
||||
Please check out these
|
||||
[Macintosh](https://phoenixnap.com/kb/set-environment-variable-mac)
|
||||
and
|
||||
[Windows](https://phoenixnap.com/kb/windows-set-environment-variable)
|
||||
guides for setting temporary and permanent environment variables.
|
||||
|
||||
#### Reading the Command Line
|
||||
|
||||
Lastly, InvokeAI takes settings from the command line, which override
|
||||
everything else. The command-line settings have the same name as the
|
||||
corresponding configuration file settings, preceded by a `--`, for
|
||||
example `--port 8000`.
|
||||
|
||||
If you are using the launcher (`invoke.sh` or `invoke.bat`) to launch
|
||||
InvokeAI, then just pass the command-line arguments to the launcher:
|
||||
|
||||
```
|
||||
invoke.bat --port 8000 --host 0.0.0.0
|
||||
```
|
||||
|
||||
The arguments will be applied when you select the web server option
|
||||
(and the other options as well).
|
||||
|
||||
If, on the other hand, you prefer to launch InvokeAI directly from the
|
||||
command line, you would first activate the virtual environment (known
|
||||
as the "developer's console" in the launcher), and run `invokeai-web`:
|
||||
|
||||
```
|
||||
> C:\Users\Fred\invokeai\.venv\scripts\activate
|
||||
(.venv) > invokeai-web --port 8000 --host 0.0.0.0
|
||||
```
|
||||
|
||||
You can get a listing and brief instructions for each of the
|
||||
command-line options by giving the `--help` argument:
|
||||
|
||||
```
|
||||
(.venv) > invokeai-web --help
|
||||
usage: InvokeAI [-h] [--host HOST] [--port PORT] [--allow_origins [ALLOW_ORIGINS ...]] [--allow_credentials | --no-allow_credentials] [--allow_methods [ALLOW_METHODS ...]]
|
||||
[--allow_headers [ALLOW_HEADERS ...]] [--esrgan | --no-esrgan] [--internet_available | --no-internet_available] [--log_tokenization | --no-log_tokenization]
|
||||
[--patchmatch | --no-patchmatch] [--restore | --no-restore]
|
||||
[--always_use_cpu | --no-always_use_cpu] [--free_gpu_mem | --no-free_gpu_mem] [--max_loaded_models MAX_LOADED_MODELS] [--max_cache_size MAX_CACHE_SIZE]
|
||||
[--max_vram_cache_size MAX_VRAM_CACHE_SIZE] [--gpu_mem_reserved GPU_MEM_RESERVED] [--precision {auto,float16,float32,autocast}]
|
||||
[--sequential_guidance | --no-sequential_guidance] [--xformers_enabled | --no-xformers_enabled] [--tiled_decode | --no-tiled_decode] [--root ROOT]
|
||||
[--autoimport_dir AUTOIMPORT_DIR] [--lora_dir LORA_DIR] [--embedding_dir EMBEDDING_DIR] [--controlnet_dir CONTROLNET_DIR] [--conf_path CONF_PATH]
|
||||
[--models_dir MODELS_DIR] [--legacy_conf_dir LEGACY_CONF_DIR] [--db_dir DB_DIR] [--outdir OUTDIR] [--from_file FROM_FILE]
|
||||
[--use_memory_db | --no-use_memory_db] [--model MODEL] [--log_handlers [LOG_HANDLERS ...]] [--log_format {plain,color,syslog,legacy}]
|
||||
[--log_level {debug,info,warning,error,critical}] [--version | --no-version]
|
||||
```
|
||||
|
||||
## The Configuration Settings
|
||||
|
||||
The configuration settings are divided into several distinct
|
||||
groups in `invokeai.yaml`:
|
||||
|
||||
### Web Server
|
||||
|
||||
| Setting | Default Value | Description |
|
||||
|----------|----------------|--------------|
|
||||
| `host` | `localhost` | Name or IP address of the network interface that the web server will listen on |
|
||||
| `port` | `9090` | Network port number that the web server will listen on |
|
||||
| `allow_origins` | `[]` | A list of host names or IP addresses that are allowed to connect to the InvokeAI API in the format `['host1','host2',...]` |
|
||||
| `allow_credentials` | `true` | Require credentials for a foreign host to access the InvokeAI API (don't change this) |
|
||||
| `allow_methods` | `*` | List of HTTP methods ("GET", "POST") that the web server is allowed to use when accessing the API |
|
||||
| `allow_headers` | `*` | List of HTTP headers that the web server will accept when accessing the API |
|
||||
|
||||
The documentation for InvokeAI's API can be accessed by browsing to the following URL: <http://localhost:9090/docs>.
|
||||
|
||||
### Features
|
||||
|
||||
These configuration settings allow you to enable and disable various InvokeAI features:
|
||||
|
||||
| Setting | Default Value | Description |
|
||||
|----------|----------------|--------------|
|
||||
| `esrgan` | `true` | Activate the ESRGAN upscaling options|
|
||||
| `internet_available` | `true` | When a resource is not available locally, try to fetch it via the internet |
|
||||
| `log_tokenization` | `false` | Before each text2image generation, print a color-coded representation of the prompt to the console; this can help understand why a prompt is not working as expected |
|
||||
| `patchmatch` | `true` | Activate the "patchmatch" algorithm for improved inpainting |
|
||||
| `restore` | `true` | Activate the facial restoration features (DEPRECATED; restoration features will be removed in 3.0.0) |
|
||||
|
||||
### Memory/Performance
|
||||
|
||||
These options tune InvokeAI's memory and performance characteristics.
|
||||
|
||||
| Setting | Default Value | Description |
|
||||
|----------|----------------|--------------|
|
||||
| `always_use_cpu` | `false` | Use the CPU to generate images, even if a GPU is available |
|
||||
| `free_gpu_mem` | `false` | Aggressively free up GPU memory after each operation; this will allow you to run in low-VRAM environments with some performance penalties |
|
||||
| `max_cache_size` | `6` | Amount of CPU RAM (in GB) to reserve for caching models in memory; more cache allows you to keep models in memory and switch among them quickly |
|
||||
| `max_vram_cache_size` | `2.75` | Amount of GPU VRAM (in GB) to reserve for caching models in VRAM; more cache speeds up generation but reduces the size of the images that can be generated. This can be set to zero to maximize the amount of memory available for generation. |
|
||||
| `precision` | `auto` | Floating point precision. One of `auto`, `float16` or `float32`. `float16` will consume half the memory of `float32` but produce slightly lower-quality images. The `auto` setting will guess the proper precision based on your video card and operating system |
|
||||
| `sequential_guidance` | `false` | Calculate guidance in serial rather than in parallel, lowering memory requirements at the cost of some performance loss |
|
||||
| `xformers_enabled` | `true` | If the x-formers memory-efficient attention module is installed, activate it for better memory usage and generation speed|
|
||||
| `tiled_decode` | `false` | If true, then during the VAE decoding phase the image will be decoded a section at a time, reducing memory consumption at the cost of a performance hit |
|
||||
|
||||
### Paths
|
||||
|
||||
These options set the paths of various directories and files used by
|
||||
InvokeAI. Relative paths are interpreted relative to INVOKEAI_ROOT, so
|
||||
if INVOKEAI_ROOT is `/home/fred/invokeai` and the path is
|
||||
`autoimport/main`, then the corresponding directory will be located at
|
||||
`/home/fred/invokeai/autoimport/main`.
|
||||
|
||||
| Setting | Default Value | Description |
|
||||
|----------|----------------|--------------|
|
||||
| `autoimport_dir` | `autoimport/main` | At startup time, read and import any main model files found in this directory |
|
||||
| `lora_dir` | `autoimport/lora` | At startup time, read and import any LoRA/LyCORIS models found in this directory |
|
||||
| `embedding_dir` | `autoimport/embedding` | At startup time, read and import any textual inversion (embedding) models found in this directory |
|
||||
| `controlnet_dir` | `autoimport/controlnet` | At startup time, read and import any ControlNet models found in this directory |
|
||||
| `conf_path` | `configs/models.yaml` | Location of the `models.yaml` model configuration file |
|
||||
| `models_dir` | `models` | Location of the directory containing models installed by InvokeAI's model manager |
|
||||
| `legacy_conf_dir` | `configs/stable-diffusion` | Location of the directory containing the .yaml configuration files for legacy checkpoint models |
|
||||
| `db_dir` | `databases` | Location of the directory containing InvokeAI's image, schema and session database |
|
||||
| `outdir` | `outputs` | Location of the directory in which the gallery of generated and uploaded images will be stored |
|
||||
| `use_memory_db` | `false` | Keep database information in memory rather than on disk; this will not preserve image gallery information across restarts |
|
||||
|
||||
Note that the autoimport directories will be searched recursively,
|
||||
allowing you to organize the models into folders and subfolders in any
|
||||
way you wish. In addition, while we have split up autoimport
|
||||
directories by the type of model they contain, this isn't
|
||||
necessary. You can combine different model types in the same folder
|
||||
and InvokeAI will figure out what they are. So you can easily use just
|
||||
one autoimport directory by commenting out the unneeded paths:
|
||||
|
||||
```
|
||||
Paths:
|
||||
autoimport_dir: autoimport
|
||||
# lora_dir: null
|
||||
# embedding_dir: null
|
||||
# controlnet_dir: null
|
||||
```
|
||||
|
||||
### Logging
|
||||
|
||||
These settings control the information, warning, and debugging
|
||||
messages printed to the console log while InvokeAI is running:
|
||||
|
||||
| Setting | Default Value | Description |
|
||||
|----------|----------------|--------------|
|
||||
| `log_handlers` | `console` | This controls where log messages are sent, and can be a list of one or more destinations. Values include `console`, `file`, `syslog` and `http`. These are described in more detail below |
|
||||
| `log_format` | `color` | This controls the formatting of the log messages. Values are `plain`, `color`, `legacy` and `syslog` |
|
||||
| `log_level` | `debug` | This filters messages according to the level of severity and can be one of `debug`, `info`, `warning`, `error` and `critical`. For example, setting to `warning` will display all messages at the warning level or higher, but won't display "debug" or "info" messages |
|
||||
|
||||
Several different log handler destinations are available, and multiple destinations are supported by providing a list:
|
||||
|
||||
```
|
||||
log_handlers:
|
||||
- console
|
||||
- syslog=localhost
|
||||
- file=/var/log/invokeai.log
|
||||
```
|
||||
|
||||
* `console` is the default. It prints log messages to the command-line window from which InvokeAI was launched.
|
||||
|
||||
* `syslog` is only available on Linux and Macintosh systems. It uses
|
||||
the operating system's "syslog" facility to write log file entries
|
||||
locally or to a remote logging machine. `syslog` offers a variety
|
||||
of configuration options:
|
||||
|
||||
```
|
||||
syslog=/dev/log - log to the /dev/log device
|
||||
syslog=localhost - log to the network logger running on the local machine
|
||||
syslog=localhost:512 - same as above, but using a non-standard port
|
||||
syslog=fredserver,facility=LOG_USER,socktype=SOCK_DGRAM
|
||||
- Log to LAN-connected server "fredserver" using the facility LOG_USER and datagram packets.
|
||||
```
|
||||
|
||||
* `http` can be used to log to a remote web server. The server must be
|
||||
properly configured to receive and act on log messages. The option
|
||||
accepts the URL to the web server, and a `method` argument
|
||||
indicating whether the message should be submitted using the GET or
|
||||
POST method.
|
||||
|
||||
```
|
||||
http=http://my.server/path/to/logger,method=POST
|
||||
```
|
||||
|
||||
The `log_format` option provides several alternative formats:
|
||||
|
||||
* `color` - default format providing time, date and a message, using text colors to distinguish different log severities
|
||||
* `plain` - same as above, but monochrome text only
|
||||
* `syslog` - the log level and error message only, allowing the syslog system to attach the time and date
|
||||
* `legacy` - a format similar to the one used by the legacy 2.3 InvokeAI releases.
|
||||
136
docs/features/CONTROLNET.md
Normal file
@@ -0,0 +1,136 @@
|
||||
---
|
||||
title: ControlNet
|
||||
---
|
||||
|
||||
# :material-loupe: ControlNet
|
||||
|
||||
## ControlNet
|
||||
|
||||
ControlNet
|
||||
|
||||
ControlNet is a powerful set of features developed by the open-source
|
||||
community (notably, Stanford researcher
|
||||
[**@lllyasviel**](https://github.com/lllyasviel)) that allows you to
|
||||
apply a secondary neural network model to your image generation
|
||||
process in Invoke.
|
||||
|
||||
With ControlNet, you can get more control over the output of your
|
||||
image generation, providing you with a way to direct the network
|
||||
towards generating images that better fit your desired style or
|
||||
outcome.
|
||||
|
||||
|
||||
### How it works
|
||||
|
||||
ControlNet works by analyzing an input image, pre-processing that
|
||||
image to identify relevant information that can be interpreted by each
|
||||
specific ControlNet model, and then inserting that control information
|
||||
into the generation process. This can be used to adjust the style,
|
||||
composition, or other aspects of the image to better achieve a
|
||||
specific result.
|
||||
|
||||
|
||||
### Models
|
||||
|
||||
InvokeAI provides access to a series of ControlNet models that provide
|
||||
different effects or styles in your generated images. Currently
|
||||
InvokeAI only supports "diffuser" style ControlNet models. These are
|
||||
folders that contain the files `config.json` and/or
|
||||
`diffusion_pytorch_model.safetensors` and
|
||||
`diffusion_pytorch_model.fp16.safetensors`. The name of the folder is
|
||||
the name of the model.
|
||||
|
||||
***InvokeAI does not currently support checkpoint-format
|
||||
ControlNets. These come in the form of a single file with the
|
||||
extension `.safetensors`.***
|
||||
|
||||
Diffuser-style ControlNet models are available at HuggingFace
|
||||
(http://huggingface.co) and accessed via their repo IDs (identifiers
|
||||
in the format "author/modelname"). The easiest way to install them is
|
||||
to use the InvokeAI model installer application. Use the
|
||||
`invoke.sh`/`invoke.bat` launcher to select item [5] and then navigate
|
||||
to the CONTROLNETS section. Select the models you wish to install and
|
||||
press "APPLY CHANGES". You may also enter additional HuggingFace
|
||||
repo_ids in the "Additional models" textbox:
|
||||
|
||||
{:width="640px"}
|
||||
|
||||
Command-line users can launch the model installer using the command
|
||||
`invokeai-model-install`.
|
||||
|
||||
_Be aware that some ControlNet models require additional code
|
||||
functionality in order to work properly, so just installing a
|
||||
third-party ControlNet model may not have the desired effect._ Please
|
||||
read and follow the documentation for installing a third party model
|
||||
not currently included among InvokeAI's default list.
|
||||
|
||||
The models currently supported include:
|
||||
|
||||
**Canny**:
|
||||
|
||||
When the Canny model is used in ControlNet, Invoke will attempt to generate images that match the edges detected.
|
||||
|
||||
Canny edge detection works by detecting the edges in an image by looking for abrupt changes in intensity. It is known for its ability to detect edges accurately while reducing noise and false edges, and the preprocessor can identify more information by decreasing the thresholds.
|
||||
|
||||
**M-LSD**:
|
||||
|
||||
M-LSD is another edge detection algorithm used in ControlNet. It stands for Multi-Scale Line Segment Detector.
|
||||
|
||||
It detects straight line segments in an image by analyzing the local structure of the image at multiple scales. It can be useful for architectural imagery, or anything where straight-line structural information is needed for the resulting output.
|
||||
|
||||
**Lineart**:
|
||||
|
||||
The Lineart model in ControlNet generates line drawings from an input image. The resulting pre-processed image is a simplified version of the original, with only the outlines of objects visible. The Lineart model in ControlNet is known for its ability to accurately capture the contours of the objects in an input sketch.
|
||||
|
||||
**Lineart Anime**:
|
||||
|
||||
A variant of the Lineart model that generates line drawings with a distinct style inspired by anime and manga art styles.
|
||||
|
||||
**Depth**:
|
||||
A model that generates depth maps of images, allowing you to create more realistic 3D models or to simulate depth effects in post-processing.
|
||||
|
||||
**Normal Map (BAE):**
|
||||
A model that generates normal maps from input images, allowing for more realistic lighting effects in 3D rendering.
|
||||
|
||||
**Image Segmentation**:
|
||||
A model that divides input images into segments or regions, each of which corresponds to a different object or part of the image. (More details coming soon)
|
||||
|
||||
|
||||
**Openpose**:
|
||||
The OpenPose control model allows for the identification of the general pose of a character by pre-processing an existing image with a clear human structure. With advanced options, Openpose can also detect the face or hands in the image.
|
||||
|
||||
**Mediapipe Face**:
|
||||
|
||||
The MediaPipe Face identification processor is able to clearly identify facial features in order to capture vivid expressions of human faces.
|
||||
|
||||
**Tile (experimental)**:
|
||||
|
||||
The Tile model fills out details in the image to match the image, rather than the prompt. The Tile Model is a versatile tool that offers a range of functionalities. Its primary capabilities can be boiled down to two main behaviors:
|
||||
|
||||
- It can reinterpret specific details within an image and create fresh, new elements.
|
||||
- It has the ability to disregard global instructions if there's a discrepancy between them and the local context or specific parts of the image. In such cases, it uses the local context to guide the process.
|
||||
|
||||
The Tile Model can be a powerful tool in your arsenal for enhancing image quality and details. If there are undesirable elements in your images, such as blurriness caused by resizing, this model can effectively eliminate these issues, resulting in cleaner, crisper images. Moreover, it can generate and add refined details to your images, improving their overall quality and appeal.
|
||||
|
||||
**Pix2Pix (experimental)**
|
||||
|
||||
With Pix2Pix, you can input an image into the controlnet, and then "instruct" the model to change it using your prompt. For example, you can say "Make it winter" to add more wintry elements to a scene.
|
||||
|
||||
**Inpaint**: Coming Soon - Currently this model is available but not functional on the Canvas. An upcoming release will provide additional capabilities for using this model when inpainting.
|
||||
|
||||
Each of these models can be adjusted and combined with other ControlNet models to achieve different results, giving you even more control over your image generation process.
|
||||
|
||||
|
||||
## Using ControlNet
|
||||
|
||||
To use ControlNet, you can simply select the desired model and adjust both the ControlNet and Pre-processor settings to achieve the desired result. You can also use multiple ControlNet models at the same time, allowing you to achieve even more complex effects or styles in your generated images.
|
||||
|
||||
|
||||
Each ControlNet has two settings that are applied to the ControlNet.
|
||||
|
||||
Weight - Strength of the Controlnet model applied to the generation for the section, defined by start/end.
|
||||
|
||||
Start/End - 0 represents the start of the generation, 1 represents the end. The Start/end setting controls what steps during the generation process have the ControlNet applied.
|
||||
|
||||
Additionally, each ControlNet section can be expanded in order to manipulate settings for the image pre-processor that adjusts your uploaded image before using it when you Invoke.
|
||||
151
docs/features/IMG2IMG.md
Normal file
@@ -0,0 +1,151 @@
|
||||
---
|
||||
title: Image-to-Image
|
||||
---
|
||||
|
||||
# :material-image-multiple: Image-to-Image
|
||||
|
||||
InvokeAI provides an "img2img" feature that lets you seed your
|
||||
creations with an initial drawing or photo. This is a really cool
|
||||
feature that tells stable diffusion to build the prompt on top of the
|
||||
image you provide, preserving the original's basic shape and layout.
|
||||
|
||||
For a walkthrough of using Image-to-Image in the Web UI, see [InvokeAI
|
||||
Web Server](./WEB.md#image-to-image).
|
||||
|
||||
The main difference between `img2img` and `prompt2img` is the starting point.
|
||||
While `prompt2img` always starts with pure gaussian noise and progressively
|
||||
refines it over the requested number of steps, `img2img` skips some of these
|
||||
earlier steps (how many it skips is indirectly controlled by the `--strength`
|
||||
parameter), and uses instead your initial image mixed with gaussian noise as the
|
||||
starting image.
|
||||
|
||||
**Let's start** by thinking about vanilla `prompt2img`, just generating an image
|
||||
from a prompt. If the step count is 10, then the "latent space" (Stable
|
||||
Diffusion's internal representation of the image) for the prompt "fire" with
|
||||
seed `1592514025` develops something like this:
|
||||
|
||||
!!! example ""
|
||||
|
||||
<figure markdown>
|
||||
{ width=720 }
|
||||
</figure>
|
||||
|
||||
Put simply: starting from a frame of fuzz/static, SD finds details in each frame
|
||||
that it thinks look like "fire" and brings them a little bit more into focus,
|
||||
gradually scrubbing out the fuzz until a clear image remains.
|
||||
|
||||
**When you use `img2img`** some of the earlier steps are cut, and instead an
|
||||
initial image of your choice is used. But because of how the maths behind Stable
|
||||
Diffusion works, this image needs to be mixed with just the right amount of
|
||||
noise (fuzz/static) for where it is being inserted. This is where the strength
|
||||
parameter comes in. Depending on the set strength, your image will be inserted
|
||||
into the sequence at the appropriate point, with just the right amount of noise.
|
||||
|
||||
### A concrete example
|
||||
|
||||
!!! example "I want SD to draw a fire based on this hand-drawn image"
|
||||
|
||||
{ align=left }
|
||||
|
||||
Let's only do 10 steps, to make it easier to see what's happening. If strength
|
||||
is `0.7`, this is what the internal steps the algorithm has to take will look
|
||||
like:
|
||||
|
||||
<figure markdown>
|
||||

|
||||
</figure>
|
||||
|
||||
With strength `0.4`, the steps look more like this:
|
||||
|
||||
<figure markdown>
|
||||

|
||||
</figure>
|
||||
|
||||
Notice how much more fuzzy the starting image is for strength `0.7` compared to
|
||||
`0.4`, and notice also how much longer the sequence is with `0.7`:
|
||||
|
||||
| | strength = 0.7 | strength = 0.4 |
|
||||
| --------------------------- | ------------------------------------------------------------- | ------------------------------------------------------------- |
|
||||
| initial image that SD sees |  |  |
|
||||
| steps argument to `invoke>` | `-S10` | `-S10` |
|
||||
| steps actually taken | `7` | `4` |
|
||||
| latent space at each step |  |  |
|
||||
| output |  |  |
|
||||
|
||||
Both of the outputs look kind of like what I was thinking of. With the strength
|
||||
higher, my input becomes more vague, _and_ Stable Diffusion has more steps to
|
||||
refine its output. But it's not really making what I want, which is a picture of
|
||||
a cheery open fire. With the strength lower, my input is more clear, _but_ Stable
|
||||
Diffusion has less chance to refine itself, so the result ends up inheriting all
|
||||
the problems of my bad drawing.
|
||||
|
||||
If you want to try this out yourself, all of these are using a seed of
|
||||
`1592514025` with a width/height of `384`, step count `10`, the
|
||||
`k_lms` sampler, and the single-word prompt `"fire"`.
|
||||
|
||||
### Compensating for the reduced step count
|
||||
|
||||
After putting this guide together I was curious to see how the difference would
|
||||
be if I increased the step count to compensate, so that SD could have the same
|
||||
amount of steps to develop the image regardless of the strength. So I ran the
|
||||
generation again using the same seed, but this time adapting the step count to
|
||||
give each generation 20 steps.
|
||||
|
||||
Here's strength `0.4` (note step count `50`, which is `20 ÷ 0.4` to make sure SD
|
||||
does `20` steps from my image):
|
||||
|
||||
<figure markdown>
|
||||

|
||||
</figure>
|
||||
|
||||
and here is strength `0.7` (note step count `30`, which is roughly `20 ÷ 0.7` to
|
||||
make sure SD does `20` steps from my image):
|
||||
|
||||
<figure markdown>
|
||||

|
||||
</figure>
|
||||
|
||||
In both cases the image is nice and clean and "finished", but because at
|
||||
strength `0.7` Stable Diffusion has been given so much more freedom to improve on
|
||||
my badly-drawn flames, they've come out looking much better. You can really see
|
||||
the difference when looking at the latent steps. There's more noise on the first
|
||||
image with strength `0.7`:
|
||||
|
||||
<figure markdown>
|
||||

|
||||
</figure>
|
||||
|
||||
than there is for strength `0.4`:
|
||||
|
||||
<figure markdown>
|
||||

|
||||
</figure>
|
||||
|
||||
and that extra noise gives the algorithm more choices when it is evaluating how
|
||||
to denoise any particular pixel in the image.
|
||||
|
||||
Unfortunately, it seems that `img2img` is very sensitive to the step count.
|
||||
Here's strength `0.7` with a step count of `29` (SD did 19 steps from my image):
|
||||
|
||||
<figure markdown>
|
||||

|
||||
</figure>
|
||||
|
||||
By comparing the latents we can sort of see that something got interpreted
|
||||
differently enough on the third or fourth step to lead to a rather different
|
||||
interpretation of the flames.
|
||||
|
||||
<figure markdown>
|
||||

|
||||
</figure>
|
||||
|
||||
<figure markdown>
|
||||

|
||||
</figure>
|
||||
|
||||
This is the result of a difference in the de-noising "schedule" - basically the
|
||||
noise has to be cleaned by a certain degree each step or the model won't
|
||||
"converge" on the image properly (see
|
||||
[stable diffusion blog](https://huggingface.co/blog/stable_diffusion) for more
|
||||
about that). A different step count means a different schedule, which means
|
||||
things get interpreted slightly differently at every step.
|
||||
171
docs/features/LOGGING.md
Normal file
@@ -0,0 +1,171 @@
|
||||
---
|
||||
title: Controlling Logging
|
||||
---
|
||||
|
||||
# :material-image-off: Controlling Logging
|
||||
|
||||
## Controlling How InvokeAI Logs Status Messages
|
||||
|
||||
InvokeAI logs status messages using a configurable logging system. You
|
||||
can log to the terminal window, to a designated file on the local
|
||||
machine, to the syslog facility on a Linux or Mac, or to a properly
|
||||
configured web server. You can configure several logs at the same
|
||||
time, and control the level of message logged and the logging format
|
||||
(to a limited extent).
|
||||
|
||||
Three command-line options control logging:
|
||||
|
||||
### `--log_handlers <handler1> <handler2> ...`
|
||||
|
||||
This option activates one or more log handlers. Options are "console",
|
||||
"file", "syslog" and "http". To specify more than one, separate them
|
||||
by spaces:
|
||||
|
||||
```bash
|
||||
invokeai-web --log_handlers console syslog=/dev/log file=C:\Users\fred\invokeai.log
|
||||
```
|
||||
|
||||
The format of these options is described below.
|
||||
|
||||
### `--log_format {plain|color|legacy|syslog}`
|
||||
|
||||
This controls the format of log messages written to the console. Only
|
||||
the "console" log handler is currently affected by this setting.
|
||||
|
||||
* "plain" provides formatted messages like this:
|
||||
|
||||
```bash
|
||||
|
||||
[2023-05-24 23:18:50,352]::[InvokeAI]::DEBUG --> this is a debug message
|
||||
[2023-05-24 23:18:50,352]::[InvokeAI]::INFO --> this is an informational message
|
||||
[2023-05-24 23:18:50,352]::[InvokeAI]::WARNING --> this is a warning
|
||||
[2023-05-24 23:18:50,352]::[InvokeAI]::ERROR --> this is an error
|
||||
[2023-05-24 23:18:50,352]::[InvokeAI]::CRITICAL --> this is a critical error
|
||||
```
|
||||
|
||||
* "color" produces similar output, but the text will be color coded to
|
||||
indicate the severity of the message.
|
||||
|
||||
* "legacy" produces output similar to InvokeAI versions 2.3 and earlier:
|
||||
|
||||
```bash
|
||||
### this is a critical error
|
||||
*** this is an error
|
||||
** this is a warning
|
||||
>> this is an informational message
|
||||
| this is a debug message
|
||||
```
|
||||
|
||||
* "syslog" produces messages suitable for syslog entries:
|
||||
|
||||
```bash
|
||||
InvokeAI [2691178] <CRITICAL> this is a critical error
|
||||
InvokeAI [2691178] <ERROR> this is an error
|
||||
InvokeAI [2691178] <WARNING> this is a warning
|
||||
InvokeAI [2691178] <INFO> this is an informational message
|
||||
InvokeAI [2691178] <DEBUG> this is a debug message
|
||||
```
|
||||
|
||||
(note that the date, time and hostname will be added by the syslog
|
||||
system)
|
||||
|
||||
### `--log_level {debug|info|warning|error|critical}`
|
||||
|
||||
Providing this command-line option will cause only messages at the
|
||||
specified level or above to be emitted.
|
||||
|
||||
## Console logging
|
||||
|
||||
When "console" is provided to `--log_handlers`, messages will be
|
||||
written to the command line window in which InvokeAI was launched. By
|
||||
default, the color formatter will be used unless overridden by
|
||||
`--log_format`.
|
||||
|
||||
## File logging
|
||||
|
||||
When "file" is provided to `--log_handlers`, entries will be written
|
||||
to the file indicated in the path argument. By default, the "plain"
|
||||
format will be used:
|
||||
|
||||
```bash
|
||||
invokeai-web --log_handlers file=/var/log/invokeai.log
|
||||
```
|
||||
|
||||
## Syslog logging
|
||||
|
||||
When "syslog" is requested, entries will be sent to the syslog
|
||||
system. There are a variety of ways to control where the log message
|
||||
is sent:
|
||||
|
||||
* Send to the local machine using the `/dev/log` socket:
|
||||
|
||||
```
|
||||
invokeai-web --log_handlers syslog=/dev/log
|
||||
```
|
||||
|
||||
* Send to the local machine using a UDP message:
|
||||
|
||||
```
|
||||
invokeai-web --log_handlers syslog=localhost
|
||||
```
|
||||
|
||||
* Send to the local machine using a UDP message on a nonstandard
|
||||
port:
|
||||
|
||||
```
|
||||
invokeai-web --log_handlers syslog=localhost:512
|
||||
```
|
||||
|
||||
* Send to a remote machine named "loghost" on the local LAN using
|
||||
facility LOG_USER and UDP packets:
|
||||
|
||||
```
|
||||
invokeai-web --log_handlers syslog=loghost,facility=LOG_USER,socktype=SOCK_DGRAM
|
||||
```
|
||||
|
||||
This can be abbreviated `syslog=loghost`, as LOG_USER and SOCK_DGRAM
|
||||
are defaults.
|
||||
|
||||
* Send to a remote machine named "loghost" using the facility LOCAL0
|
||||
and using a TCP socket:
|
||||
|
||||
```
|
||||
invokeai-web --log_handlers syslog=loghost,facility=LOG_LOCAL0,socktype=SOCK_STREAM
|
||||
```
|
||||
|
||||
If no arguments are specified (just a bare "syslog"), then the logging
|
||||
system will look for a UNIX socket named `/dev/log`, and if not found
|
||||
try to send a UDP message to `localhost`. The Macintosh OS used to
|
||||
support logging to a socket named `/var/run/syslog`, but this feature
|
||||
has since been disabled.
|
||||
|
||||
## Web logging
|
||||
|
||||
If you have access to a web server that is configured to log messages
|
||||
when a particular URL is requested, you can log using the "http"
|
||||
method:
|
||||
|
||||
```
|
||||
invokeai-web --log_handlers http=http://my.server/path/to/logger,method=POST
|
||||
```
|
||||
|
||||
The optional [,method=] part can be used to specify whether the URL
|
||||
accepts GET (default) or POST messages.
|
||||
|
||||
Currently password authentication and SSL are not supported.
|
||||
|
||||
## Using the configuration file
|
||||
|
||||
You can set and forget logging options by adding a "Logging" section
|
||||
to `invokeai.yaml`:
|
||||
|
||||
```
|
||||
InvokeAI:
|
||||
[... other settings...]
|
||||
Logging:
|
||||
log_handlers:
|
||||
- console
|
||||
- syslog=/dev/log
|
||||
log_level: info
|
||||
log_format: color
|
||||
```
|
||||
73
docs/features/MODEL_MERGING.md
Normal file
@@ -0,0 +1,73 @@
|
||||
---
|
||||
title: Model Merging
|
||||
---
|
||||
|
||||
# :material-image-off: Model Merging
|
||||
|
||||
## How to Merge Models
|
||||
|
||||
As of version 2.3, InvokeAI comes with a script that allows you to
|
||||
merge two or three diffusers-type models into a new merged model. The
|
||||
resulting model will combine characteristics of the original, and can
|
||||
be used to teach an old model new tricks.
|
||||
|
||||
You may run the merge script by starting the invoke launcher
|
||||
(`invoke.sh` or `invoke.bat`) and choosing the option for _merge
|
||||
models_. This will launch a text-based interactive user interface that
|
||||
prompts you to select the models to merge, how to merge them, and the
|
||||
merged model name.
|
||||
|
||||
Alternatively you may activate InvokeAI's virtual environment from the
|
||||
command line, and call the script via `merge_models --gui` to open up
|
||||
a version that has a nice graphical front end. To get the
|
||||
command-line-only version, omit `--gui`.
|
||||
|
||||
The user interface for the text-based interactive script is
|
||||
straightforward. It shows you a series of setting fields. Use control-N (^N)
|
||||
to move to the next field, and control-P (^P) to move to the previous
|
||||
one. You can also use TAB and shift-TAB to move forward and
|
||||
backward. Once you are in a multiple choice field, use the up and down
|
||||
cursor arrows to move to your desired selection, and press `<SPACE>` or
|
||||
`<ENTER>` to select it. Change text fields by typing in them, and adjust
|
||||
scrollbars using the left and right arrow keys.
|
||||
|
||||
Once you are happy with your settings, press the OK button. Note that
|
||||
there may be two pages of settings, depending on the height of your
|
||||
screen, and the OK button may be on the second page. Advance past the
|
||||
last field of the first page to get to the second page, and reverse
|
||||
this to get back.
|
||||
|
||||
If the merge runs successfully, it will create a new diffusers model
|
||||
under the selected name and register it with InvokeAI.
|
||||
|
||||
## The Settings
|
||||
|
||||
* Model Selection -- there are three multiple choice fields that
|
||||
display all the diffusers-style models that InvokeAI knows about.
|
||||
If you do not see the model you are looking for, then it is probably
|
||||
a legacy checkpoint model and needs to be converted using the
|
||||
`invoke` command-line client and its `!optimize` command. You
|
||||
must select at least two models to merge. The third can be left at
|
||||
"None" if you desire.
|
||||
|
||||
* Alpha -- This is the ratio to use when combining models. It ranges
|
||||
from 0 to 1. The higher the value, the more weight is given to the
|
||||
2nd and (optionally) 3rd models. So if you have two models named "A"
|
||||
and "B", an alpha value of 0.25 will give you a merged model that is
|
||||
75% A and 25% B.
|
||||
|
||||
* Interpolation Method -- This is the method used to combine
|
||||
weights. The options are "weighted_sum" (the default), "sigmoid",
|
||||
"inv_sigmoid" and "add_difference". Each produces slightly different
|
||||
results. When three models are in use, only "add_difference" is
|
||||
available. (TODO: cite a reference that describes what these
|
||||
interpolation methods actually do and how to decide among them).
|
||||
|
||||
* Force -- Not all models are compatible with each other. The merge
|
||||
script will check for compatibility and refuse to merge ones that
|
||||
are incompatible. Set this checkbox to try merging anyway.
|
||||
|
||||
* Name for merged model - This is the name for the new model. Please
|
||||
use InvokeAI conventions - only alphanumeric letters and the
|
||||
characters ".+-".
|
||||
|
||||
208
docs/features/NODES.md
Normal file
@@ -0,0 +1,208 @@
|
||||
# Nodes Editor (Experimental)
|
||||
|
||||
🚨
|
||||
*The node editor is experimental. We've made it accessible because we use it to develop the application, but we have not addressed the many known rough edges. It's very easy to shoot yourself in the foot, and we cannot offer support for it until it sees full release (ETA v3.1). Everything is subject to change without warning.*
|
||||
🚨
|
||||
|
||||
The nodes editor is a blank canvas allowing for the use of individual functions and image transformations to control the image generation workflow. The node processing flow is usually done from left (inputs) to right (outputs), though linearity can become abstracted the more complex the node graph becomes. Node inputs and outputs are connected by dragging connectors from node to node.
|
||||
|
||||
To better understand how nodes are used, think of how an electric power bar works. It takes in one input (electricity from a wall outlet) and passes it to multiple devices through multiple outputs. Similarly, a node could have multiple inputs and outputs functioning at the same (or different) time, but all node outputs pass information onward like a power bar passes electricity. Not all outputs are compatible with all inputs, however - Each node has different constraints on how it is expecting to input/output information. In general, node outputs are colour-coded to match compatible inputs of other nodes.
|
||||
|
||||
## Anatomy of a Node
|
||||
|
||||
Individual nodes are made up of the following:
|
||||
|
||||
- Inputs: Edge points on the left side of the node window where you connect outputs from other nodes.
|
||||
- Outputs: Edge points on the right side of the node window where you connect to inputs on other nodes.
|
||||
- Options: Various options which are either manually configured, or overridden by connecting an output from another node to the input.
|
||||
|
||||
## Diffusion Overview
|
||||
|
||||
Taking the time to understand the diffusion process will help you to understand how to set up your nodes in the nodes editor.
|
||||
|
||||
There are two main spaces Stable Diffusion works in: image space and latent space.
|
||||
|
||||
Image space represents images in pixel form that you look at. Latent space represents compressed inputs. It’s in latent space that Stable Diffusion processes images. A VAE (Variational Auto Encoder) is responsible for compressing and encoding inputs into latent space, as well as decoding outputs back into image space.
|
||||
|
||||
When you generate an image using text-to-image, multiple steps occur in latent space:
|
||||
1. Random noise is generated at the chosen height and width. The noise’s characteristics are dictated by the chosen (or not chosen) seed. This noise tensor is passed into latent space. We’ll call this noise A.
|
||||
1. Using a model’s U-Net, a noise predictor examines noise A, and the words tokenized by CLIP from your prompt (conditioning). It generates its own noise tensor to predict what the final image might look like in latent space. We’ll call this noise B.
|
||||
1. Noise B is subtracted from noise A in an attempt to create a final latent image indicative of the inputs. This step is repeated for the number of sampler steps chosen.
|
||||
1. The VAE decodes the final latent image from latent space into image space.
|
||||
|
||||
Image-to-image is a similar process, with only step 1 being different:
|
||||
1. The input image is encoded from image space into latent space by the VAE. Noise is then added to the input latent image. Denoising Strength dictates how much noise is added, 0 being none, and 1 being all-encompassing. We’ll call this noise A. The process is then the same as steps 2-4 in the text-to-image explanation above.
|
||||
|
||||
Furthermore, a model provides the CLIP prompt tokenizer, the VAE, and a U-Net (where noise prediction occurs given a prompt and initial noise tensor).
|
||||
|
||||
A noise scheduler (e.g. DPM++ 2M Karras) schedules the subtraction of noise from the latent image across the sampler steps chosen (step 3 above). Less noise is usually subtracted at higher sampler steps.
|
||||
|
||||
## Node Types (Base Nodes)
|
||||
|
||||
| Node <img width=160 align="right"> | Function |
|
||||
| ---------------------------------- | --------------------------------------------------------------------------------------|
|
||||
| Add | Adds two numbers |
|
||||
| CannyImageProcessor | Canny edge detection for ControlNet |
|
||||
| ClipSkip | Skip layers in clip text_encoder model |
|
||||
| Collect | Collects values into a collection |
|
||||
| Prompt (Compel) | Parse prompt using compel package to conditioning |
|
||||
| ContentShuffleImageProcessor | Applies content shuffle processing to image |
|
||||
| ControlNet | Collects ControlNet info to pass to other nodes |
|
||||
| CvInpaint | Simple inpaint using opencv |
|
||||
| Divide | Divides two numbers |
|
||||
| DynamicPrompt | Parses a prompt using adieyal/dynamic prompt's random or combinatorial generator |
|
||||
| FloatLinearRange | Creates a range |
|
||||
| HedImageProcessor | Applies HED edge detection to image |
|
||||
| ImageBlur | Blurs an image |
|
||||
| ImageChannel | Gets a channel from an image |
|
||||
| ImageCollection | Load a collection of images and provide it as output |
|
||||
| ImageConvert | Converts an image to a different mode |
|
||||
| ImageCrop | Crops an image to a specified box. The box can be outside of the image. |
|
||||
| ImageInverseLerp | Inverse linear interpolation of all pixels of an image |
|
||||
| ImageLerp | Linear interpolation of all pixels of an image |
|
||||
| ImageMultiply | Multiplies two images together using `PIL.ImageChops.Multiply()` |
|
||||
| ImageNSFWBlurInvocation | Detects and blurs images that may contain sexually explicit content |
|
||||
| ImagePaste | Pastes an image into another image |
|
||||
| ImageProcessor | Base class for invocations that reprocess images for ControlNet |
|
||||
| ImageResize | Resizes an image to specific dimensions |
|
||||
| ImageScale | Scales an image by a factor |
|
||||
| ImageToLatents                     | Encodes an image into latents                                                         |
|
||||
| ImageWatermarkInvocation | Adds an invisible watermark to images |
|
||||
| InfillColor | Infills transparent areas of an image with a solid color |
|
||||
| InfillPatchMatch | Infills transparent areas of an image using the PatchMatch algorithm |
|
||||
| InfillTile | Infills transparent areas of an image with tiles of the image |
|
||||
| Inpaint | Generates an image using inpaint |
|
||||
| Iterate | Iterates over a list of items |
|
||||
| LatentsToImage | Generates an image from latents |
|
||||
| LatentsToLatents | Generates latents using latents as base image |
|
||||
| LeresImageProcessor | Applies leres processing to image |
|
||||
| LineartAnimeImageProcessor | Applies line art anime processing to image |
|
||||
| LineartImageProcessor | Applies line art processing to image |
|
||||
| LoadImage | Load an image and provide it as output |
|
||||
| Lora Loader | Apply selected lora to unet and text_encoder |
|
||||
| Model Loader | Loads a main model, outputting its submodels |
|
||||
| MaskFromAlpha | Extracts the alpha channel of an image as a mask |
|
||||
| MediapipeFaceProcessor | Applies mediapipe face processing to image |
|
||||
| MidasDepthImageProcessor | Applies Midas depth processing to image |
|
||||
| MlsdImageProcessor                 | Applies MLSD processing to image                                                      |
|
||||
| Multiply | Multiplies two numbers |
|
||||
| Noise | Generates latent noise |
|
||||
| NormalbaeImageProcessor | Applies NormalBAE processing to image |
|
||||
| OpenposeImageProcessor | Applies Openpose processing to image |
|
||||
| ParamFloat | A float parameter |
|
||||
| ParamInt | An integer parameter |
|
||||
| PidiImageProcessor | Applies PIDI processing to an image |
|
||||
| Progress Image | Displays the progress image in the Node Editor |
|
||||
| RandomInt                          | Outputs a single random integer                                                       |
|
||||
| RandomRange | Creates a collection of random numbers |
|
||||
| Range | Creates a range of numbers from start to stop with step |
|
||||
| RangeOfSize | Creates a range from start to start + size with step |
|
||||
| ResizeLatents | Resizes latents to explicit width/height (in pixels). Provided dimensions are floor-divided by 8. |
|
||||
| RestoreFace | Restores faces in the image |
|
||||
| ScaleLatents | Scales latents by a given factor |
|
||||
| SegmentAnythingProcessor | Applies segment anything processing to image |
|
||||
| ShowImage | Displays a provided image, and passes it forward in the pipeline |
|
||||
| StepParamEasing | Experimental per-step parameter for easing for denoising steps |
|
||||
| Subtract | Subtracts two numbers |
|
||||
| TextToLatents | Generates latents from conditionings |
|
||||
| TileResampleProcessor              | Base class for invocations that preprocess images for ControlNet                      |
|
||||
| Upscale | Upscales an image |
|
||||
| VAE Loader | Loads a VAE model, outputting a VaeLoaderOutput |
|
||||
| ZoeDepthImageProcessor | Applies Zoe depth processing to image |
|
||||
|
||||
## Node Grouping Concepts
|
||||
|
||||
There are several node grouping concepts that can be examined with a narrow focus. These (and other) groupings can be pieced together to make up functional graph setups, and are important to understanding how groups of nodes work together as part of a whole. Note that the screenshots below aren't examples of complete functioning node graphs (see Examples).
|
||||
|
||||
### Noise
|
||||
|
||||
As described, an initial noise tensor is necessary for the latent diffusion process. As a result, all non-image *ToLatents nodes require a noise node input.
|
||||
|
||||

|
||||
|
||||
### Conditioning
|
||||
|
||||
As described, conditioning is necessary for the latent diffusion process, whether empty or not. As a result, all non-image *ToLatents nodes require positive and negative conditioning inputs. Conditioning is reliant on a CLIP tokenizer provided by the Model Loader node.
|
||||
|
||||

|
||||
|
||||
### Image Space & VAE
|
||||
|
||||
The ImageToLatents node doesn't require a noise node input, but requires a VAE input to convert the image from image space into latent space. In reverse, the LatentsToImage node requires a VAE input to convert from latent space back into image space.
|
||||
|
||||

|
||||
|
||||
### Defined & Random Seeds
|
||||
|
||||
It is common to want to use both the same seed (for continuity) and random seeds (for variance). To define a seed, simply enter it into the 'Seed' field on a noise node. Conversely, the RandomInt node generates a random integer between 'Low' and 'High', and can be used as input to the 'Seed' edge point on a noise node to randomize your seed.
|
||||
|
||||

|
||||
|
||||
### Control
|
||||
|
||||
Control means to guide the diffusion process to adhere to a defined input or structure. Control can be provided as input to non-image *ToLatents nodes from ControlNet nodes. ControlNet nodes usually require an image processor which converts an input image for use with ControlNet.
|
||||
|
||||

|
||||
|
||||
### LoRA
|
||||
|
||||
The Lora Loader node lets you load a LoRA (say that ten times fast) and pass it as output to both the Prompt (Compel) and non-image *ToLatents nodes. A model's CLIP tokenizer is passed through the LoRA into Prompt (Compel), where it affects conditioning. A model's U-Net is also passed through the LoRA into a non-image *ToLatents node, where it affects noise prediction.
|
||||
|
||||

|
||||
|
||||
### Scaling
|
||||
|
||||
Use the ImageScale, ScaleLatents, and Upscale nodes to upscale images and/or latent images. The chosen method differs across contexts. However, be aware that latents are already noisy and compressed at their original resolution; scaling an image could produce more detailed results.
|
||||
|
||||

|
||||
|
||||
### Iteration + Multiple Images as Input
|
||||
|
||||
Iteration is a common concept in any processing, and means to repeat a process with given input. In nodes, you're able to use the Iterate node to iterate through collections usually gathered by the Collect node. The Iterate node has many potential uses, from processing a collection of images one after another, to varying seeds across multiple image generations and more. This screenshot demonstrates how to collect several images and pass them out one at a time.
|
||||
|
||||

|
||||
|
||||
### Multiple Image Generation + Random Seeds
|
||||
|
||||
Multiple image generation in the node editor is done using the RandomRange node. In this case, the 'Size' field represents the number of images to generate. As RandomRange produces a collection of integers, we need to add the Iterate node to iterate through the collection.
|
||||
|
||||
To control seeds across generations takes some care. The first row in the screenshot will generate multiple images with different seeds, but using the same RandomRange parameters across invocations will result in the same group of random seeds being used across the images, producing repeatable results. In the second row, adding the RandomInt node as input to RandomRange's 'Seed' edge point will ensure that seeds are varied across all images across invocations, producing varied results.
|
||||
|
||||

|
||||
|
||||
## Examples
|
||||
|
||||
With our knowledge of node grouping and the diffusion process, let’s break down some basic graphs in the nodes editor. Note that a node's options can be overridden by inputs from other nodes. These examples aren't strict rules to follow and only demonstrate some basic configurations.
|
||||
|
||||
### Basic text-to-image Node Graph
|
||||
|
||||

|
||||
|
||||
- Model Loader: A necessity for generating images (as we’ve read above). We choose our model from the dropdown. It outputs a U-Net, CLIP tokenizer, and VAE.
|
||||
- Prompt (Compel): Another necessity. Two prompt nodes are created. One will output positive conditioning (what you want, ‘dog’), one will output negative (what you don’t want, ‘cat’). They both input the CLIP tokenizer that the Model Loader node outputs.
|
||||
- Noise: Consider this noise A from step one of the text-to-image explanation above. Choose a seed number, width, and height.
|
||||
- TextToLatents: This node takes many inputs for converting and processing text & noise from image space into latent space, hence the name TextTo**Latents**. In this setup, it inputs positive and negative conditioning from the prompt nodes for processing (step 2 above). It inputs noise from the noise node for processing (steps 2 & 3 above). Lastly, it inputs a U-Net from the Model Loader node for processing (step 2 above). It outputs latents for use in the next LatentsToImage node. Choose number of sampler steps, CFG scale, and scheduler.
|
||||
- LatentsToImage: This node takes in processed latents from the TextToLatents node, and the model’s VAE from the Model Loader node which is responsible for decoding latents back into the image space, hence the name LatentsTo**Image**. This node is the last stop, and once the image is decoded, it is saved to the gallery.
|
||||
|
||||
### Basic image-to-image Node Graph
|
||||
|
||||

|
||||
|
||||
- Model Loader: Choose a model from the dropdown.
|
||||
- Prompt (Compel): Two prompt nodes. One positive (dog), one negative (cat). Same CLIP inputs from the Model Loader node as before.
|
||||
- ImageToLatents: Upload a source image directly in the node window, via drag'n'drop from the gallery, or passed in as input. The ImageToLatents node inputs the VAE from the Model Loader node to encode the chosen image from image space into latent space, hence the name ImageTo**Latents**. It outputs latents for use in the next LatentsToLatents node. It also outputs the source image's width and height for use in the next Noise node if the final image is to be the same dimensions as the source image.
|
||||
- Noise: A noise tensor is created with the width and height of the source image, and connected to the next LatentsToLatents node. Notice the width and height fields are overridden by the input from the ImageToLatents width and height outputs.
|
||||
- LatentsToLatents: The inputs and options are nearly identical to TextToLatents, except that LatentsToLatents also takes latents as an input. Considering our source image is already converted to latents in the last ImageToLatents node, and text + noise are no longer the only inputs to process, we use the LatentsToLatents node.
|
||||
- LatentsToImage: Like previously, the LatentsToImage node will use the VAE from the Model Loader as input to decode the latents from LatentsToLatents into image space, and save it to the gallery.
|
||||
|
||||
### Basic ControlNet Node Graph
|
||||
|
||||

|
||||
|
||||
- Model Loader
|
||||
- Prompt (Compel)
|
||||
- Noise: Width and height of the CannyImageProcessor ControlNet image is passed in to set the dimensions of the noise passed to TextToLatents.
|
||||
- CannyImageProcessor: The CannyImageProcessor node is used to process the source image being used as a ControlNet. Each ControlNet processor node applies control in different ways, and has some different options to configure. Width and height are passed to noise, as mentioned. The processed ControlNet image is output to the ControlNet node.
|
||||
- ControlNet: Select the type of control model. In this case, canny is chosen as the CannyImageProcessor was used to generate the ControlNet image. Configure the control node options, and pass the control output to TextToLatents.
|
||||
- TextToLatents: Similar to the basic text-to-image example, except ControlNet is passed to the control input edge point.
|
||||
- LatentsToImage
|
||||
51
docs/features/OTHER.md
Normal file
@@ -0,0 +1,51 @@
|
||||
---
|
||||
title: Others
|
||||
---
|
||||
|
||||
# :fontawesome-regular-share-from-square: Others
|
||||
|
||||
## **Google Colab**
|
||||
|
||||
[{ align="right" }](https://colab.research.google.com/github/lstein/stable-diffusion/blob/main/notebooks/Stable_Diffusion_AI_Notebook.ipynb)
|
||||
|
||||
Open and follow instructions to use an isolated environment running Dream.
|
||||
|
||||
Output Example:
|
||||
|
||||

|
||||
|
||||
---
|
||||
|
||||
## **Invisible Watermark**
|
||||
|
||||
In keeping with the principles for responsible AI generation, and to
|
||||
help AI researchers avoid synthetic images contaminating their
|
||||
training sets, InvokeAI adds an invisible watermark to each of the
|
||||
final images it generates. The watermark consists of the text
|
||||
"InvokeAI" and can be viewed using the
|
||||
[invisible-watermarks](https://github.com/ShieldMnt/invisible-watermark)
|
||||
tool.
|
||||
|
||||
Watermarking is controlled using the `invisible_watermark` setting in
|
||||
`invokeai.yaml`. To turn it off, add the following line under the `Features`
|
||||
category.
|
||||
|
||||
```
|
||||
invisible_watermark: false
|
||||
```
|
||||
|
||||
|
||||
## **Weighted Prompts**
|
||||
|
||||
You may weight different sections of the prompt to tell the sampler to attach different levels of
|
||||
priority to them, by adding `:<percent>` to the end of the section you wish to up- or downweight. For
|
||||
example consider this prompt:
|
||||
|
||||
```bash
|
||||
(tabby cat):0.25 (white duck):0.75 hybrid
|
||||
```
|
||||
|
||||
This will tell the sampler to invest 25% of its effort on the tabby cat aspect of the image and 75%
|
||||
on the white duck aspect (surprisingly, this example actually works). The prompt weights can use any
|
||||
combination of integers and floating point numbers, and they do not need to add up to 1.
|
||||
|
||||
122
docs/features/POSTPROCESS.md
Normal file
@@ -0,0 +1,122 @@
|
||||
---
|
||||
title: Postprocessing
|
||||
---
|
||||
|
||||
# :material-image-edit: Postprocessing
|
||||
|
||||
## Intro
|
||||
|
||||
This extension provides the ability to restore faces and upscale images.
|
||||
|
||||
## Face Fixing
|
||||
|
||||
The default face restoration module is GFPGAN. The default upscale is
|
||||
Real-ESRGAN. For an alternative face restoration module, see
|
||||
[CodeFormer Support](#codeformer-support) below.
|
||||
|
||||
As of version 1.14, environment.yaml will install the Real-ESRGAN package into
|
||||
the standard install location for python packages, and will put GFPGAN into a
|
||||
subdirectory of "src" in the InvokeAI directory. Upscaling with Real-ESRGAN
|
||||
should "just work" without further intervention. Simply indicate the desired scale on
|
||||
the popup in the Web GUI.
|
||||
|
||||
**GFPGAN** requires a series of downloadable model files to work. These are
|
||||
loaded when you run `invokeai-configure`. If GFPGAN is failing with an
|
||||
error, please run the following from the InvokeAI directory:
|
||||
|
||||
```bash
|
||||
invokeai-configure
|
||||
```
|
||||
|
||||
If you do not run this script in advance, the GFPGAN module will attempt to
|
||||
download the model files the first time you try to perform facial
|
||||
reconstruction.
|
||||
|
||||
### Upscaling
|
||||
|
||||
Open the upscaling dialog by clicking on the "expand" icon located
|
||||
above the image display area in the Web UI:
|
||||
|
||||
<figure markdown>
|
||||

|
||||
</figure>
|
||||
|
||||
There are three different upscaling parameters that you can
|
||||
adjust. The first is the scale itself, either 2x or 4x.
|
||||
|
||||
The second is the "Denoising Strength." Higher values will smooth out
|
||||
the image and remove digital chatter, but may lose fine detail at
|
||||
higher values.
|
||||
|
||||
Third, "Upscale Strength" allows you to adjust how strongly the upscaling is applied. You can set the
|
||||
scaling strength between `0` and `1.0` to control the intensity of the
|
||||
scaling. AI upscalers generally tend to smooth out texture details. If
|
||||
you wish to retain some of those for natural looking results, we
|
||||
recommend using values between `0.5` and `0.8`.
|
||||
|
||||
[This figure](../assets/features/upscaling-montage.png) illustrates
|
||||
the effects of denoising and strength. The original image was 512x512,
|
||||
4x scaled to 2048x2048. The "original" version on the upper left was
|
||||
scaled using simple pixel averaging. The remainder use the ESRGAN
|
||||
upscaling algorithm at different levels of denoising and strength.
|
||||
|
||||
<figure markdown>
|
||||
{ width=720 }
|
||||
</figure>
|
||||
|
||||
Both denoising and strength default to 0.75.
|
||||
|
||||
### Face Restoration
|
||||
|
||||
InvokeAI offers two alternative face restoration algorithms,
|
||||
[GFPGAN](https://github.com/TencentARC/GFPGAN) and
|
||||
[CodeFormer](https://huggingface.co/spaces/sczhou/CodeFormer). These
|
||||
algorithms improve the appearance of faces, particularly eyes and
|
||||
mouths. Issues with faces are less common with the latest set of
|
||||
Stable Diffusion models than with the original 1.4 release, but the
|
||||
restoration algorithms can still make a noticeable improvement in
|
||||
certain cases. You can also apply restoration to old photographs you
|
||||
upload.
|
||||
|
||||
To access face restoration, click the "smiley face" icon in the
|
||||
toolbar above the InvokeAI image panel. You will be presented with a
|
||||
dialog that offers a choice between the two algorithms and sliders that
|
||||
allow you to adjust their parameters. Alternatively, you may open the
|
||||
left-hand accordion panel labeled "Face Restoration" and have the
|
||||
restoration algorithm of your choice applied to generated images
|
||||
automatically.
|
||||
|
||||
|
||||
Like upscaling, there are a number of parameters that adjust the face
|
||||
restoration output. GFPGAN has a single parameter, `strength`, which
|
||||
controls how much the algorithm is allowed to adjust the
|
||||
image. CodeFormer has two parameters, `strength`, and `fidelity`,
|
||||
which together control the quality of the output image as described in
|
||||
the [CodeFormer project
|
||||
page](https://shangchenzhou.com/projects/CodeFormer/). Default values
|
||||
are 0.75 for both parameters, which achieves a reasonable balance
|
||||
between changing the image too much and not enough.
|
||||
|
||||
[This figure](../assets/features/restoration-montage.png) illustrates
|
||||
the effects of adjusting GFPGAN and CodeFormer parameters.
|
||||
|
||||
<figure markdown>
|
||||
{ width=720 }
|
||||
</figure>
|
||||
|
||||
!!! note
|
||||
|
||||
GFPGAN and Real-ESRGAN are both memory intensive. In order to avoid crashes and memory overloads
|
||||
during the Stable Diffusion process, these effects are applied after Stable Diffusion has completed
|
||||
its work.
|
||||
|
||||
In single image generations, you will see the output right away but when you are using multiple
|
||||
iterations, the images will first be generated and then upscaled and face restored after that
|
||||
process is complete. While the image generation is taking place, you will still be able to preview
|
||||
the base images.
|
||||
|
||||
## How to disable
|
||||
|
||||
If, for some reason, you do not wish to load the GFPGAN and/or ESRGAN libraries,
|
||||
you can disable them on the invoke.py command line with the `--no_restore` and
|
||||
`--no_esrgan` options, respectively.
|
||||
348
docs/features/PROMPTS.md
Normal file
@@ -0,0 +1,348 @@
|
||||
---
|
||||
title: Prompting-Features
|
||||
---
|
||||
|
||||
# :octicons-command-palette-24: Prompting-Features
|
||||
|
||||
## **Negative and Unconditioned Prompts**
|
||||
|
||||
Any words between a pair of square brackets will instruct Stable
|
||||
Diffusion to attempt to ban the concept from the generated image. The
|
||||
same effect is achieved by placing words in the "Negative Prompts"
|
||||
textbox in the Web UI.
|
||||
|
||||
```text
|
||||
this is a test prompt [not really] to make you understand [cool] how this works.
|
||||
```
|
||||
|
||||
In the above statement, the words `not really cool` will be ignored by Stable
|
||||
Diffusion.
|
||||
|
||||
Here's a prompt that depicts what it does.
|
||||
|
||||
original prompt:
|
||||
|
||||
`#!bash "A fantastical translucent pony made of water and foam, ethereal, radiant, hyperalism, scottish folklore, digital painting, artstation, concept art, smooth, 8 k frostbite 3 engine, ultra detailed, art by artgerm and greg rutkowski and magali villeneuve"`
|
||||
|
||||
`#!bash parameters: steps=20, dimensions=512x768, CFG=7.5, Scheduler=k_euler_a, seed=1654590180`
|
||||
|
||||
<figure markdown>
|
||||
|
||||

|
||||
|
||||
</figure>
|
||||
|
||||
That image has a woman, so if we want the horse without a rider, we can
|
||||
influence the image not to have a woman by putting [woman] in the prompt, like
|
||||
this:
|
||||
|
||||
`#!bash "A fantastical translucent poney made of water and foam, ethereal, radiant, hyperalism, scottish folklore, digital painting, artstation, concept art, smooth, 8 k frostbite 3 engine, ultra detailed, art by artgerm and greg rutkowski and magali villeneuve [woman]"`
|
||||
(same parameters as above)
|
||||
|
||||
<figure markdown>
|
||||
|
||||

|
||||
|
||||
</figure>
|
||||
|
||||
That's nice - but say we also don't want the image to be quite so blue. We can
|
||||
add "blue" to the list of negative prompts, so it's now [woman blue]:
|
||||
|
||||
`#!bash "A fantastical translucent poney made of water and foam, ethereal, radiant, hyperalism, scottish folklore, digital painting, artstation, concept art, smooth, 8 k frostbite 3 engine, ultra detailed, art by artgerm and greg rutkowski and magali villeneuve [woman blue]"`
|
||||
(same parameters as above)
|
||||
|
||||
<figure markdown>
|
||||
|
||||

|
||||
|
||||
</figure>
|
||||
|
||||
Getting close - but there's no sense in having a saddle when our horse doesn't
|
||||
have a rider, so we'll add one more negative prompt: [woman blue saddle].
|
||||
|
||||
`#!bash "A fantastical translucent poney made of water and foam, ethereal, radiant, hyperalism, scottish folklore, digital painting, artstation, concept art, smooth, 8 k frostbite 3 engine, ultra detailed, art by artgerm and greg rutkowski and magali villeneuve [woman blue saddle]"`
|
||||
(same parameters as above)
|
||||
|
||||
<figure markdown>
|
||||
|
||||

|
||||
|
||||
</figure>
|
||||
|
||||
!!! notes "Notes about this feature:"
|
||||
|
||||
* The only requirement for words to be ignored is that they are in between a pair of square brackets.
|
||||
* You can provide multiple words within the same bracket.
|
||||
* You can provide multiple brackets with multiple words in different places of your prompt. That works just fine.
|
||||
* To improve typical anatomy problems, you can add negative prompts like `[bad anatomy, extra legs, extra arms, extra fingers, poorly drawn hands, poorly drawn feet, disfigured, out of frame, tiling, bad art, deformed, mutated]`.
|
||||
|
||||
---
|
||||
|
||||
## **Prompt Syntax Features**
|
||||
|
||||
The InvokeAI prompting language has the following features:
|
||||
|
||||
### Attention weighting
|
||||
|
||||
Append a word or phrase with `-` or `+`, or a weight between `0` and `2`
|
||||
(`1`=default), to decrease or increase "attention" (= a mix of per-token CFG
|
||||
weighting multiplier and, for `-`, a weighted blend with the prompt without the
|
||||
term).
|
||||
|
||||
The following syntax is recognised:
|
||||
|
||||
- single words without parentheses: `a tall thin man picking apricots+`
|
||||
- single or multiple words with parentheses:
|
||||
`a tall thin man picking (apricots)+` `a tall thin man picking (apricots)-`
|
||||
`a tall thin man (picking apricots)+` `a tall thin man (picking apricots)-`
|
||||
- more effect with more symbols `a tall thin man (picking apricots)++`
|
||||
- nesting `a tall thin man (picking apricots+)++` (`apricots` effectively gets
|
||||
`+++`)
|
||||
- all of the above with explicit numbers `a tall thin man picking (apricots)1.1`
|
||||
`a tall thin man (picking (apricots)1.3)1.1`. (`+` is equivalent to 1.1, `++`
|
||||
is pow(1.1,2), `+++` is pow(1.1,3), etc; `-` means 0.9, `--` means pow(0.9,2),
|
||||
etc.)
|
||||
- attention also applies to `[unconditioning]` so
|
||||
`a tall thin man picking apricots [(ladder)0.01]` will _very gently_ nudge SD
|
||||
away from trying to draw the man on a ladder
|
||||
|
||||
You can use this to increase or decrease the amount of something. Starting from
|
||||
this prompt of `a man picking apricots from a tree`, let's see what happens if
|
||||
we increase and decrease how much attention we want Stable Diffusion to pay to
|
||||
the word `apricots`:
|
||||
|
||||
<figure markdown>
|
||||
|
||||

|
||||
|
||||
</figure>
|
||||
|
||||
Using `-` to reduce apricot-ness:
|
||||
|
||||
| `a man picking apricots- from a tree` | `a man picking apricots-- from a tree` | `a man picking apricots--- from a tree` |
|
||||
| ------------------------------------------------------------------------------------------------------------------------------ | --------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------ |
|
||||
|  |  |  |
|
||||
|
||||
Using `+` to increase apricot-ness:
|
||||
|
||||
| `a man picking apricots+ from a tree` | `a man picking apricots++ from a tree` | `a man picking apricots+++ from a tree` | `a man picking apricots++++ from a tree` | `a man picking apricots+++++ from a tree` |
|
||||
| ------------------------------------------------------------------------------------------------------------------------------------------ | --------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
|  |  |  |  |  |
|
||||
|
||||
You can also change the balance between different parts of a prompt. For
|
||||
example, below is a `mountain man`:
|
||||
|
||||
<figure markdown>
|
||||
|
||||

|
||||
|
||||
</figure>
|
||||
|
||||
And here he is with more mountain:
|
||||
|
||||
| `mountain+ man` | `mountain++ man` | `mountain+++ man` |
|
||||
| ---------------------------------------------- | ---------------------------------------------- | ---------------------------------------------- |
|
||||
|  |  |  |
|
||||
|
||||
Or, alternatively, with more man:
|
||||
|
||||
| `mountain man+` | `mountain man++` | `mountain man+++` | `mountain man++++` |
|
||||
| ---------------------------------------------- | ---------------------------------------------- | ---------------------------------------------- | ---------------------------------------------- |
|
||||
|  |  |  |  |
|
||||
|
||||
### Blending between prompts
|
||||
|
||||
- `("a tall thin man picking apricots", "a tall thin man picking pears").blend(1,1)`
|
||||
- The existing prompt blending using `:<weight>` will continue to be supported -
|
||||
`("a tall thin man picking apricots", "a tall thin man picking pears").blend(1,1)`
|
||||
is equivalent to
|
||||
`a tall thin man picking apricots:1 a tall thin man picking pears:1` in the
|
||||
old syntax.
|
||||
- Attention weights can be nested inside blends.
|
||||
- Non-normalized blends are supported by passing `no_normalize` as an additional
|
||||
argument to the blend weights, eg
|
||||
`("a tall thin man picking apricots", "a tall thin man picking pears").blend(1,-1,no_normalize)`.
|
||||
This is very fun for exploring local maxima in the feature space, but it is also easy to
|
||||
produce garbage output.
|
||||
|
||||
See the section below on "Prompt Blending" for more information about how this
|
||||
works.
|
||||
|
||||
### Cross-Attention Control ('prompt2prompt')
|
||||
|
||||
Sometimes an image you generate is almost right, and you just want to change one
|
||||
detail without affecting the rest. You could use a photo editor and inpainting
|
||||
to overpaint the area, but that's a pain. Here's where `prompt2prompt` comes in
|
||||
handy.
|
||||
|
||||
Generate an image with a given prompt, record the seed of the image, and then
|
||||
use the `prompt2prompt` syntax to substitute words in the original prompt for
|
||||
words in a new prompt. This works for `img2img` as well.
|
||||
|
||||
For example, consider the prompt `a cat.swap(dog) playing with a ball in the forest`. Normally, because of the way words interact with each other when doing a stable diffusion image generation, these two prompts would generate different compositions:
|
||||
- `a cat playing with a ball in the forest`
|
||||
- `a dog playing with a ball in the forest`
|
||||
|
||||
| `a cat playing with a ball in the forest` | `a dog playing with a ball in the forest` |
|
||||
| --- | --- |
|
||||
| img | img |
|
||||
|
||||
|
||||
- For multiple word swaps, use parentheses: `a (fluffy cat).swap(barking dog) playing with a ball in the forest`.
|
||||
- To swap a comma, use quotes: `a ("fluffy, grey cat").swap("big, barking dog") playing with a ball in the forest`.
|
||||
- Supports options `t_start` and `t_end` (each 0-1) loosely corresponding to bloc97's `prompt_edit_tokens_start/_end` but with the math swapped to make it easier to
|
||||
intuitively understand. `t_start` and `t_end` are used to control on which steps cross-attention control should run. With the default values `t_start=0` and `t_end=1`, cross-attention control is active on every step of image generation. Other values can be used to turn cross-attention control off for part of the image generation process.
|
||||
- For example, if doing a diffusion with 10 steps for the prompt `a cat.swap(dog, t_start=0.3, t_end=1.0) playing with a ball in the forest`, the first 3 steps will be run as `a cat playing with a ball in the forest`, while the last 7 steps will run as `a dog playing with a ball in the forest`, but the pixels that represent `dog` will be locked to the pixels that would have represented `cat` if the `cat` prompt had been used instead.
|
||||
- Conversely, for `a cat.swap(dog, t_start=0, t_end=0.7) playing with a ball in the forest`, the first 7 steps will run as `a dog playing with a ball in the forest` with the pixels that represent `dog` locked to the same pixels that would have represented `cat` if the `cat` prompt was being used instead. The final 3 steps will just run `a cat playing with a ball in the forest`.
|
||||
> For img2img, the step sequence does not start at 0 but instead at `(1.0-strength)` - so if the img2img `strength` is `0.7`, `t_start` and `t_end` must both be greater than `0.3` (`1.0-0.7`) to have any effect.
|
||||
|
||||
Prompt2prompt `.swap()` is not compatible with xformers, which will be temporarily disabled when doing a `.swap()` - so you should expect to use more VRAM and run slower than with xformers enabled.
|
||||
|
||||
The `prompt2prompt` code is based off
|
||||
[bloc97's colab](https://github.com/bloc97/CrossAttentionControl).
|
||||
|
||||
### Escaping parentheses () and speech marks ""
|
||||
|
||||
If the model you are using has parentheses () or speech marks "" as part of its
|
||||
syntax, you will need to "escape" these using a backslash, so that `(my_keyword)`
|
||||
becomes `\(my_keyword\)`. Otherwise, the prompt parser will attempt to interpret
|
||||
the parentheses as part of the prompt syntax and it will get confused.
|
||||
|
||||
---
|
||||
|
||||
## **Prompt Blending**
|
||||
|
||||
You may blend together different sections of the prompt to explore the AI's
|
||||
latent semantic space and generate interesting (and often surprising!)
|
||||
variations. The syntax is:
|
||||
|
||||
```bash
|
||||
blue sphere:0.25 red cube:0.75 hybrid
|
||||
```
|
||||
|
||||
This will tell the sampler to blend 25% of the concept of a blue sphere with 75%
|
||||
of the concept of a red cube. The blend weights can use any combination of
|
||||
integers and floating point numbers, and they do not need to add up to 1.
|
||||
Everything to the left of the `:XX` up to the previous `:XX` is used for
|
||||
merging, so the overall effect is:
|
||||
|
||||
```bash
|
||||
0.25 * "blue sphere" + 0.75 * "red cube" + hybrid
|
||||
```
|
||||
|
||||
Because you are exploring the "mind" of the AI, the AI's way of mixing two
|
||||
concepts may not match yours, leading to surprising effects. To illustrate, here
|
||||
are three images generated using various combinations of blend weights. As
|
||||
usual, unless you fix the seed, the prompts will give you different results each
|
||||
time you run them.
|
||||
|
||||
<figure markdown>
|
||||
|
||||
### "blue sphere, red cube, hybrid"
|
||||
|
||||
</figure>
|
||||
|
||||
This example doesn't use melding at all and represents the default way of mixing
|
||||
concepts.
|
||||
|
||||
<figure markdown>
|
||||
|
||||

|
||||
|
||||
</figure>
|
||||
|
||||
It's interesting to see how the AI expressed the concept of "cube" as the four
|
||||
quadrants of the enclosing frame. If you look closely, there is depth there, so
|
||||
the enclosing frame is actually a cube.
|
||||
|
||||
<figure markdown>
|
||||
|
||||
### "blue sphere:0.25 red cube:0.75 hybrid"
|
||||
|
||||

|
||||
|
||||
</figure>
|
||||
|
||||
Now that's interesting. We get neither a blue sphere nor a red cube, but a red
|
||||
sphere embedded in a brick wall, which represents a melding of concepts within
|
||||
the AI's "latent space" of semantic representations. Where is Ludwig
|
||||
Wittgenstein when you need him?
|
||||
|
||||
<figure markdown>
|
||||
|
||||
### "blue sphere:0.75 red cube:0.25 hybrid"
|
||||
|
||||

|
||||
|
||||
</figure>
|
||||
|
||||
Definitely more blue-spherey. The cube is gone entirely, but it's really cool
|
||||
abstract art.
|
||||
|
||||
<figure markdown>
|
||||
|
||||
### "blue sphere:0.5 red cube:0.5 hybrid"
|
||||
|
||||

|
||||
|
||||
</figure>
|
||||
|
||||
Whoa...! I see blue and red, but no spheres or cubes. Is the word "hybrid"
|
||||
summoning up the concept of some sort of scifi creature? Let's find out.
|
||||
|
||||
<figure markdown>
|
||||
|
||||
### "blue sphere:0.5 red cube:0.5"
|
||||
|
||||

|
||||
|
||||
</figure>
|
||||
|
||||
Indeed, removing the word "hybrid" produces an image that is more like what we'd
|
||||
expect.
|
||||
|
||||
## Dynamic Prompts
|
||||
|
||||
Dynamic Prompts are a powerful feature designed to produce a variety of prompts based on user-defined options. Using a special syntax, you can construct a prompt with multiple possibilities, and the system will automatically generate a series of permutations based on your settings. This is extremely beneficial for ideation, exploring various scenarios, or testing different concepts swiftly and efficiently.
|
||||
|
||||
### Structure of a Dynamic Prompt
|
||||
|
||||
A Dynamic Prompt consists of regular text, supplemented with alternatives enclosed within curly braces {} and separated by a vertical bar |. For example: {option1|option2|option3}. The system will then select one of the options to include in the final prompt. This flexible system allows for options to be placed throughout the text as needed.
|
||||
|
||||
Furthermore, Dynamic Prompts can designate multiple selections from a single group of options. This feature is triggered by prefixing the options with a numerical value followed by $$. For example, in {2$$option1|option2|option3}, the system will select two distinct options from the set.
|
||||
### Creating Dynamic Prompts
|
||||
|
||||
To create a Dynamic Prompt, follow these steps:
|
||||
|
||||
Draft your sentence or phrase, identifying words or phrases with multiple possible options.
|
||||
Encapsulate the different options within curly braces {}.
|
||||
Within the braces, separate each option using a vertical bar |.
|
||||
If you want to include multiple options from a single group, prefix with the desired number and $$.
|
||||
|
||||
For instance: A {house|apartment|lodge|cottage} in {summer|winter|autumn|spring} designed in {2$$style1|style2|style3}.
|
||||
### How Dynamic Prompts Work
|
||||
|
||||
Once a Dynamic Prompt is configured, the system generates an array of combinations using the options provided. Each group of options in curly braces is treated independently, with the system selecting one option from each group. For a prefixed set (e.g., 2$$), the system will select two distinct options.
|
||||
|
||||
For example, the following prompts could be generated from the above Dynamic Prompt:
|
||||
|
||||
A house in summer designed in style1, style2
|
||||
A lodge in autumn designed in style3, style1
|
||||
A cottage in winter designed in style2, style3
|
||||
And many more!
|
||||
|
||||
When the `Combinatorial` setting is on, Invoke will disable the "Images" selection, and generate every combination up until the setting for Max Prompts is reached.
|
||||
When the `Combinatorial` setting is off, Invoke will randomly generate combinations up until the setting for Images has been reached.
|
||||
|
||||
|
||||
|
||||
### Tips and Tricks for Using Dynamic Prompts
|
||||
|
||||
Below are some useful strategies for creating Dynamic Prompts:
|
||||
|
||||
Utilize Dynamic Prompts to generate a wide spectrum of prompts, perfect for brainstorming and exploring diverse ideas.
|
||||
Ensure that the options within a group are contextually relevant to the part of the sentence where they are used. For instance, group building types together, and seasons together.
|
||||
Apply the 2$$ prefix when you want to incorporate more than one option from a single group. This becomes quite handy when mixing and matching different elements.
|
||||
Experiment with different quantities for the prefix. For example, 3$$ will select three distinct options.
|
||||
Be aware of coherence in your prompts. Although the system can generate all possible combinations, not all may semantically make sense. Therefore, carefully choose the options for each group.
|
||||
Always review and fine-tune the generated prompts as needed. While Dynamic Prompts can help you generate a multitude of combinations, the final polishing and refining remain in your hands.
|
||||
286
docs/features/TRAINING.md
Normal file
@@ -0,0 +1,286 @@
|
||||
---
|
||||
title: Training
|
||||
---
|
||||
|
||||
# :material-file-document: Training
|
||||
|
||||
# Textual Inversion Training
|
||||
## **Personalizing Text-to-Image Generation**
|
||||
|
||||
You may personalize the generated images to provide your own styles or objects
|
||||
by training a new LDM checkpoint and introducing a new vocabulary to the fixed
|
||||
model as a (.pt) embeddings file. Alternatively, you may use or train
|
||||
HuggingFace Concepts embeddings files (.bin) from
|
||||
<https://huggingface.co/sd-concepts-library> and its associated
|
||||
notebooks.
|
||||
|
||||
## **Hardware and Software Requirements**
|
||||
|
||||
You will need a GPU to perform training in a reasonable length of
|
||||
time, and at least 12 GB of VRAM. We recommend using the [`xformers`
|
||||
library](../installation/070_INSTALL_XFORMERS.md) to accelerate the
|
||||
training process further. During training, about 8 GB is temporarily
|
||||
needed in order to store intermediate models, checkpoints and logs.
|
||||
|
||||
## **Preparing for Training**
|
||||
|
||||
To train, prepare a folder that contains 3-5 images that illustrate
|
||||
the object or concept. It is good to provide a variety of examples or
|
||||
poses to avoid overtraining the system. Format these images as PNG
|
||||
(preferred) or JPG. You do not need to resize or crop the images in
|
||||
advance, but for more control you may wish to do so.
|
||||
|
||||
Place the training images in a directory on the machine InvokeAI runs
|
||||
on. We recommend placing them in a subdirectory of the
|
||||
`text-inversion-training-data` folder located in the InvokeAI root
|
||||
directory, ordinarily `~/invokeai` (Linux/Mac), or
|
||||
`C:\Users\your_name\invokeai` (Windows). For example, to create an
|
||||
embedding for the "psychedelic" style, you'd place the training images
|
||||
into the directory
|
||||
`~/invokeai/text-inversion-training-data/psychedelic`.
|
||||
|
||||
## **Launching Training Using the Console Front End**
|
||||
|
||||
InvokeAI 2.3 and higher comes with a text console-based training front
|
||||
end. From within the `invoke.sh`/`invoke.bat` Invoke launcher script,
|
||||
start the front end by selecting choice (3):
|
||||
|
||||
```sh
|
||||
Do you want to generate images using the
|
||||
1: Browser-based UI
|
||||
2: Command-line interface
|
||||
3: Run textual inversion training
|
||||
4: Merge models (diffusers type only)
|
||||
5: Download and install models
|
||||
6: Change InvokeAI startup options
|
||||
7: Re-run the configure script to fix a broken install
|
||||
8: Open the developer console
|
||||
9: Update InvokeAI
|
||||
10: Command-line help
|
||||
Q: Quit
|
||||
|
||||
Please enter 1-10, Q: [1]
|
||||
```
|
||||
|
||||
From the command line, with the InvokeAI virtual environment active,
|
||||
you can launch the front end with the command `invokeai-ti --gui`.
|
||||
|
||||
This will launch a text-based front end that will look like this:
|
||||
|
||||
<figure markdown>
|
||||

|
||||
</figure>
|
||||
|
||||
The interface is keyboard-based. Move from field to field using
|
||||
control-N (^N) to move to the next field and control-P (^P) to the
|
||||
previous one. `<Tab>` and `<Shift-Tab>` work as well. Once a field is
|
||||
active, use the cursor keys. In a checkbox group, use the up and down
|
||||
cursor keys to move from choice to choice, and `<space>` to select a
|
||||
choice. In a scrollbar, use the left and right cursor keys to increase
|
||||
and decrease the value of the scroll. In textfields, type the desired
|
||||
values.
|
||||
|
||||
The number of parameters may look intimidating, but in most cases the
|
||||
predefined defaults work fine. The red circled fields in the above
|
||||
illustration are the ones you will adjust most frequently.
|
||||
|
||||
### Model Name
|
||||
|
||||
This will list all the diffusers models that are currently
|
||||
installed. Select the one you wish to use as the basis for your
|
||||
embedding. Be aware that if you use a SD-1.X-based model for your
|
||||
training, you will only be able to use this embedding with other
|
||||
SD-1.X-based models. Similarly, if you train on SD-2.X, you will only
|
||||
be able to use the embeddings with models based on SD-2.X.
|
||||
|
||||
### Trigger Term
|
||||
|
||||
This is the prompt term you will use to trigger the embedding. Type a
|
||||
single word or phrase you wish to use as the trigger, for example
|
||||
"psychedelic" (without angle brackets). Within InvokeAI, you will then
|
||||
be able to activate the trigger using the syntax `<psychedelic>`.
|
||||
|
||||
### Initializer
|
||||
|
||||
This is a single character that is used internally during the training
|
||||
process as a placeholder for the trigger term. It defaults to "*" and
|
||||
can usually be left alone.
|
||||
|
||||
### Resume from last saved checkpoint
|
||||
|
||||
As training proceeds, textual inversion will write a series of
|
||||
intermediate files that can be used to resume training from where it
|
||||
was left off in the case of an interruption. This checkbox will be
|
||||
automatically selected if you provide a previously used trigger term
|
||||
and at least one checkpoint file is found on disk.
|
||||
|
||||
Note that as of 20 January 2023, resume does not seem to be working
|
||||
properly due to an issue with the upstream code.
|
||||
|
||||
### Data Training Directory
|
||||
|
||||
This is the location of the images to be used for training. When you
|
||||
select a trigger term like "my-trigger", the frontend will prepopulate
|
||||
this field with `~/invokeai/text-inversion-training-data/my-trigger`,
|
||||
but you can change the path to wherever you want.
|
||||
|
||||
### Output Destination Directory
|
||||
|
||||
This is the location of the logs, checkpoint files, and embedding
|
||||
files created during training. When you select a trigger term like
|
||||
"my-trigger", the frontend will prepopulate this field with
|
||||
`~/invokeai/text-inversion-output/my-trigger`, but you can change the
|
||||
path to wherever you want.
|
||||
|
||||
### Image resolution
|
||||
|
||||
The images in the training directory will be automatically scaled to
|
||||
the value you use here. For best results, you will want to use the
|
||||
same default resolution of the underlying model (512 pixels for
|
||||
SD-1.5, 768 for the larger version of SD-2.1).
|
||||
|
||||
### Center crop images
|
||||
|
||||
If this is selected, your images will be center cropped to make them
|
||||
square before resizing them to the desired resolution. Center cropping
|
||||
can indiscriminately cut off the top of subjects' heads for portrait
|
||||
aspect images, so if you have images like this, you may wish to use a
|
||||
photoeditor to manually crop them to a square aspect ratio.
|
||||
|
||||
### Mixed precision
|
||||
|
||||
Select the floating point precision for the embedding. "no" will
|
||||
result in a full 32-bit precision, "fp16" will provide 16-bit
|
||||
precision, and "bf16" will provide mixed precision (only available
|
||||
when XFormers is used).
|
||||
|
||||
### Max training steps
|
||||
|
||||
How many steps the training will take before the model converges. Most
|
||||
training sets will converge with 2000-3000 steps.
|
||||
|
||||
### Batch size
|
||||
|
||||
This adjusts how many training images are processed simultaneously in
|
||||
each step. Higher values will cause the training process to run more
|
||||
quickly, but use more memory. The default size will run with GPUs with
|
||||
as little as 12 GB.
|
||||
|
||||
### Learning rate
|
||||
|
||||
The rate at which the system adjusts its internal weights during
|
||||
training. Higher values risk overtraining (getting the same image each
|
||||
time), and lower values will take more steps to train a good
|
||||
model. The default of 0.0005 is conservative; you may wish to increase
|
||||
it to 0.005 to speed up training.
|
||||
|
||||
### Scale learning rate by number of GPUs, steps and batch size
|
||||
|
||||
If this is selected (the default) the system will adjust the provided
|
||||
learning rate to improve performance.
|
||||
|
||||
### Use xformers acceleration
|
||||
|
||||
This will activate XFormers memory-efficient attention. You need to
|
||||
have XFormers installed for this to have an effect.
|
||||
|
||||
### Learning rate scheduler
|
||||
|
||||
This adjusts how the learning rate changes over the course of
|
||||
training. The default "constant" means to use a constant learning rate
|
||||
for the entire training session. The other values scale the learning
|
||||
rate according to various formulas.
|
||||
|
||||
Only "constant" is supported by the XFormers library.
|
||||
|
||||
### Gradient accumulation steps
|
||||
|
||||
This is a parameter that allows you to use bigger batch sizes than
|
||||
your GPU's VRAM would ordinarily accommodate, at the cost of some
|
||||
performance.
|
||||
|
||||
### Warmup steps
|
||||
|
||||
If "constant_with_warmup" is selected in the learning rate scheduler,
|
||||
then this provides the number of warmup steps. Warmup steps have a
|
||||
very low learning rate, and are one way of preventing early
|
||||
overtraining.
|
||||
|
||||
## The training run
|
||||
|
||||
Start the training run by advancing to the OK button (bottom right)
|
||||
and pressing `<enter>`. A series of progress messages will be displayed
|
||||
as the training process proceeds. This may take an hour or two,
|
||||
depending on settings and the speed of your system. Various log and
|
||||
checkpoint files will be written into the output directory (ordinarily
|
||||
`~/invokeai/text-inversion-output/my-trigger/`).
|
||||
|
||||
At the end of successful training, the system will copy the file
|
||||
`learned_embeds.bin` into the InvokeAI root directory's `embeddings`
|
||||
directory, using a subdirectory named after the trigger token. For
|
||||
example, if the trigger token was `psychedelic`, then look for the
|
||||
embeddings file in
|
||||
`~/invokeai/embeddings/psychedelic/learned_embeds.bin`
|
||||
|
||||
You may now launch InvokeAI and try out a prompt that uses the trigger
|
||||
term. For example `a plate of banana sushi in <psychedelic> style`.
|
||||
|
||||
## **Training with the Command-Line Script**
|
||||
|
||||
Training can also be done using a traditional command-line script. It
|
||||
can be launched from within the "developer's console", or from the
|
||||
command line after activating InvokeAI's virtual environment.
|
||||
|
||||
It accepts a large number of arguments, which can be summarized by
|
||||
passing the `--help` argument:
|
||||
|
||||
```sh
|
||||
invokeai-ti --help
|
||||
```
|
||||
|
||||
Typical usage is shown here:
|
||||
```sh
|
||||
invokeai-ti \
|
||||
--model=stable-diffusion-1.5 \
|
||||
--resolution=512 \
|
||||
--learnable_property=style \
|
||||
--initializer_token='*' \
|
||||
--placeholder_token='<psychedelic>' \
|
||||
--train_data_dir=/home/lstein/invokeai/training-data/psychedelic \
|
||||
--output_dir=/home/lstein/invokeai/text-inversion-training/psychedelic \
|
||||
--scale_lr \
|
||||
--train_batch_size=8 \
|
||||
--gradient_accumulation_steps=4 \
|
||||
--max_train_steps=3000 \
|
||||
--learning_rate=0.0005 \
|
||||
--resume_from_checkpoint=latest \
|
||||
--lr_scheduler=constant \
|
||||
--mixed_precision=fp16 \
|
||||
--only_save_embeds
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### `Cannot load embedding for <trigger>. It was trained on a model with token dimension 1024, but the current model has token dimension 768`
|
||||
|
||||
Messages like this indicate you trained the embedding on a different base model than the currently selected one.
|
||||
|
||||
For example, in the error above, the embedding was trained on SD2.1 (768x768 images, token dimension 1024) but was used with SD1.5 (512x512 images, token dimension 768).
|
||||
|
||||
## Reading
|
||||
|
||||
For more information on textual inversion, please see the following
|
||||
resources:
|
||||
|
||||
* The [textual inversion repository](https://github.com/rinongal/textual_inversion) and
|
||||
associated paper for details and limitations.
|
||||
* [HuggingFace's textual inversion training
|
||||
page](https://huggingface.co/docs/diffusers/training/text_inversion)
|
||||
* [HuggingFace example script
|
||||
documentation](https://github.com/huggingface/diffusers/tree/main/examples/textual_inversion)
|
||||
(Note that this script is similar, but not identical, to
|
||||
`textual_inversion`, but produces embed files that are completely compatible.)
|
||||
|
||||
---
|
||||
|
||||
copyright (c) 2023, Lincoln Stein and the InvokeAI Development Team
|
||||
284
docs/features/UNIFIED_CANVAS.md
Normal file
@@ -0,0 +1,284 @@
|
||||
---
|
||||
title: Unified Canvas
|
||||
---
|
||||
|
||||
The Unified Canvas is a tool designed to streamline and simplify the process of
|
||||
composing an image using Stable Diffusion. It offers artists all of the
|
||||
available Stable Diffusion generation modes (Text To Image, Image To Image,
|
||||
Inpainting, and Outpainting) as a single unified workflow. The flexibility of
|
||||
the tool allows you to tweak and edit image generations, extend images beyond
|
||||
their initial size, and to create new content in a freeform way both inside and
|
||||
outside of existing images.
|
||||
|
||||
This document explains the basics of using the Unified Canvas, introducing you
|
||||
to its features and tools one by one. It also describes some of the more
|
||||
advanced tools available to power users of the Canvas.
|
||||
|
||||
## Basics
|
||||
|
||||
The Unified Canvas consists of two layers: the **Base Layer** and the **Mask
|
||||
Layer**. You can swap from one layer to the other by selecting the layer you
|
||||
want in the drop-down menu on the top left corner of the Unified Canvas, or by
|
||||
pressing the (Q) hotkey.
|
||||
|
||||
### Base Layer
|
||||
|
||||
The **Base Layer** is the image content currently managed by the Canvas, and can
|
||||
be exported at any time to the gallery by using the **Save to Gallery** option.
|
||||
When the Base Layer is selected, the Brush (B) and Eraser (E) tools will
|
||||
directly manipulate the base layer. Any images uploaded to the Canvas, or sent
|
||||
to the Unified Canvas from the gallery, will clear out all existing content and
|
||||
set the Base layer to the new image.
|
||||
|
||||
### Staging Area
|
||||
|
||||
When you generate images, they will display in the Canvas's **Staging Area**,
|
||||
alongside the Staging Area toolbar buttons. While the Staging Area is active,
|
||||
you cannot interact with the Canvas itself.
|
||||
|
||||
<figure markdown>
|
||||
|
||||

|
||||
|
||||
</figure>
|
||||
|
||||
Accepting generations will commit the new generation to the **Base Layer**. You
|
||||
can review all generated images using the Prev/Next arrows, save any individual
|
||||
generations to your gallery (without committing to the Base layer) or discard
|
||||
generations. While you can Undo a discard in an individual Canvas session, any
|
||||
generations that are not saved will be lost when the Canvas resets.
|
||||
|
||||
### Mask Layer
|
||||
|
||||
The **Mask Layer** consists of any masked sections that have been created to
|
||||
inform Inpainting generations. You can paint a new mask, or edit an existing
|
||||
mask, using the Brush tool and the Eraser with the Mask layer set as your Active
|
||||
layer. Any masked areas will only affect generation inside of the current
|
||||
bounding box.
|
||||
|
||||
### Bounding Box
|
||||
|
||||
When generating a new image, Invoke will process and apply new images within the
|
||||
area denoted by the **Bounding Box**. The Width & Height settings of the
|
||||
Bounding Box, as well as its location within the Unified Canvas and pixels or
|
||||
empty space that it encloses, determine how new invocations are generated - see
|
||||
[Inpainting & Outpainting](#inpainting-and-outpainting) below. The Bounding Box
|
||||
can be moved and resized using the Move (V) tool. It can also be resized using
|
||||
the Bounding Box options in the Options Panel. By using these controls you can
|
||||
generate larger or smaller images, control which sections of the image are being
|
||||
processed, as well as control Bounding Box tools like the Bounding Box
|
||||
fill/erase.
|
||||
|
||||
### <a name="inpainting-and-outpainting"></a> Inpainting & Outpainting
|
||||
|
||||
"Inpainting" means asking the AI to refine part of an image while leaving the
|
||||
rest alone. For example, updating a portrait of your grandmother to have her
|
||||
wear a biker's jacket.
|
||||
|
||||
| masked original | inpaint result |
|
||||
| :-------------------------------------------------------------: | :----------------------------------------------------------------------------------------: |
|
||||
|  |  |
|
||||
|
||||
"Outpainting" means asking the AI to expand the original image beyond its
|
||||
original borders, making a bigger image that's still based on the original. For
|
||||
example, extending the above image of your grandmother in a biker's jacket to
|
||||
include her wearing jeans (and while we're at it, a motorcycle!)
|
||||
|
||||
<figure markdown>
|
||||
|
||||

|
||||
|
||||
</figure>
|
||||
|
||||
When you are using the Unified Canvas, Invoke decides automatically whether to
|
||||
do Inpainting, Outpainting, ImageToImage, or TextToImage by looking inside the
|
||||
area enclosed by the Bounding Box. It chooses the appropriate type of generation
|
||||
based on whether the Bounding Box contains empty (transparent) areas on the Base
|
||||
layer, or whether it contains colored areas from previous generations (or from
|
||||
painted brushstrokes) on the Base layer, and/or whether the Mask layer contains
|
||||
any brushstrokes. See [Generation Methods](#generation-methods) below for more
|
||||
information.
|
||||
|
||||
## Getting Started
|
||||
|
||||
To get started with the Unified Canvas, you will want to generate a new base
|
||||
layer using Txt2Img or importing an initial image. We'll refer to either of
|
||||
these methods as the "initial image" in the below guide.
|
||||
|
||||
From there, you can consider the following techniques to augment your image:
|
||||
|
||||
- **New Images**: Move the bounding box to an empty area of the Canvas, type in
|
||||
your prompt, and Invoke, to generate a new image using the Text to Image
|
||||
function.
|
||||
- **Image Correction**: Use the color picker and brush tool to paint corrections
|
||||
on the image, switch to the Mask layer, and brush a mask over your painted
|
||||
area to use **Inpainting**. You can also use the **ImageToImage** generation
|
||||
method to invoke new interpretations of the image.
|
||||
- **Image Expansion**: Move the bounding box to include a portion of your
|
||||
initial image, and a portion of transparent/empty pixels, then Invoke using a
|
||||
prompt that describes what you'd like to see in that area. This will Outpaint
|
||||
the image. You'll typically find more coherent results if you keep about
|
||||
50-60% of the original image in the bounding box. Make sure that the Image To
|
||||
Image Strength slider is set to a high value - you may need to set it higher
|
||||
than you are used to.
|
||||
- **New Content on Existing Images**: If you want to add new details or objects
|
||||
into your image, use the brush tool to paint a sketch of what you'd like to
|
||||
see on the image, switch to the Mask layer, and brush a mask over your painted
|
||||
area to use **Inpainting**. If the masked area is small, consider using a
|
||||
smaller bounding box to take advantage of Invoke's automatic Scaling features,
|
||||
which can help to produce better details.
|
||||
- **And more**: There are a number of creative ways to use the Canvas, and the
|
||||
above are just starting points. We're excited to see what you come up with!
|
||||
|
||||
## <a name="generation-methods"></a> Generation Methods
|
||||
|
||||
The Canvas can use all generation methods available (Txt2Img, Img2Img,
|
||||
Inpainting, and Outpainting), and these will be automatically selected and used
|
||||
based on the current selection area within the Bounding Box.
|
||||
|
||||
### Text to Image
|
||||
|
||||
If the Bounding Box is placed over an area of Canvas with an **empty Base
|
||||
Layer**, invoking a new image will use **TextToImage**. This generates an
|
||||
entirely new image based on your prompt.
|
||||
|
||||
### Image to Image
|
||||
|
||||
If the Bounding Box is placed over an area of Canvas with an **existing Base
|
||||
Layer area with no transparent pixels or masks**, invoking a new image will use
|
||||
**ImageToImage**. This uses the image within the bounding box and your prompt to
|
||||
interpret a new image. The image will be closer to your original image at lower
|
||||
Image to Image strengths.
|
||||
|
||||
### Inpainting
|
||||
|
||||
If the Bounding Box is placed over an area of Canvas with an **existing Base
|
||||
Layer and any pixels selected using the Mask layer**, invoking a new image will
|
||||
use **Inpainting**. Inpainting uses the existing colors/forms in the masked area
|
||||
in order to generate a new image for the masked area only. The unmasked portion
|
||||
of the image will remain the same. Image to Image strength applies to the
|
||||
inpainted area.
|
||||
|
||||
If you desire something completely different from the original image in your new
|
||||
generation (i.e., if you want Invoke to ignore existing colors/forms), consider
|
||||
toggling the Inpaint Replace setting on, and use high values for both Inpaint
|
||||
Replace and Image To Image Strength.
|
||||
|
||||
!!! note
|
||||
|
||||
By default, the **Scale Before Processing** option — which
|
||||
inpaints more coherent details by generating at a larger resolution and then
|
||||
scaling — is only activated when the Bounding Box is relatively small.
|
||||
To get the best inpainting results you should therefore resize your Bounding
|
||||
Box to the smallest area that contains your mask and enough surrounding detail
|
||||
to help Stable Diffusion understand the context of what you want it to draw.
|
||||
You should also update your prompt so that it describes _just_ the area within
|
||||
the Bounding Box.
|
||||
|
||||
### Outpainting
|
||||
|
||||
If the Bounding Box is placed over an area of Canvas partially filled by an
|
||||
existing Base Layer area and partially by transparent pixels or masks, invoking
|
||||
a new image will use **Outpainting**, as well as **Inpainting** any masked
|
||||
areas.
|
||||
|
||||
---
|
||||
|
||||
## Advanced Features
|
||||
|
||||
Features with non-obvious behavior are detailed below, in order to provide
|
||||
clarity on the intent and common use cases we expect for utilizing them.
|
||||
|
||||
### Toolbar
|
||||
|
||||
#### Mask Options
|
||||
|
||||
- **Enable Mask** - This flag can be used to Enable or Disable the currently
|
||||
painted mask. If you have painted a mask, but you don't want it to affect the
|
||||
next invocation, but you _also_ don't want to delete it, then you can set this
|
||||
option to Disable. When you want the mask back, set this back to Enable.
|
||||
- **Preserve Masked Area** - When enabled, Preserve Masked Area inverts the
|
||||
effect of the Mask on the Inpainting process. Pixels in masked areas will be
|
||||
kept unchanged, and unmasked areas will be regenerated.
|
||||
|
||||
#### Creative Tools
|
||||
|
||||
- **Brush - Base/Mask Modes** - The Brush tool switches automatically between
|
||||
different modes of operation for the Base and Mask layers respectively.
|
||||
- On the Base layer, the brush will directly paint on the Canvas using the
|
||||
color selected on the Brush Options menu.
|
||||
- On the Mask layer, the brush will create a new mask. If you're finding the
|
||||
mask difficult to see over the existing content of the Unified Canvas, you
|
||||
can change the color it is drawn with using the color selector on the Mask
|
||||
Options dropdown.
|
||||
- **Erase Bounding Box** - On the Base layer, erases all pixels within the
|
||||
Bounding Box.
|
||||
- **Fill Bounding Box** - On the Base layer, fills all pixels within the
|
||||
Bounding Box with the currently selected color.
|
||||
|
||||
#### Canvas Tools
|
||||
|
||||
- **Move Tool** - Allows for manipulation of the Canvas view (by dragging on the
|
||||
Canvas, outside the bounding box), the Bounding Box (by dragging the edges of
|
||||
the box), or the Width/Height of the Bounding Box (by dragging one of the 9
|
||||
directional handles).
|
||||
- **Reset View** - Click to re-orient the view to the center of the Bounding
|
||||
Box.
|
||||
- **Merge Visible** - If your browser is having performance problems drawing the
|
||||
image in the Unified Canvas, click this to consolidate all of the information
|
||||
currently being rendered by your browser into a merged copy of the image. This
|
||||
lowers the resource requirements and should improve performance.
|
||||
|
||||
### Seam Correction
|
||||
|
||||
When doing Inpainting or Outpainting, Invoke needs to merge the pixels generated
|
||||
by Stable Diffusion into your existing image. To do this, the area around the
|
||||
`seam` at the boundary between your image and the new generation is
|
||||
automatically blended to produce a seamless output. In a fully automatic
|
||||
process, a mask is generated to cover the seam, and then the area of the seam is
|
||||
Inpainted.
|
||||
|
||||
Although the default options should work well most of the time, sometimes it can
|
||||
help to alter the parameters that control the seam Inpainting. A wider seam and
|
||||
a blur setting of about 1/3 of the seam have been noted as producing
|
||||
consistently strong results (e.g. 96 wide and 16 blur - adds up to 32 blur with
|
||||
both sides). Seam strength of 0.7 is best for reducing hard seams.
|
||||
|
||||
- **Seam Size** - The size of the seam masked area. Set higher to make a larger
|
||||
mask around the seam.
|
||||
- **Seam Blur** - The size of the blur that is applied on _each_ side of the
|
||||
masked area.
|
||||
- **Seam Strength** - The Image To Image Strength parameter used for the
|
||||
Inpainting generation that is applied to the seam area.
|
||||
- **Seam Steps** - The number of generation steps that should be used to Inpaint
|
||||
the seam.
|
||||
|
||||
### Infill & Scaling
|
||||
|
||||
- **Scale Before Processing & W/H**: When generating images with a bounding box
|
||||
smaller than the optimized W/H of the model (e.g., 512x512 for SD1.5), this
|
||||
feature first generates at a larger size with the same aspect ratio, and then
|
||||
scales that image down to fill the selected area. This is particularly useful
|
||||
when inpainting very small details. Scaling is optional but is enabled by
|
||||
default.
|
||||
- **Inpaint Replace**: When Inpainting, the default method is to utilize the
|
||||
existing RGB values of the Base layer to inform the generation process. If
|
||||
Inpaint Replace is enabled, noise is generated and blended with the existing
|
||||
pixels (completely replacing the original RGB values at an Inpaint Replace
|
||||
value of 1). This can help generate more variation from the pixels on the Base
|
||||
layers.
|
||||
- When using Inpaint Replace you should use a higher Image To Image Strength
|
||||
value, especially at higher Inpaint Replace values.
|
||||
- **Infill Method**: Invoke currently supports two methods for producing RGB
|
||||
values for use in the Outpainting process: Patchmatch and Tile. We believe
|
||||
that Patchmatch is the superior method; however, we provide support for Tile in
|
||||
case Patchmatch cannot be installed or is unavailable on your computer.
|
||||
- **Tile Size**: The Tile method for Outpainting sources small portions of the
|
||||
original image and randomly places these into the areas being Outpainted. This
|
||||
value sets the size of those tiles.
|
||||
|
||||
## Hot Keys
|
||||
|
||||
The Unified Canvas is a tool that excels when you use hotkeys. You can view the
|
||||
full list of keyboard shortcuts, updated with all new features, by clicking the
|
||||
Keyboard Shortcuts icon at the top right of the InvokeAI WebUI.
|
||||