From 45aa770cd165b9503775226f2f7b83da87c971cf Mon Sep 17 00:00:00 2001
From: Lincoln Stein
Date: Sun, 5 Mar 2023 01:52:28 -0500
Subject: [PATCH] implemented multiprocessing across multiple GPUs

---
 ldm/generate.py            |   2 +
 ldm/invoke/CLI.py          |  27 ++++----
 ldm/invoke/args.py         |   7 +-
 ldm/invoke/pngwriter.py    |   2 -
 scripts/dynamic_prompts.py | 128 ++++++++++++++++++++++++++++++-------
 5 files changed, 127 insertions(+), 39 deletions(-)

diff --git a/ldm/generate.py b/ldm/generate.py
index 413a1e25cb..c17451a69e 100644
--- a/ldm/generate.py
+++ b/ldm/generate.py
@@ -200,6 +200,8 @@ class Generate:
         # it wasn't actually doing anything. This logic could be reinstated.
         self.device = torch.device(choose_torch_device())
         print(f">> Using device_type {self.device.type}")
+        if self.device.type == 'cuda':
+            print(f">> CUDA device '{torch.cuda.get_device_name(torch.cuda.current_device())}' (GPU {os.environ.get('CUDA_VISIBLE_DEVICES') or 0})")
         if full_precision:
             if self.precision != "auto":
                 raise ValueError("Remove --full_precision / -F if using --precision")
diff --git a/ldm/invoke/CLI.py b/ldm/invoke/CLI.py
index b755eafed4..302bbe18ca 100644
--- a/ldm/invoke/CLI.py
+++ b/ldm/invoke/CLI.py
@@ -389,6 +389,7 @@
                     prior_variations,
                     postprocessed,
                     first_seed,
+                    gen.model_name,
                 )
                 path = file_writer.save_image_and_prompt_to_png(
                     image=image,
@@ -402,6 +403,7 @@
                         else first_seed
                     ],
                     model_hash=gen.model_hash,
+                    model_id=gen.model_name,
                 ),
                 name=filename,
                 compress_level=opt.png_compression,
@@ -941,13 +943,14 @@ def add_postprocessing_to_metadata(opt, original_file, new_file, tool, command):
 
 
 def prepare_image_metadata(
-        opt,
-        prefix,
-        seed,
-        operation="generate",
-        prior_variations=[],
-        postprocessed=False,
-        first_seed=None,
+    opt,
+    prefix,
+    seed,
+    operation="generate",
+    prior_variations=[],
+    postprocessed=False,
+    first_seed=None,
+    model_id='unknown',
 ):
     if postprocessed and opt.save_original:
         filename = choose_postprocess_name(opt, prefix, seed)
@@ -955,7 +958,8 @@
         wildcards = dict(opt.__dict__)
         wildcards["prefix"] = prefix
         wildcards["seed"] = seed
+        wildcards["model_id"] = model_id
         try:
             filename = opt.fnformat.format(**wildcards)
         except KeyError as e:
             print(
@@ -972,18 +977,17 @@
         first_seed = first_seed or seed
         this_variation = [[seed, opt.variation_amount]]
         opt.with_variations = prior_variations + this_variation
-        formatted_dream_prompt = opt.dream_prompt_str(seed=first_seed)
+        formatted_dream_prompt = opt.dream_prompt_str(seed=first_seed,model_id=model_id)
     elif len(prior_variations) > 0:
-        formatted_dream_prompt = opt.dream_prompt_str(seed=first_seed)
+        formatted_dream_prompt = opt.dream_prompt_str(seed=first_seed,model_id=model_id)
     elif operation == "postprocess":
         formatted_dream_prompt = "!fix " + opt.dream_prompt_str(
-            seed=seed, prompt=opt.input_file_path
+            seed=seed, prompt=opt.input_file_path, model_id=model_id,
         )
     else:
-        formatted_dream_prompt = opt.dream_prompt_str(seed=seed)
+        formatted_dream_prompt = opt.dream_prompt_str(seed=seed,model_id=model_id)
     return filename, formatted_dream_prompt
 
-
 def choose_postprocess_name(opt, prefix, seed) -> str:
     match = re.search("postprocess:(\w+)", opt.last_operation)
     if match:
diff --git a/ldm/invoke/args.py b/ldm/invoke/args.py
index b23238cf09..d813336c4f 100644
--- a/ldm/invoke/args.py
+++ b/ldm/invoke/args.py
@@ -333,7 +333,7 @@ class Args(object):
             switches.append(f'-V {formatted_variations}')
         if 'variations'
in a and len(a['variations'])>0: switches.append(f'-V {a["variations"]}') - return ' '.join(switches) + return ' '.join(switches) + f' # model_id={kwargs.get("model_id","unknown model")}' def __getattribute__(self,name): ''' @@ -878,7 +878,7 @@ class Args(object): ) render_group.add_argument( '--fnformat', - default='{prefix}.{seed}.png', + default=None, type=str, help='Overwrite the filename format. You can use any argument as wildcard enclosed in curly braces. Default is {prefix}.{seed}.png', ) @@ -1155,6 +1155,7 @@ def format_metadata(**kwargs): def metadata_dumps(opt, seeds=[], model_hash=None, + model_id=None, postprocessing=None): ''' Given an Args object, returns a dict containing the keys and @@ -1167,7 +1168,7 @@ def metadata_dumps(opt, # top-level metadata minus `image` or `images` metadata = { 'model' : 'stable diffusion', - 'model_id' : opt.model, + 'model_id' : model_id or opt.model, 'model_hash' : model_hash, 'app_id' : ldm.invoke.__app_id__, 'app_version' : ldm.invoke.__version__, diff --git a/ldm/invoke/pngwriter.py b/ldm/invoke/pngwriter.py index 8810cefbd8..da5af82aa8 100644 --- a/ldm/invoke/pngwriter.py +++ b/ldm/invoke/pngwriter.py @@ -108,8 +108,6 @@ class PromptFormatter: switches.append(f'-H{opt.height or t2i.height}') switches.append(f'-C{opt.cfg_scale or t2i.cfg_scale}') switches.append(f'-A{opt.sampler_name or t2i.sampler_name}') -# to do: put model name into the t2i object -# switches.append(f'--model{t2i.model_name}') if opt.seamless or t2i.seamless: switches.append('--seamless') if opt.init_img: diff --git a/scripts/dynamic_prompts.py b/scripts/dynamic_prompts.py index e63264f3a7..7edd54e622 100755 --- a/scripts/dynamic_prompts.py +++ b/scripts/dynamic_prompts.py @@ -8,17 +8,19 @@ that scan across steps and other parameters. 
import argparse import io import json +import os import pydoc import re import shutil import sys import numpy as np -from dataclasses import dataclass from io import TextIOBase from itertools import product from pathlib import Path +from multiprocessing import Process, Pipe +from multiprocessing.connection import Connection from subprocess import PIPE, Popen -from typing import Iterable, List, Union +from typing import Iterable, List import yaml from omegaconf import OmegaConf, dictconfig, listconfig @@ -29,6 +31,7 @@ def expand_prompts( run_invoke: bool = False, invoke_model: str = None, invoke_outdir: Path = None, + processes_per_gpu: int = 1 ): """ :param template_file: A YAML file containing templated prompts and args @@ -42,24 +45,98 @@ def expand_prompts( conf = OmegaConf.load(fh) else: conf = OmegaConf.load(template_file) + + # loading here to avoid long wait for help message + import torch + torch.multiprocessing.set_start_method('spawn') + gpu_count = torch.cuda.device_count() if torch.cuda.is_available() else 1 + commands = expanded_invokeai_commands(conf, run_invoke) + children = list() + try: if run_invoke: - invokeai_args = [shutil.which("invokeai")] + invokeai_args = [shutil.which("invokeai"),"--from_file","-"] if invoke_model: invokeai_args.extend(("--model", invoke_model)) if invoke_outdir: - invokeai_args.extend(("--outdir", invoke_outdir)) - print(f"Calling invokeai with arguments {invokeai_args}", file=sys.stderr) - process = Popen(invokeai_args, stdin=PIPE, text=True) - with process.stdin as fh: - _do_expand(conf, file=fh) - process.wait() + invokeai_args.extend(("--outdir", os.path.expanduser(invoke_outdir))) + + processes_to_launch = gpu_count * processes_per_gpu + print(f'>> Spawning {processes_to_launch} invokeai processes across {gpu_count} CUDA gpus', file=sys.stderr) + import ldm.invoke.CLI + parent_conn, child_conn = Pipe() + children = set() + for i in range(processes_to_launch): + p = Process(target=_run_invoke, + args=(child_conn, + parent_conn, + invokeai_args, + i%gpu_count, + ) + ) + p.start() + children.add(p) + child_conn.close() + sequence = 0 + for command in commands: + sequence += 1 + parent_conn.send(command+f' --fnformat=dp.{sequence:04}.{{prompt}}.png') + parent_conn.close() else: - _do_expand(conf) + for command in commands: + print(command) except KeyboardInterrupt: - process.kill() + for p in children: + p.terminate() +class MessageToStdin(object): + def __init__(self, connection: Connection): + self.connection = connection + self.linebuffer = list() + def readline(self)->str: + try: + if len(self.linebuffer) == 0: + message = self.connection.recv() + self.linebuffer = message.split("\n") + result = self.linebuffer.pop(0) + return result + except EOFError: + return None + +class FilterStream(object): + def __init__(self, stream: TextIOBase, include: re.Pattern=None, exclude: re.Pattern=None): + self.stream = stream + self.include = include + self.exclude = exclude + + def write(self, data: str): + if self.include and self.include.match(data): + self.stream.write(data) + self.stream.flush() + elif self.exclude and not self.exclude.match(data): + self.stream.write(data) + self.stream.flush() + + def flush(self): + self.stream.flush() + +def _run_invoke(conn_in: Connection, conn_out: Connection, args: List[str], gpu: int=0): + print(f'>> Process {os.getpid()} running on GPU {gpu}', file=sys.stderr) + conn_out.close() + os.environ['CUDA_VISIBLE_DEVICES'] = f"{gpu}" + from ldm.invoke.CLI import main + sys.argv = args + sys.stdin = 
MessageToStdin(conn_in)
+    sys.stdout = FilterStream(sys.stdout,include=re.compile(r'^\[\d+\]'))
+    sys.stderr = FilterStream(sys.stderr,exclude=re.compile(r'^(>>|\s*\d+%|Fetching)'))
+    main()
+
+def _filter_output(stream: TextIOBase):
+    while line := stream.readline():
+        if re.match(r'^\[\d+\]',line):
+            print(line)
+
 def main():
     parser = argparse.ArgumentParser(
         description=HELP,
@@ -88,12 +165,12 @@
         dest="instructions",
         action="store_true",
         default=False,
-        help=f"Print verbose instructions.",
+        help="Print verbose instructions.",
     )
     parser.add_argument(
         "--invoke",
         action="store_true",
-        help="Execute invokeai using specified optional --model and --outdir",
+        help="Execute invokeai using specified optional --model, --processes_per_gpu and --outdir",
     )
     parser.add_argument(
         "--model",
@@ -102,6 +179,12 @@
     parser.add_argument(
         "--outdir", type=Path, help="Write images and log into indicated directory"
     )
+    parser.add_argument(
+        "--processes_per_gpu",
+        type=int,
+        default=1,
+        help="When executing invokeai, how many parallel processes to execute per CUDA GPU.",
+    )
 
     opt = parser.parse_args()
     if opt.example:
@@ -125,9 +208,10 @@
         run_invoke=opt.invoke,
         invoke_model=opt.model,
         invoke_outdir=opt.outdir,
+        processes_per_gpu=opt.processes_per_gpu,
     )
 
-def _do_expand(conf: OmegaConf, file: TextIOBase = sys.stdout):
+def expanded_invokeai_commands(conf: OmegaConf, always_switch_models: bool=False)->List[str]:
     models = expand_values(conf.get("model"))
     steps = expand_values(conf.get("steps")) or [30]
     cfgs = expand_values(conf.get("cfg")) or [7.5]
@@ -144,17 +228,17 @@
         *[models, seeds, prompts, samplers, cfgs, steps, perlin, threshold, init_img, strength, dimensions]
     )
     previous_model = None
+
+    result = list()
     for p in cross_product:
         (model, seed, prompt, sampler, cfg, step, perlin, threshold, init_img, strength, dimensions) = tuple(p)
         (width, height) = dimensions.split("x")
-        if previous_model != model:
-            previous_model = model
-            print(f"!switch {model}", file=file)
+        switch_args = f"!switch {model}\n" if always_switch_models or previous_model != model else ''
         image_args = f'-I{init_img} -f{strength}' if init_img else ''
-        print(
-            f'"{prompt}" -S{seed} -A{sampler} -C{cfg} -s{step} {image_args} --perlin={perlin} --threshold={threshold} -W{width} -H{height}',
-            file=file,
-        )
+        command = f'{switch_args}{prompt} -S{seed} -A{sampler} -C{cfg} -s{step} {image_args} --perlin={perlin} --threshold={threshold} -W{width} -H{height}'
+        result.append(command)
+        previous_model = model
+    return result
 
 
 def expand_prompt(
@@ -215,7 +299,7 @@ def _yaml_to_json(yaml_input: str) -> str:
     return json.dumps(data, indent=2)
 
 
-HELP = f"""
+HELP = """
 This script takes a prompt template file that contains multiple
 alternative values for the prompt and its generation arguments (such
 as steps). It then expands out the prompts using all combinations of
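
A note on the pattern, not part of the patch: the diff fans work out by sharing one end of a single multiprocessing.Pipe among all workers, pinning each worker to a device via CUDA_VISIBLE_DEVICES before ldm.invoke.CLI (and therefore torch) is imported, and signaling end-of-work by closing the parent's end, which each worker observes as EOFError. Below is a minimal, self-contained sketch of that pattern; the worker body, the two-GPU topology, and the sample commands are illustrative assumptions, not code from the patch.

import os
from multiprocessing import Pipe, Process, set_start_method
from multiprocessing.connection import Connection

def worker(conn_in: Connection, conn_out: Connection, gpu: int):
    conn_out.close()  # drop the inherited copy of the parent's end so its close() is visible here
    os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu)  # pin this process to one device before any CUDA import
    while True:
        try:
            command = conn_in.recv()  # blocks until the parent sends a command
        except EOFError:              # parent closed its end: no more work
            break
        print(f'[gpu {gpu}] would run: {command}')

if __name__ == '__main__':
    set_start_method('spawn')            # fresh interpreter per child, safe for CUDA
    gpu_count, processes_per_gpu = 2, 1  # assumed topology for the sketch
    parent_conn, child_conn = Pipe()
    workers = [
        Process(target=worker, args=(child_conn, parent_conn, i % gpu_count))
        for i in range(gpu_count * processes_per_gpu)
    ]
    for p in workers:
        p.start()
    child_conn.close()  # parent keeps only the sending end
    for command in ('"a fanciful prompt" -S42', '"another prompt" -S43'):
        parent_conn.send(command)
    parent_conn.close()  # workers drain the pipe, then exit on EOFError
    for p in workers:
        p.join()

The same fan-out could be built on multiprocessing.Queue, which also serializes concurrent reads; sharing one Connection end between several readers works for short messages like these commands, but the multiprocessing documentation does not guarantee it against interleaved reads.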