Mirror of https://github.com/nod-ai/SHARK-Studio.git

Compare commits: 20230720.8...20230725.8 (6 commits)
| Author | SHA1 | Date |
|---|---|---|
| | 289f983f41 | |
| | 453e46562f | |
| | 5497af1f56 | |
| | f3cb63fc9c | |
| | d7092aafaa | |
| | a415f3f70e | |
.github/workflows/test-models.yml (vendored): 1 line changed
@@ -115,6 +115,7 @@ jobs:
pytest --forked --benchmark=native --ci --ci_sha=${SHORT_SHA} --update_tank --tank_url="gs://shark_tank/nightly/" -k cpu
gsutil cp ./bench_results.csv gs://shark-public/builder/bench_results/${DATE}/bench_results_cpu_${SHORT_SHA}.csv
gsutil cp gs://shark-public/builder/bench_results/${DATE}/bench_results_cpu_${SHORT_SHA}.csv gs://shark-public/builder/bench_results/latest/bench_results_cpu_latest.csv
python build_tools/vicuna_testing.py

- name: Validate Models on NVIDIA GPU
  if: matrix.suite == 'cuda'
@@ -5,6 +5,7 @@

1.) Install all the dependencies by running:

```shell
pip install -r apps/language_models/langchain/langchain_requirements.txt
sudo apt-get install -y libmagic-dev poppler-utils tesseract-ocr libtesseract-dev libreoffice
```

2.) Create a folder named `user_path` in the `apps/language_models/langchain/` directory.
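For step 2, a minimal Python sketch of the same setup (the path is taken from the instructions above; a plain `mkdir` works just as well):

```python
import os

# create the folder the langchain DocuChat app expects for user documents
os.makedirs("apps/language_models/langchain/user_path", exist_ok=True)
```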
@@ -2,7 +2,7 @@ import copy
import torch

from evaluate_params import eval_func_param_names
from gen import get_score_model, get_model, evaluate, check_locals
from gen import Langchain
from prompter import non_hf_types
from utils import clear_torch_cache, NullContext, get_kwargs

@@ -87,7 +87,7 @@ def run_cli( # for local function:
# unique to this function:
cli_loop=None,
):
check_locals(**locals())
Langchain.check_locals(**locals())

score_model = "" # FIXME: For now, so user doesn't have to pass
n_gpus = torch.cuda.device_count() if torch.cuda.is_available else 0

@@ -98,16 +98,20 @@ def run_cli( # for local function:
from functools import partial

# get score model
smodel, stokenizer, sdevice = get_score_model(
smodel, stokenizer, sdevice = Langchain.get_score_model(
reward_type=True,
**get_kwargs(
get_score_model, exclude_names=["reward_type"], **locals()
Langchain.get_score_model,
exclude_names=["reward_type"],
**locals()
)
)

model, tokenizer, device = get_model(
model, tokenizer, device = Langchain.get_model(
reward_type=False,
**get_kwargs(get_model, exclude_names=["reward_type"], **locals())
**get_kwargs(
Langchain.get_model, exclude_names=["reward_type"], **locals()
)
)
model_dict = dict(
base_model=base_model,

@@ -121,11 +125,11 @@ def run_cli( # for local function:
model_state.update(model_dict)
my_db_state = [None]
fun = partial(
evaluate,
Langchain.evaluate,
model_state,
my_db_state,
**get_kwargs(
evaluate,
Langchain.evaluate,
exclude_names=["model_state", "my_db_state"]
+ eval_func_param_names,
**locals()
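The hunks above repeatedly point `get_kwargs` at the new `Langchain.*` methods. For readers unfamiliar with the helper, here is a rough, assumed stand-in for what it does; the real implementation lives in the project's utils.py and may differ:

```python
import inspect

def get_kwargs(func, exclude_names=(), **kwargs):
    # keep only keyword arguments that func actually accepts,
    # minus anything explicitly excluded (e.g. "reward_type")
    params = inspect.signature(func).parameters
    return {
        k: v
        for k, v in kwargs.items()
        if k in params and k not in exclude_names
    }
```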
@@ -7,7 +7,7 @@ import torch
from matplotlib import pyplot as plt

from evaluate_params import eval_func_param_names, eval_extra_columns
from gen import get_context, get_score_model, get_model, evaluate, check_locals
from gen import Langchain
from prompter import Prompter
from utils import clear_torch_cache, NullContext, get_kwargs

@@ -94,7 +94,7 @@ def run_eval( # for local function:
force_langchain_evaluate=None,
model_state_none=None,
):
check_locals(**locals())
Langchain.check_locals(**locals())

if eval_prompts_only_num > 0:
np.random.seed(eval_prompts_only_seed)

@@ -144,7 +144,7 @@ def run_eval( # for local function:
] = "" # no input
examplenew[
eval_func_param_names.index("context")
] = get_context(chat_context, prompt_type)
] = Langchain.get_context(chat_context, prompt_type)
examples.append(examplenew)
responses.append(output)
else:

@@ -170,7 +170,7 @@ def run_eval( # for local function:
] = "" # no input
examplenew[
eval_func_param_names.index("context")
] = get_context(chat_context, prompt_type)
] = Langchain.get_context(chat_context, prompt_type)
examples.append(examplenew)
responses.append(output)

@@ -210,18 +210,22 @@ def run_eval( # for local function:
from functools import partial

# get score model
smodel, stokenizer, sdevice = get_score_model(
smodel, stokenizer, sdevice = Langchain.get_score_model(
reward_type=True,
**get_kwargs(
get_score_model, exclude_names=["reward_type"], **locals()
Langchain.get_score_model,
exclude_names=["reward_type"],
**locals()
)
)

if not eval_as_output:
model, tokenizer, device = get_model(
model, tokenizer, device = Langchain.get_model(
reward_type=False,
**get_kwargs(
get_model, exclude_names=["reward_type"], **locals()
Langchain.get_model,
exclude_names=["reward_type"],
**locals()
)
)
model_dict = dict(

@@ -236,11 +240,11 @@ def run_eval( # for local function:
model_state.update(model_dict)
my_db_state = [None]
fun = partial(
evaluate,
Langchain.evaluate,
model_state,
my_db_state,
**get_kwargs(
evaluate,
Langchain.evaluate,
exclude_names=["model_state", "my_db_state"]
+ eval_func_param_names,
**locals()
File diff suppressed because it is too large
@@ -34,7 +34,7 @@ from enums import (
LangChainMode,
)
from evaluate_params import gen_hyper
from gen import get_model, SEED
from gen import Langchain, SEED
from prompter import non_hf_types, PromptType, Prompter
from utils import (
wrapped_partial,

@@ -44,7 +44,6 @@ from utils import (
makedirs,
get_url,
flatten_list,
get_device,
ProgressParallel,
remove,
hash_file,

@@ -92,6 +91,7 @@ from langchain.chains.question_answering import load_qa_chain
from langchain.docstore.document import Document
from langchain import PromptTemplate, HuggingFaceTextGenInference
from langchain.vectorstores import Chroma
from apps.stable_diffusion.src import args


def get_db(

@@ -371,8 +371,8 @@ def get_embedding(
# to ensure can fork without deadlock
from langchain.embeddings import HuggingFaceEmbeddings

device, torch_dtype, context_class = get_device_dtype()
model_kwargs = dict(device=device)
torch_dtype, context_class = get_dtype()
model_kwargs = dict(device=args.device)
if "instructor" in hf_embedding_model:
encode_kwargs = {"normalize_embeddings": True}
embedding = HuggingFaceInstructEmbeddings(

@@ -907,7 +907,7 @@ def get_llm(
# model_name = 'h2oai/h2ogpt-oig-oasst1-512-6_9b'
# model_name = 'h2oai/h2ogpt-oasst1-512-20b'
inference_server = ""
model, tokenizer, device = get_model(
model, tokenizer, _ = Langchain.get_model(
load_8bit=True,
base_model=model_name,
inference_server=inference_server,

@@ -974,17 +974,15 @@ def get_llm(
return llm, model_name, streamer, prompt_type


def get_device_dtype():
def get_dtype():
# torch.device("cuda") leads to cuda:x cuda:y mismatches for multi-GPU consistently
import torch

n_gpus = torch.cuda.device_count() if torch.cuda.is_available else 0
device = "cpu" if n_gpus == 0 else "cuda"
# from utils import NullContext
# context_class = NullContext if n_gpus > 1 or n_gpus == 0 else context_class
context_class = torch.device
torch_dtype = torch.float16 if device == "cuda" else torch.float32
return device, torch_dtype, context_class
torch_dtype = torch.float16 if args.device == "cuda" else torch.float32
return torch_dtype, context_class


def get_wiki_data(
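Pulling the added lines together, the reworked helper now keys everything off the globally parsed `args.device` instead of probing CUDA locally. A minimal sketch of the new shape, assembled from this hunk (assuming `args` is the `apps.stable_diffusion.src` namespace imported above):

```python
import torch
from apps.stable_diffusion.src import args

def get_dtype():
    # device selection happens once at argument-parsing time; this helper only
    # picks the dtype and the context-manager class that match args.device
    context_class = torch.device
    torch_dtype = torch.float16 if args.device == "cuda" else torch.float32
    return torch_dtype, context_class
```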
@@ -1715,7 +1713,7 @@ def path_to_docs(
caption_loader
and not isinstance(caption_loader, (bool, str))
and caption_loader.device != "cpu"
or get_device() == "cuda"
or args.device == "cuda"
):
# to avoid deadlocks, presume was preloaded and so can't fork due to cuda context
n_jobs_image = 1

@@ -2549,15 +2547,15 @@ def _run_qa_db(
# context stuff similar to used in evaluate()
import torch

device, torch_dtype, context_class = get_device_dtype()
torch_dtype, context_class = get_dtype()
with torch.no_grad():
have_lora_weights = lora_weights not in [no_lora_str, "", None]
context_class_cast = (
NullContext
if device == "cpu" or have_lora_weights
if args.device == "cpu" or have_lora_weights
else torch.autocast
)
with context_class_cast(device):
with context_class_cast(args.device):
answer = chain()

if not use_context:
@@ -28,18 +28,18 @@ global_precision = "fp16"
if not args.run_docuchat_web:
args.device = global_device
args.precision = global_precision
tensor_device = "cpu" if args.device == "cpu" else "cuda"


class H2OGPTSHARKModel(torch.nn.Module):
def __init__(self):
super().__init__()
model_name = "h2ogpt_falcon_7b"
path_str = (
model_name + "_" + args.precision + "_" + args.device + ".vmfb"
extended_model_name = (
model_name + "_" + args.precision + "_" + args.device
)
vmfb_path = Path(path_str)
path_str = model_name + "_" + args.precision + ".mlir"
mlir_path = Path(path_str)
vmfb_path = Path(extended_model_name + ".vmfb")
mlir_path = Path(model_name + "_" + args.precision + ".mlir")
shark_module = None

if not vmfb_path.exists():
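Worked through with the defaults visible in this hunk (`model_name = "h2ogpt_falcon_7b"`), the new naming scheme produces, for example:

```python
from pathlib import Path

# illustrative values; args.precision and args.device come from the app's CLI flags
model_name = "h2ogpt_falcon_7b"
precision, device = "fp16", "cpu"

extended_model_name = model_name + "_" + precision + "_" + device
vmfb_path = Path(extended_model_name + ".vmfb")            # h2ogpt_falcon_7b_fp16_cpu.vmfb
mlir_path = Path(model_name + "_" + precision + ".mlir")   # h2ogpt_falcon_7b_fp16.mlir
```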
@@ -50,7 +50,7 @@ class H2OGPTSHARKModel(torch.nn.Module):
# Downloading VMFB from shark_tank
print("Downloading vmfb from shark tank.")
download_public_file(
"gs://shark_tank/langchain/" + path_str,
"gs://shark_tank/langchain/" + str(vmfb_path),
vmfb_path.absolute(),
single_file=True,
)

@@ -61,11 +61,7 @@ class H2OGPTSHARKModel(torch.nn.Module):
else:
# Downloading MLIR from shark_tank
download_public_file(
"gs://shark_tank/langchain/"
+ model_name
+ "_"
+ args.precision
+ ".mlir",
"gs://shark_tank/langchain/" + str(mlir_path),
mlir_path.absolute(),
single_file=True,
)

@@ -83,16 +79,18 @@ class H2OGPTSHARKModel(torch.nn.Module):
mlir_dialect="linalg",
)
print(f"[DEBUG] generating vmfb.")
shark_module = _compile_module(shark_module, vmfb_path, [])
shark_module = _compile_module(
shark_module, extended_model_name, []
)
print("Saved newly generated vmfb.")

if shark_module is None:
if vmfb_path.exists():
print("Compiled vmfb found. Loading it from: ", vmfb_path)
shark_module = SharkInference(
None, device=global_device, mlir_dialect="linalg"
None, device=args.device, mlir_dialect="linalg"
)
shark_module.load_module(vmfb_path)
shark_module.load_module(str(vmfb_path))
print("Compiled vmfb loaded successfully.")
else:
raise ValueError("Unable to download/generate a vmfb.")

@@ -105,7 +103,7 @@ class H2OGPTSHARKModel(torch.nn.Module):
"forward",
(input_ids.to(device="cpu"), attention_mask.to(device="cpu")),
)
).to(device=global_device)
).to(device=tensor_device)
return result


@@ -121,14 +119,14 @@ def pad_or_truncate_inputs(
num_add_token = max_padding_length - inp_shape[1]
padded_input_ids = torch.cat(
[
torch.tensor([[11] * num_add_token]).to(device=global_device),
torch.tensor([[11] * num_add_token]).to(device=tensor_device),
input_ids,
],
dim=1,
)
padded_attention_mask = torch.cat(
[
torch.tensor([[0] * num_add_token]).to(device=global_device),
torch.tensor([[0] * num_add_token]).to(device=tensor_device),
attention_mask,
],
dim=1,

@@ -331,7 +329,7 @@ class H2OTextGenerationPipeline(TextGenerationPipeline):
model_inputs["input_ids"], model_inputs["attention_mask"]
)

if global_precision == "fp16":
if args.precision == "fp16":
outputs = outputs.to(dtype=torch.float32)
next_token_logits = outputs

@@ -458,7 +456,7 @@ class H2OTextGenerationPipeline(TextGenerationPipeline):
if isinstance(eos_token_id, int):
eos_token_id = [eos_token_id]
self.eos_token_id_tensor = (
torch.tensor(eos_token_id).to(device=global_device)
torch.tensor(eos_token_id).to(device=tensor_device)
if eos_token_id is not None
else None
)

@@ -536,7 +534,7 @@ class H2OTextGenerationPipeline(TextGenerationPipeline):
self.input_ids = torch.cat(
[
torch.tensor(self.truncated_input_ids)
.to(device=global_device)
.to(device=tensor_device)
.unsqueeze(dim=0),
self.input_ids,
],

@@ -615,22 +613,9 @@ class H2OTextGenerationPipeline(TextGenerationPipeline):
**generate_kwargs,
)
out_b = generated_sequence.shape[0]
if self.framework == "pt":
generated_sequence = generated_sequence.reshape(
in_b, out_b // in_b, *generated_sequence.shape[1:]
)
elif self.framework == "tf":
from transformers import is_tf_available

if is_tf_available():
import tensorflow as tf

generated_sequence = tf.reshape(
generated_sequence,
(in_b, out_b // in_b, *generated_sequence.shape[1:]),
)
else:
raise ValueError("TF not avaialble.")
generated_sequence = generated_sequence.reshape(
in_b, out_b // in_b, *generated_sequence.shape[1:]
)
return {
"generated_sequence": generated_sequence,
"input_ids": input_ids,
@@ -1,11 +1,12 @@
# for generate (gradio server) and finetune
datasets==2.13.0
sentencepiece==0.1.99
gradio==3.35.2
huggingface_hub==0.15.1
# gradio==3.37.0
huggingface_hub==0.16.4
appdirs==1.4.4
fire==0.5.0
docutils==0.20.1
# torch==2.0.1; sys_platform != "darwin" and platform_machine != "arm64"
evaluate==0.4.0
rouge_score==0.1.2
sacrebleu==2.3.1

@@ -18,7 +19,9 @@ matplotlib==3.7.1
loralib==0.1.1
bitsandbytes==0.39.0
accelerate==0.20.3
git+https://github.com/huggingface/peft.git@0b62b4378b4ce9367932c73540349da9a41bdea8
peft==0.4.0
# 4.31.0+ breaks load_in_8bit=True (https://github.com/huggingface/transformers/issues/25026)
# transformers==4.30.2
tokenizers==0.13.3
APScheduler==3.10.1

@@ -33,7 +36,7 @@ tensorboard==2.13.0
neptune==1.2.0

# for gradio client
gradio_client==0.2.7
gradio_client==0.2.10
beautifulsoup4==4.12.2
markdown==3.4.3

@@ -43,8 +46,9 @@ pytest-xdist==3.2.1
nltk==3.8.1
textstat==0.7.3
# pandoc==2.3
#pypandoc==1.11
pypandoc_binary==1.11
pypandoc==1.11; sys_platform == "darwin" and platform_machine == "arm64"
pypandoc_binary==1.11; platform_machine == "x86_64"
pypandoc_binary==1.11; sys_platform == "win32"
openpyxl==3.1.2
lm_dataformat==0.0.20
bioc==2.0

@@ -104,3 +108,15 @@ pip-licenses==4.3.0

# weaviate vector db
weaviate-client==3.22.1

gpt4all==1.0.5
llama-cpp-python==0.1.73

arxiv==1.4.8
pymupdf==1.22.5 # AGPL license
# extract-msg==0.41.1 # GPL3

# sometimes unstructured fails, these work in those cases. See https://github.com/h2oai/h2ogpt/issues/320
playwright==1.36.0
# requires Chrome binary to be in path
selenium==4.10.0
File diff suppressed because it is too large
@@ -28,6 +28,7 @@ from apps.stable_diffusion.src.utils.utils import (
fetch_and_update_base_model_id,
get_path_to_diffusers_checkpoint,
sanitize_seed,
parse_seed_input,
batch_seeds,
get_path_stem,
get_extended_name,
@@ -66,9 +66,9 @@ p.add_argument(

p.add_argument(
"--seed",
type=int,
type=str,
default=-1,
help="The seed to use. -1 for a random one.",
help="The seed or list of seeds to use. -1 for a random one.",
)

p.add_argument(
@@ -727,7 +727,8 @@ def fetch_and_update_base_model_id(model_to_run, base_model=""):

# Generate and return a new seed if the provided one is not in the
# supported range (including -1)
def sanitize_seed(seed):
def sanitize_seed(seed: int | str):
seed = int(seed)
uint32_info = np.iinfo(np.uint32)
uint32_min, uint32_max = uint32_info.min, uint32_info.max
if seed < uint32_min or seed >= uint32_max:

@@ -735,20 +736,48 @@ def sanitize_seed(seed):
return seed


# Generate a set of seeds, using as the first seed of the set,
# optionally using it as the rng seed for subsequent seeds in the set
def batch_seeds(seed, batch_count, repeatable=False):
# use the passed seed as the initial seed of the batch
seeds = [sanitize_seed(seed)]
# take a seed expression in an input format and convert it to
# a list of integers, where possible
def parse_seed_input(seed_input: str | list | int):
if isinstance(seed_input, str):
try:
seed_input = json.loads(seed_input)
except (ValueError, TypeError):
seed_input = None

if isinstance(seed_input, int):
return [seed_input]

if isinstance(seed_input, list) and all(
type(seed) is int for seed in seed_input
):
return seed_input

raise TypeError(
"Seed input must be an integer or an array of integers in JSON format"
)


# Generate a set of seeds from an input expression for batch_count batches,
# optionally using that input as the rng seed for any randomly generated seeds.
def batch_seeds(
seed_input: str | list | int, batch_count: int, repeatable=False
):
# turn the input into a list if possible
seeds = parse_seed_input(seed_input)

# slice or pad the list to be of batch_count length
seeds = seeds[:batch_count] + [-1] * (batch_count - len(seeds))

if repeatable:
# use the initial seed as the rng generator seed
# set seed for the rng based on what we have so far
saved_random_state = random_getstate()
seed_random(seed)
if all(seed < 0 for seed in seeds):
seeds[0] = sanitize_seed(seeds[0])
seed_random(str(seeds))

# generate the additional seeds
for i in range(1, batch_count):
seeds.append(sanitize_seed(-1))
# generate any seeds that are unspecified
seeds = [sanitize_seed(seed) for seed in seeds]

if repeatable:
# reset the rng back to normal
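A quick usage sketch of the new seed plumbing, with expected results inferred from the code above (`parse_seed_input` accepts an int, a JSON string, or a list of ints; `batch_seeds` pads or truncates to `batch_count` and fills any remaining `-1` entries with freshly sanitized random seeds):

```python
parse_seed_input(123)        # [123]
parse_seed_input("-1")       # [-1]
parse_seed_input("[42, 7]")  # [42, 7]
parse_seed_input("abc")      # raises TypeError, surfaced to the web UI as gr.Error

# three seeds for three batches: the first two come from the input, the third is
# generated; repeatable=True seeds the rng from the input list before generating
seeds = batch_seeds("[42, 7]", batch_count=3, repeatable=True)
```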
@@ -21,129 +21,134 @@ def user(message, history):


sharkModel = 0
sharded_model = 0
h2ogpt_model = 0

past_key_values = None

model_map = {
"codegen": "Salesforce/codegen25-7b-multi",
"vicuna1p3": "lmsys/vicuna-7b-v1.3",
"vicuna": "TheBloke/vicuna-7B-1.1-HF",
"StableLM": "stabilityai/stablelm-tuned-alpha-3b",
}

# NOTE: Each `model_name` should have its own start message
start_message = {
"StableLM": (
"<|SYSTEM|># StableLM Tuned (Alpha version)"
"\n- StableLM is a helpful and harmless open-source AI language model "
"developed by StabilityAI."
"\n- StableLM is excited to be able to help the user, but will refuse "
"to do anything that could be considered harmful to the user."
"\n- StableLM is more than just an information source, StableLM is also "
"able to write poetry, short stories, and make jokes."
"\n- StableLM will refuse to participate in anything that "
"could harm a human."
),
"vicuna": (
"A chat between a curious user and an artificial intelligence assistant. "
"The assistant gives helpful, detailed, and polite answers to the user's "
"questions.\n"
),
"vicuna1p3": (
"A chat between a curious user and an artificial intelligence assistant. "
"The assistant gives helpful, detailed, and polite answers to the user's "
"questions.\n"
),
"codegen": "",
}
start_message = """
SHARK DocuChat
Chat with an AI, contextualized with provided files.
"""


def create_prompt(model_name, history):
system_message = start_message[model_name]
def create_prompt(history):
system_message = start_message

if model_name in ["StableLM", "vicuna", "vicuna1p3"]:
conversation = "".join(
[
"".join(["<|USER|>" + item[0], "<|ASSISTANT|>" + item[1]])
for item in history
]
)
else:
conversation = "".join(
["".join([item[0], item[1]]) for item in history]
)
conversation = "".join(["".join([item[0], item[1]]) for item in history])

msg = system_message + conversation
msg = msg.strip()
return msg


def chat(curr_system_message, history, model, device, precision):
def chat(curr_system_message, history, device, precision):
args.run_docuchat_web = True
global sharded_model
global past_key_values
global h2ogpt_model
global h2ogpt_tokenizer
global model_state
global langchain
global userpath_selector

model_name, model_path = list(map(str.strip, model.split("=>")))
print(f"In chat for {model_name}")
if h2ogpt_model == 0:
if "cuda" in device:
shark_device = "cuda"
elif "sync" in device:
shark_device = "cpu"
elif "task" in device:
shark_device = "cpu"
elif "vulkan" in device:
shark_device = "vulkan"
else:
print("unrecognized device")

# if h2ogpt_model == 0:
#     if "cuda" in device:
#         device = "cuda"
#     elif "sync" in device:
#         device = "cpu-sync"
#     elif "task" in device:
#         device = "cpu-task"
#     elif "vulkan" in device:
#         device = "vulkan"
#     else:
#         print("unrecognized device")
device = "cpu" if shark_device == "cpu" else "cuda"

# max_toks = 128 if model_name == "codegen" else 512
# h2ogpt_model = UnshardedVicuna(
#     model_name,
#     hf_model_path=model_path,
#     device=device,
#     precision=precision,
#     max_num_tokens=max_toks,
# )
# prompt = create_prompt(model_name, history)
# print("prompt = ", prompt)
args.device = shark_device
args.precision = precision

# for partial_text in h2ogpt_model.generate(prompt):
#     history[-1][1] = partial_text
#     yield history
output = gen.evaluate(
None, # model_state
None, # my_db_state
None, # instruction
None, # iinput
history, # context
False, # stream_output
None, # prompt_type
None, # prompt_dict
None, # temperature
None, # top_p
None, # top_k
None, # num_beams
None, # max_new_tokens
None, # min_new_tokens
None, # early_stopping
None, # max_time
None, # repetition_penalty
None, # num_return_sequences
False, # do_sample
False, # chat
None, # instruction_nochat
curr_system_message, # iinput_nochat
"Disabled", # langchain_mode
LangChainAction.QUERY.value, # langchain_action
3, # top_k_docs
True, # chunk
512, # chunk_size
[DocumentChoices.All_Relevant.name], # document_choice
from apps.language_models.langchain.gen import Langchain

langchain = Langchain(device, precision)
h2ogpt_model, h2ogpt_tokenizer, _ = langchain.get_model(
load_4bit=True
if device == "cuda"
else False, # load model in 4bit if device is cuda to save memory
load_gptq="",
use_safetensors=False,
infer_devices=True,
device=device,
base_model="h2oai/h2ogpt-gm-oasst1-en-2048-falcon-7b-v3",
inference_server="",
tokenizer_base_model="h2oai/h2ogpt-gm-oasst1-en-2048-falcon-7b-v3",
lora_weights="",
gpu_id=0,
reward_type=None,
local_files_only=False,
resume_download=True,
use_auth_token=False,
trust_remote_code=True,
offload_folder=None,
compile_model=False,
verbose=False,
)
model_state = dict(
model=h2ogpt_model,
tokenizer=h2ogpt_tokenizer,
device=device,
base_model="h2oai/h2ogpt-gm-oasst1-en-2048-falcon-7b-v3",
tokenizer_base_model="h2oai/h2ogpt-gm-oasst1-en-2048-falcon-7b-v3",
lora_weights="",
inference_server="",
prompt_type=None,
prompt_dict=None,
)

prompt = create_prompt(history)
output = langchain.evaluate(
model_state=model_state,
my_db_state=None,
instruction=prompt,
iinput="",
context="",
stream_output=True,
prompt_type="prompt_answer",
prompt_dict={
"promptA": "",
"promptB": "",
"PreInstruct": "<|prompt|>",
"PreInput": None,
"PreResponse": "<|answer|>",
"terminate_response": [
"<|prompt|>",
"<|answer|>",
"<|endoftext|>",
],
"chat_sep": "<|endoftext|>",
"chat_turn_sep": "<|endoftext|>",
"humanstr": "<|prompt|>",
"botstr": "<|answer|>",
"generates_leading_space": False,
},
temperature=0.1,
top_p=0.75,
top_k=40,
num_beams=1,
max_new_tokens=256,
min_new_tokens=0,
early_stopping=False,
max_time=180,
repetition_penalty=1.07,
num_return_sequences=1,
do_sample=False,
chat=True,
instruction_nochat=prompt,
iinput_nochat="",
langchain_mode="UserData",
langchain_action=LangChainAction.QUERY.value,
top_k_docs=3,
chunk=True,
chunk_size=512,
document_choice=[DocumentChoices.All_Relevant.name],
concurrency_count=1,
memory_restriction_level=2,
raise_generate_gpu_exceptions=False,

@@ -154,9 +159,13 @@ def chat(curr_system_message, history, model, device, precision):
db_type="chroma",
n_jobs=-1,
first_para=False,
max_max_time=60 * 2,
model_state0=model_state,
model_lock=True,
user_path=userpath_selector.value,
)
for partial_text in output:
history[-1][1] = partial_text
history[-1][1] = partial_text["response"]
yield history

return history

@@ -164,14 +173,6 @@ def chat(curr_system_message, history, model, device, precision):

with gr.Blocks(title="H2OGPT") as h2ogpt_web:
with gr.Row():
model_choices = list(
map(lambda x: f"{x[0]: <10} => {x[1]}", model_map.items())
)
model = gr.Dropdown(
label="Select Model",
value=model_choices[0],
choices=model_choices,
)
supported_devices = available_devices
enabled = len(supported_devices) > 0
# show cpu-task device first in list for chatbot

@@ -197,6 +198,14 @@ with gr.Blocks(title="H2OGPT") as h2ogpt_web:
],
visible=True,
)
userpath_selector = gr.Textbox(
label="Document Directory",
value=str(
os.path.abspath("apps/language_models/langchain/user_path/")
),
interactive=True,
container=True,
)
chatbot = gr.Chatbot(height=500)
with gr.Row():
with gr.Column():

@@ -220,7 +229,7 @@ with gr.Blocks(title="H2OGPT") as h2ogpt_web:
fn=user, inputs=[msg, chatbot], outputs=[msg, chatbot], queue=False
).then(
fn=chat,
inputs=[system_msg, chatbot, model, device, precision],
inputs=[system_msg, chatbot, device, precision],
outputs=[chatbot],
queue=True,
)

@@ -228,7 +237,7 @@ with gr.Blocks(title="H2OGPT") as h2ogpt_web:
fn=user, inputs=[msg, chatbot], outputs=[msg, chatbot], queue=False
).then(
fn=chat,
inputs=[system_msg, chatbot, model, device, precision],
inputs=[system_msg, chatbot, device, precision],
outputs=[chatbot],
queue=True,
)
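The net effect of this rewrite is that the DocuChat tab drives h2oGPT through the new `Langchain` wrapper and streams dict-shaped partial results. A condensed sketch of the consumption side, using the names from the hunk above (illustrative only, not the verbatim handler):

```python
def stream_to_history(output, history):
    # Langchain.evaluate yields dicts; the chat history keeps only the "response" text
    for partial_text in output:
        history[-1][1] = partial_text["response"]
        yield history
```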
@@ -50,7 +50,7 @@ def img2img_inf(
steps: int,
strength: float,
guidance_scale: float,
seed: int,
seed: str | int,
batch_count: int,
batch_size: int,
scheduler: str,

@@ -230,10 +230,12 @@ def img2img_inf(
start_time = time.time()
global_obj.get_sd_obj().log = ""
generated_imgs = []
seeds = []
seeds = utils.batch_seeds(seed, batch_count, repeatable_seeds)
extra_info = {"STRENGTH": strength}
text_output = ""
try:
seeds = utils.batch_seeds(seed, batch_count, repeatable_seeds)
except TypeError as error:
raise gr.Error(str(error)) from None

for current_batch in range(batch_count):
out_imgs = global_obj.get_sd_obj().generate_images(

@@ -617,8 +619,10 @@ with gr.Blocks(title="Image-to-Image") as img2img_web:
visible=False,
)
with gr.Row():
seed = gr.Number(
value=args.seed, precision=0, label="Seed"
seed = gr.Textbox(
value=args.seed,
label="Seed",
info="An integer or a JSON list of integers, -1 for random",
)
device = gr.Dropdown(
elem_id="device",
@@ -49,7 +49,7 @@ def inpaint_inf(
inpaint_full_res_padding: int,
steps: int,
guidance_scale: float,
seed: int,
seed: str | int,
batch_count: int,
batch_size: int,
scheduler: str,

@@ -181,10 +181,13 @@ def inpaint_inf(
start_time = time.time()
global_obj.get_sd_obj().log = ""
generated_imgs = []
seeds = utils.batch_seeds(seed, batch_count, repeatable_seeds)
image = image_dict["image"]
mask_image = image_dict["mask"]
text_output = ""
try:
seeds = utils.batch_seeds(seed, batch_count, repeatable_seeds)
except TypeError as error:
raise gr.Error(str(error)) from None

for current_batch in range(batch_count):
out_imgs = global_obj.get_sd_obj().generate_images(

@@ -514,8 +517,10 @@ with gr.Blocks(title="Inpainting") as inpaint_web:
visible=False,
)
with gr.Row():
seed = gr.Number(
value=args.seed, precision=0, label="Seed"
seed = gr.Textbox(
value=args.seed,
label="Seed",
info="An integer or a JSON list of integers, -1 for random",
)
device = gr.Dropdown(
elem_id="device",

@@ -3,7 +3,7 @@ import os
import gradio as gr
from PIL import Image
from apps.stable_diffusion.scripts import lora_train
from apps.stable_diffusion.src import prompt_examples, args
from apps.stable_diffusion.src import prompt_examples, args, utils
from apps.stable_diffusion.web.ui.utils import (
available_devices,
nodlogo_loc,

@@ -168,7 +168,9 @@ with gr.Blocks(title="Lora Training") as lora_train_web:
stop_batch = gr.Button("Stop Batch")
with gr.Row():
seed = gr.Number(
value=args.seed, precision=0, label="Seed"
value=utils.parse_seed_input(args.seed)[0],
precision=0,
label="Seed",
)
device = gr.Dropdown(
elem_id="device",

@@ -49,7 +49,7 @@ def outpaint_inf(
width: int,
steps: int,
guidance_scale: float,
seed: int,
seed: str,
batch_count: int,
batch_size: int,
scheduler: str,

@@ -178,7 +178,10 @@ def outpaint_inf(
start_time = time.time()
global_obj.get_sd_obj().log = ""
generated_imgs = []
seeds = utils.batch_seeds(seed, batch_count, repeatable_seeds)
try:
seeds = utils.batch_seeds(seed, batch_count, repeatable_seeds)
except TypeError as error:
raise gr.Error(str(error)) from None

left = True if "left" in directions else False
right = True if "right" in directions else False

@@ -542,8 +545,10 @@ with gr.Blocks(title="Outpainting") as outpaint_web:
visible=False,
)
with gr.Row():
seed = gr.Number(
value=args.seed, precision=0, label="Seed"
seed = gr.Textbox(
value=args.seed,
label="Seed",
info="An integer or a JSON list of integers, -1 for random",
)
device = gr.Dropdown(
elem_id="device",

@@ -284,6 +284,13 @@ def llm_chat_api(InputData: dict):
}


def view_json_file(file_obj):
content = ""
with open(file_obj.name, "r") as fopen:
content = fopen.read()
return content


with gr.Blocks(title="Chatbot") as stablelm_chat:
with gr.Row():
model_choices = list(

@@ -319,6 +326,14 @@ with gr.Blocks(title="Chatbot") as stablelm_chat:
],
visible=True,
)
with gr.Row():
with gr.Group():
config_file = gr.File(label="Upload sharding configuration")
json_view_button = gr.Button("View as JSON")
json_view = gr.JSON()
json_view_button.click(
fn=view_json_file, inputs=[config_file], outputs=[json_view]
)
chatbot = gr.Chatbot(height=500)
with gr.Row():
with gr.Column():

@@ -46,7 +46,7 @@ def txt2img_inf(
width: int,
steps: int,
guidance_scale: float,
seed: int,
seed: str | int,
batch_count: int,
batch_size: int,
scheduler: str,

@@ -178,8 +178,11 @@ def txt2img_inf(
start_time = time.time()
global_obj.get_sd_obj().log = ""
generated_imgs = []
seeds = utils.batch_seeds(seed, batch_count, repeatable_seeds)
text_output = ""
try:
seeds = utils.batch_seeds(seed, batch_count, repeatable_seeds)
except TypeError as error:
raise gr.Error(str(error)) from None

for current_batch in range(batch_count):
out_imgs = global_obj.get_sd_obj().generate_images(

@@ -481,8 +484,10 @@ with gr.Blocks(title="Text-to-Image") as txt2img_web:
label="Repeatable Seeds",
)
with gr.Row():
seed = gr.Number(
value=args.seed, precision=0, label="Seed"
seed = gr.Textbox(
value=args.seed,
label="Seed",
info="An integer or a JSON list of integers, -1 for random",
)
device = gr.Dropdown(
elem_id="device",

@@ -42,7 +42,7 @@ def upscaler_inf(
steps: int,
noise_level: int,
guidance_scale: float,
seed: int,
seed: str,
batch_count: int,
batch_size: int,
scheduler: str,

@@ -177,8 +177,11 @@ def upscaler_inf(
start_time = time.time()
global_obj.get_sd_obj().log = ""
generated_imgs = []
seeds = utils.batch_seeds(seed, batch_count, repeatable_seeds)
extra_info = {"NOISE LEVEL": noise_level}
try:
seeds = utils.batch_seeds(seed, batch_count, repeatable_seeds)
except TypeError as error:
raise gr.Error(str(error)) from None

for current_batch in range(batch_count):
low_res_img = image

@@ -534,8 +537,10 @@ with gr.Blocks(title="Upscaler") as upscaler_web:
visible=False,
)
with gr.Row():
seed = gr.Number(
value=args.seed, precision=0, label="Seed"
seed = gr.Textbox(
value=args.seed,
label="Seed",
info="An integer or a JSON list of integers, -1 for random",
)
device = gr.Dropdown(
elem_id="device",
build_tools/vicuna_testing.py (new file, 14 lines)
@@ -0,0 +1,14 @@
import os
from sys import executable
import subprocess
from apps.language_models.scripts import vicuna


def test_loop():
    precisions = ["fp16", "int8", "int4"]
    devices = ["cpu"]
    for precision in precisions:
        for device in devices:
            model = vicuna.UnshardedVicuna(device=device, precision=precision)
            model.compile()
            del model
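Note that the CI step added above invokes this file directly (`python build_tools/vicuna_testing.py`). A hypothetical entry point of the following shape would be needed for `test_loop` to run in that mode; it is not part of the file as shown here:

```python
if __name__ == "__main__":
    test_loop()
```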