Compare commits


6 Commits

Author SHA1 Message Date
Stefan Kapusniak
289f983f41 SD - Implement seed arrays for batch runs (#1690)
* SD Scripts and UI tabs that support batch_count can now take a
string containing a JSON array, or a list of integers, as their seed
input.
* Each batch in a run will now take the seed specified at the
corresponding array index if one exists. If there is no seed at
that index, the seed value will be treated as -1 and a random
seed will be assigned at that position. If an integer rather than
a list or JSON array has been given, everything works as before.
* UI seed input controls are now Textboxes with info lines about
the seed formats allowed.
* UI error handling updated to be more helpful if the seed input is
invalid.
2023-07-24 19:22:34 -07:00
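In behavioural terms, each batch gets the seed at its array index, with missing entries treated as -1 (random). A minimal hypothetical sketch of that padding-and-fill behaviour, not the shipped code (the real `parse_seed_input`/`batch_seeds` helpers land in the utils.py diff below):

```python
import json
import random

def seeds_for_batches(seed_input, batch_count):
    # Accept "[42, 7]" (JSON string), [42, 7] (list) or 42 (single int).
    seeds = json.loads(seed_input) if isinstance(seed_input, str) else seed_input
    if isinstance(seeds, int):
        seeds = [seeds]
    # One seed per batch: truncate extras, pad shortfalls with -1 (random).
    seeds = seeds[:batch_count] + [-1] * (batch_count - len(seeds))
    return [s if 0 <= s < 2**32 else random.randrange(2**32) for s in seeds]

print(seeds_for_batches("[42, 7]", 4))  # e.g. [42, 7, 1234567890, 42424242]
print(seeds_for_batches(5, 2))          # e.g. [5, 3735928559]
```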
Daniel Garvey
453e46562f mega vicuna merge pt 2 (#1685) 2023-07-24 12:42:20 -05:00
Gaurav Shukla
5497af1f56 [config] Add support for uploading sharding config file in chatbot (#1689)
Signed-off-by: Gaurav Shukla <gaurav@nod-labs.com>
2023-07-24 10:18:03 -07:00
Vivek Khandelwal
f3cb63fc9c Fix Langchain multiple device issue (#1688) 2023-07-24 08:03:46 -07:00
Vivek Khandelwal
d7092aafaa Fix multiple issues for Langchain
This commit fixes the following issues for Langchain:
1.) Web UI not able to fetch results.
2.) The model being reloaded for each query.
3.) SHARK module not using the user-provided device and precision.
4.) Main Langchain code refactored into a class.
5.) Misc issues.
2023-07-21 21:56:27 +05:30
Vivek Khandelwal
a415f3f70e Fix Langchain Prompt issue and add web UI support (#1682) 2023-07-21 06:36:55 -07:00
21 changed files with 3893 additions and 3838 deletions

View File

@@ -115,6 +115,7 @@ jobs:
pytest --forked --benchmark=native --ci --ci_sha=${SHORT_SHA} --update_tank --tank_url="gs://shark_tank/nightly/" -k cpu
gsutil cp ./bench_results.csv gs://shark-public/builder/bench_results/${DATE}/bench_results_cpu_${SHORT_SHA}.csv
gsutil cp gs://shark-public/builder/bench_results/${DATE}/bench_results_cpu_${SHORT_SHA}.csv gs://shark-public/builder/bench_results/latest/bench_results_cpu_latest.csv
python build_tools/vicuna_testing.py
- name: Validate Models on NVIDIA GPU
if: matrix.suite == 'cuda'

View File

@@ -5,6 +5,7 @@
1.) Install all the dependencies by running:
```shell
pip install -r apps/language_models/langchain/langchain_requirements.txt
sudo apt-get install -y libmagic-dev poppler-utils tesseract-ocr libtesseract-dev libreoffice
```
2.) Create a folder named `user_path` in `apps/language_models/langchain/` directory.

View File

@@ -2,7 +2,7 @@ import copy
import torch
from evaluate_params import eval_func_param_names
from gen import get_score_model, get_model, evaluate, check_locals
from gen import Langchain
from prompter import non_hf_types
from utils import clear_torch_cache, NullContext, get_kwargs
@@ -87,7 +87,7 @@ def run_cli( # for local function:
# unique to this function:
cli_loop=None,
):
check_locals(**locals())
Langchain.check_locals(**locals())
score_model = "" # FIXME: For now, so user doesn't have to pass
n_gpus = torch.cuda.device_count() if torch.cuda.is_available else 0
@@ -98,16 +98,20 @@ def run_cli( # for local function:
from functools import partial
# get score model
smodel, stokenizer, sdevice = get_score_model(
smodel, stokenizer, sdevice = Langchain.get_score_model(
reward_type=True,
**get_kwargs(
get_score_model, exclude_names=["reward_type"], **locals()
Langchain.get_score_model,
exclude_names=["reward_type"],
**locals()
)
)
model, tokenizer, device = get_model(
model, tokenizer, device = Langchain.get_model(
reward_type=False,
**get_kwargs(get_model, exclude_names=["reward_type"], **locals())
**get_kwargs(
Langchain.get_model, exclude_names=["reward_type"], **locals()
)
)
model_dict = dict(
base_model=base_model,
@@ -121,11 +125,11 @@ def run_cli( # for local function:
model_state.update(model_dict)
my_db_state = [None]
fun = partial(
evaluate,
Langchain.evaluate,
model_state,
my_db_state,
**get_kwargs(
evaluate,
Langchain.evaluate,
exclude_names=["model_state", "my_db_state"]
+ eval_func_param_names,
**locals()
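The repeated `get_kwargs(..., **locals())` idiom at these call sites presumably filters the caller's locals down to the parameters the target callable declares, minus an exclusion list; a self-contained sketch under that assumption (`get_model` here is a toy stand-in, not the real loader):

```python
import inspect

def get_kwargs(func, exclude_names=None, **all_kwargs):
    # Keep only arguments that func actually declares, minus exclusions.
    exclude_names = exclude_names or []
    accepted = inspect.signature(func).parameters
    return {
        name: value
        for name, value in all_kwargs.items()
        if name in accepted and name not in exclude_names
    }

def get_model(base_model=None, reward_type=False, verbose=False):
    return base_model, reward_type, verbose

kwargs = get_kwargs(
    get_model, exclude_names=["reward_type"],
    base_model="falcon-7b", reward_type=True, unrelated_local=123,
)
print(get_model(reward_type=False, **kwargs))  # ('falcon-7b', False, False)
```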

View File

@@ -7,7 +7,7 @@ import torch
from matplotlib import pyplot as plt
from evaluate_params import eval_func_param_names, eval_extra_columns
from gen import get_context, get_score_model, get_model, evaluate, check_locals
from gen import Langchain
from prompter import Prompter
from utils import clear_torch_cache, NullContext, get_kwargs
@@ -94,7 +94,7 @@ def run_eval( # for local function:
force_langchain_evaluate=None,
model_state_none=None,
):
check_locals(**locals())
Langchain.check_locals(**locals())
if eval_prompts_only_num > 0:
np.random.seed(eval_prompts_only_seed)
@@ -144,7 +144,7 @@ def run_eval( # for local function:
] = "" # no input
examplenew[
eval_func_param_names.index("context")
] = get_context(chat_context, prompt_type)
] = Langchain.get_context(chat_context, prompt_type)
examples.append(examplenew)
responses.append(output)
else:
@@ -170,7 +170,7 @@ def run_eval( # for local function:
] = "" # no input
examplenew[
eval_func_param_names.index("context")
] = get_context(chat_context, prompt_type)
] = Langchain.get_context(chat_context, prompt_type)
examples.append(examplenew)
responses.append(output)
@@ -210,18 +210,22 @@ def run_eval( # for local function:
from functools import partial
# get score model
smodel, stokenizer, sdevice = get_score_model(
smodel, stokenizer, sdevice = Langchain.get_score_model(
reward_type=True,
**get_kwargs(
get_score_model, exclude_names=["reward_type"], **locals()
Langchain.get_score_model,
exclude_names=["reward_type"],
**locals()
)
)
if not eval_as_output:
model, tokenizer, device = get_model(
model, tokenizer, device = Langchain.get_model(
reward_type=False,
**get_kwargs(
get_model, exclude_names=["reward_type"], **locals()
Langchain.get_model,
exclude_names=["reward_type"],
**locals()
)
)
model_dict = dict(
@@ -236,11 +240,11 @@ def run_eval( # for local function:
model_state.update(model_dict)
my_db_state = [None]
fun = partial(
evaluate,
Langchain.evaluate,
model_state,
my_db_state,
**get_kwargs(
evaluate,
Langchain.evaluate,
exclude_names=["model_state", "my_db_state"]
+ eval_func_param_names,
**locals()

File diff suppressed because it is too large

View File

@@ -34,7 +34,7 @@ from enums import (
LangChainMode,
)
from evaluate_params import gen_hyper
from gen import get_model, SEED
from gen import Langchain, SEED
from prompter import non_hf_types, PromptType, Prompter
from utils import (
wrapped_partial,
@@ -44,7 +44,6 @@ from utils import (
makedirs,
get_url,
flatten_list,
get_device,
ProgressParallel,
remove,
hash_file,
@@ -92,6 +91,7 @@ from langchain.chains.question_answering import load_qa_chain
from langchain.docstore.document import Document
from langchain import PromptTemplate, HuggingFaceTextGenInference
from langchain.vectorstores import Chroma
from apps.stable_diffusion.src import args
def get_db(
@@ -371,8 +371,8 @@ def get_embedding(
# to ensure can fork without deadlock
from langchain.embeddings import HuggingFaceEmbeddings
device, torch_dtype, context_class = get_device_dtype()
model_kwargs = dict(device=device)
torch_dtype, context_class = get_dtype()
model_kwargs = dict(device=args.device)
if "instructor" in hf_embedding_model:
encode_kwargs = {"normalize_embeddings": True}
embedding = HuggingFaceInstructEmbeddings(
@@ -907,7 +907,7 @@ def get_llm(
# model_name = 'h2oai/h2ogpt-oig-oasst1-512-6_9b'
# model_name = 'h2oai/h2ogpt-oasst1-512-20b'
inference_server = ""
model, tokenizer, device = get_model(
model, tokenizer, _ = Langchain.get_model(
load_8bit=True,
base_model=model_name,
inference_server=inference_server,
@@ -974,17 +974,15 @@ def get_llm(
return llm, model_name, streamer, prompt_type
def get_device_dtype():
def get_dtype():
# torch.device("cuda") leads to cuda:x cuda:y mismatches for multi-GPU consistently
import torch
n_gpus = torch.cuda.device_count() if torch.cuda.is_available else 0
device = "cpu" if n_gpus == 0 else "cuda"
# from utils import NullContext
# context_class = NullContext if n_gpus > 1 or n_gpus == 0 else context_class
context_class = torch.device
torch_dtype = torch.float16 if device == "cuda" else torch.float32
return device, torch_dtype, context_class
torch_dtype = torch.float16 if args.device == "cuda" else torch.float32
return torch_dtype, context_class
def get_wiki_data(
@@ -1715,7 +1713,7 @@ def path_to_docs(
caption_loader
and not isinstance(caption_loader, (bool, str))
and caption_loader.device != "cpu"
or get_device() == "cuda"
or args.device == "cuda"
):
# to avoid deadlocks, presume was preloaded and so can't fork due to cuda context
n_jobs_image = 1
@@ -2549,15 +2547,15 @@ def _run_qa_db(
# context stuff similar to used in evaluate()
import torch
device, torch_dtype, context_class = get_device_dtype()
torch_dtype, context_class = get_dtype()
with torch.no_grad():
have_lora_weights = lora_weights not in [no_lora_str, "", None]
context_class_cast = (
NullContext
if device == "cpu" or have_lora_weights
if args.device == "cpu" or have_lora_weights
else torch.autocast
)
with context_class_cast(device):
with context_class_cast(args.device):
answer = chain()
if not use_context:
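The hunks above replace local device probing (`get_device_dtype`) with the shared Stable Diffusion `args.device`, leaving only dtype and context-class selection. A condensed, self-contained sketch of the resulting pattern (`_Args` and `NullContext` stub the real objects):

```python
import torch

class _Args:  # stand-in for apps.stable_diffusion.src.args
    device = "cpu"

args = _Args()

class NullContext:  # no-op context manager, as in the repo's utils
    def __init__(self, *a, **k): pass
    def __enter__(self): return self
    def __exit__(self, *exc): return False

def get_dtype():
    # fp16 on cuda, fp32 otherwise; torch.device stays the context class
    torch_dtype = torch.float16 if args.device == "cuda" else torch.float32
    return torch_dtype, torch.device

torch_dtype, context_class = get_dtype()
have_lora_weights = False
context_class_cast = (
    NullContext if args.device == "cpu" or have_lora_weights else torch.autocast
)
with torch.no_grad(), context_class_cast(args.device):
    pass  # chain() would run here under the chosen autocast/no-op context
```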

View File

@@ -28,18 +28,18 @@ global_precision = "fp16"
if not args.run_docuchat_web:
args.device = global_device
args.precision = global_precision
tensor_device = "cpu" if args.device == "cpu" else "cuda"
class H2OGPTSHARKModel(torch.nn.Module):
def __init__(self):
super().__init__()
model_name = "h2ogpt_falcon_7b"
path_str = (
model_name + "_" + args.precision + "_" + args.device + ".vmfb"
extended_model_name = (
model_name + "_" + args.precision + "_" + args.device
)
vmfb_path = Path(path_str)
path_str = model_name + "_" + args.precision + ".mlir"
mlir_path = Path(path_str)
vmfb_path = Path(extended_model_name + ".vmfb")
mlir_path = Path(model_name + "_" + args.precision + ".mlir")
shark_module = None
if not vmfb_path.exists():
@@ -50,7 +50,7 @@ class H2OGPTSHARKModel(torch.nn.Module):
# Downloading VMFB from shark_tank
print("Downloading vmfb from shark tank.")
download_public_file(
"gs://shark_tank/langchain/" + path_str,
"gs://shark_tank/langchain/" + str(vmfb_path),
vmfb_path.absolute(),
single_file=True,
)
@@ -61,11 +61,7 @@ class H2OGPTSHARKModel(torch.nn.Module):
else:
# Downloading MLIR from shark_tank
download_public_file(
"gs://shark_tank/langchain/"
+ model_name
+ "_"
+ args.precision
+ ".mlir",
"gs://shark_tank/langchain/" + str(mlir_path),
mlir_path.absolute(),
single_file=True,
)
@@ -83,16 +79,18 @@ class H2OGPTSHARKModel(torch.nn.Module):
mlir_dialect="linalg",
)
print(f"[DEBUG] generating vmfb.")
shark_module = _compile_module(shark_module, vmfb_path, [])
shark_module = _compile_module(
shark_module, extended_model_name, []
)
print("Saved newly generated vmfb.")
if shark_module is None:
if vmfb_path.exists():
print("Compiled vmfb found. Loading it from: ", vmfb_path)
shark_module = SharkInference(
None, device=global_device, mlir_dialect="linalg"
None, device=args.device, mlir_dialect="linalg"
)
shark_module.load_module(vmfb_path)
shark_module.load_module(str(vmfb_path))
print("Compiled vmfb loaded successfully.")
else:
raise ValueError("Unable to download/generate a vmfb.")
@@ -105,7 +103,7 @@ class H2OGPTSHARKModel(torch.nn.Module):
"forward",
(input_ids.to(device="cpu"), attention_mask.to(device="cpu")),
)
).to(device=global_device)
).to(device=tensor_device)
return result
@@ -121,14 +119,14 @@ def pad_or_truncate_inputs(
num_add_token = max_padding_length - inp_shape[1]
padded_input_ids = torch.cat(
[
torch.tensor([[11] * num_add_token]).to(device=global_device),
torch.tensor([[11] * num_add_token]).to(device=tensor_device),
input_ids,
],
dim=1,
)
padded_attention_mask = torch.cat(
[
torch.tensor([[0] * num_add_token]).to(device=global_device),
torch.tensor([[0] * num_add_token]).to(device=tensor_device),
attention_mask,
],
dim=1,
@@ -331,7 +329,7 @@ class H2OTextGenerationPipeline(TextGenerationPipeline):
model_inputs["input_ids"], model_inputs["attention_mask"]
)
if global_precision == "fp16":
if args.precision == "fp16":
outputs = outputs.to(dtype=torch.float32)
next_token_logits = outputs
@@ -458,7 +456,7 @@ class H2OTextGenerationPipeline(TextGenerationPipeline):
if isinstance(eos_token_id, int):
eos_token_id = [eos_token_id]
self.eos_token_id_tensor = (
torch.tensor(eos_token_id).to(device=global_device)
torch.tensor(eos_token_id).to(device=tensor_device)
if eos_token_id is not None
else None
)
@@ -536,7 +534,7 @@ class H2OTextGenerationPipeline(TextGenerationPipeline):
self.input_ids = torch.cat(
[
torch.tensor(self.truncated_input_ids)
.to(device=global_device)
.to(device=tensor_device)
.unsqueeze(dim=0),
self.input_ids,
],
@@ -615,22 +613,9 @@ class H2OTextGenerationPipeline(TextGenerationPipeline):
**generate_kwargs,
)
out_b = generated_sequence.shape[0]
if self.framework == "pt":
generated_sequence = generated_sequence.reshape(
in_b, out_b // in_b, *generated_sequence.shape[1:]
)
elif self.framework == "tf":
from transformers import is_tf_available
if is_tf_available():
import tensorflow as tf
generated_sequence = tf.reshape(
generated_sequence,
(in_b, out_b // in_b, *generated_sequence.shape[1:]),
)
else:
raise ValueError("TF not avaialble.")
generated_sequence = generated_sequence.reshape(
in_b, out_b // in_b, *generated_sequence.shape[1:]
)
return {
"generated_sequence": generated_sequence,
"input_ids": input_ids,

View File

@@ -1,11 +1,12 @@
# for generate (gradio server) and finetune
datasets==2.13.0
sentencepiece==0.1.99
gradio==3.35.2
huggingface_hub==0.15.1
# gradio==3.37.0
huggingface_hub==0.16.4
appdirs==1.4.4
fire==0.5.0
docutils==0.20.1
# torch==2.0.1; sys_platform != "darwin" and platform_machine != "arm64"
evaluate==0.4.0
rouge_score==0.1.2
sacrebleu==2.3.1
@@ -18,7 +19,9 @@ matplotlib==3.7.1
loralib==0.1.1
bitsandbytes==0.39.0
accelerate==0.20.3
git+https://github.com/huggingface/peft.git@0b62b4378b4ce9367932c73540349da9a41bdea8
peft==0.4.0
# 4.31.0+ breaks load_in_8bit=True (https://github.com/huggingface/transformers/issues/25026)
# transformers==4.30.2
tokenizers==0.13.3
APScheduler==3.10.1
@@ -33,7 +36,7 @@ tensorboard==2.13.0
neptune==1.2.0
# for gradio client
gradio_client==0.2.7
gradio_client==0.2.10
beautifulsoup4==4.12.2
markdown==3.4.3
@@ -43,8 +46,9 @@ pytest-xdist==3.2.1
nltk==3.8.1
textstat==0.7.3
# pandoc==2.3
#pypandoc==1.11
pypandoc_binary==1.11
pypandoc==1.11; sys_platform == "darwin" and platform_machine == "arm64"
pypandoc_binary==1.11; platform_machine == "x86_64"
pypandoc_binary==1.11; sys_platform == "win32"
openpyxl==3.1.2
lm_dataformat==0.0.20
bioc==2.0
@@ -104,3 +108,15 @@ pip-licenses==4.3.0
# weaviate vector db
weaviate-client==3.22.1
gpt4all==1.0.5
llama-cpp-python==0.1.73
arxiv==1.4.8
pymupdf==1.22.5 # AGPL license
# extract-msg==0.41.1 # GPL3
# sometimes unstructured fails, these work in those cases. See https://github.com/h2oai/h2ogpt/issues/320
playwright==1.36.0
# requires Chrome binary to be in path
selenium==4.10.0

File diff suppressed because it is too large

View File

@@ -28,6 +28,7 @@ from apps.stable_diffusion.src.utils.utils import (
fetch_and_update_base_model_id,
get_path_to_diffusers_checkpoint,
sanitize_seed,
parse_seed_input,
batch_seeds,
get_path_stem,
get_extended_name,

View File

@@ -66,9 +66,9 @@ p.add_argument(
p.add_argument(
"--seed",
type=int,
type=str,
default=-1,
help="The seed to use. -1 for a random one.",
help="The seed or list of seeds to use. -1 for a random one.",
)
p.add_argument(

View File

@@ -727,7 +727,8 @@ def fetch_and_update_base_model_id(model_to_run, base_model=""):
# Generate and return a new seed if the provided one is not in the
# supported range (including -1)
def sanitize_seed(seed):
def sanitize_seed(seed: int | str):
seed = int(seed)
uint32_info = np.iinfo(np.uint32)
uint32_min, uint32_max = uint32_info.min, uint32_info.max
if seed < uint32_min or seed >= uint32_max:
@@ -735,20 +736,48 @@ def sanitize_seed(seed):
return seed
# Generate a set of seeds, using as the first seed of the set,
# optionally using it as the rng seed for subsequent seeds in the set
def batch_seeds(seed, batch_count, repeatable=False):
# use the passed seed as the initial seed of the batch
seeds = [sanitize_seed(seed)]
# take a seed expression in an input format and convert it to
# a list of integers, where possible
def parse_seed_input(seed_input: str | list | int):
if isinstance(seed_input, str):
try:
seed_input = json.loads(seed_input)
except (ValueError, TypeError):
seed_input = None
if isinstance(seed_input, int):
return [seed_input]
if isinstance(seed_input, list) and all(
type(seed) is int for seed in seed_input
):
return seed_input
raise TypeError(
"Seed input must be an integer or an array of integers in JSON format"
)
# Generate a set of seeds from an input expression for batch_count batches,
# optionally using that input as the rng seed for any randomly generated seeds.
def batch_seeds(
seed_input: str | list | int, batch_count: int, repeatable=False
):
# turn the input into a list if possible
seeds = parse_seed_input(seed_input)
# slice or pad the list to be of batch_count length
seeds = seeds[:batch_count] + [-1] * (batch_count - len(seeds))
if repeatable:
# use the initial seed as the rng generator seed
# set seed for the rng based on what we have so far
saved_random_state = random_getstate()
seed_random(seed)
if all(seed < 0 for seed in seeds):
seeds[0] = sanitize_seed(seeds[0])
seed_random(str(seeds))
# generate the additional seeds
for i in range(1, batch_count):
seeds.append(sanitize_seed(-1))
# generate any seeds that are unspecified
seeds = [sanitize_seed(seed) for seed in seeds]
if repeatable:
# reset the rng back to normal
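A quick usage sketch of the helpers above (the import path follows the earlier `apps.stable_diffusion.src.utils.utils` hunk; the concrete random values are illustrative):

```python
from apps.stable_diffusion.src.utils.utils import batch_seeds, parse_seed_input

print(parse_seed_input("[42, -1, 7]"))  # [42, -1, 7]
print(parse_seed_input(5))              # [5]
# parse_seed_input("not json")          # raises TypeError

# Two explicit seeds, four batches: the list is padded with -1 and any
# gaps are filled randomly -- reproducibly so with repeatable=True.
print(batch_seeds("[42, 7]", 4, repeatable=True))
# e.g. [42, 7, 3097784624, 688431918]
```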

View File

@@ -21,129 +21,134 @@ def user(message, history):
sharkModel = 0
sharded_model = 0
h2ogpt_model = 0
past_key_values = None
model_map = {
"codegen": "Salesforce/codegen25-7b-multi",
"vicuna1p3": "lmsys/vicuna-7b-v1.3",
"vicuna": "TheBloke/vicuna-7B-1.1-HF",
"StableLM": "stabilityai/stablelm-tuned-alpha-3b",
}
# NOTE: Each `model_name` should have its own start message
start_message = {
"StableLM": (
"<|SYSTEM|># StableLM Tuned (Alpha version)"
"\n- StableLM is a helpful and harmless open-source AI language model "
"developed by StabilityAI."
"\n- StableLM is excited to be able to help the user, but will refuse "
"to do anything that could be considered harmful to the user."
"\n- StableLM is more than just an information source, StableLM is also "
"able to write poetry, short stories, and make jokes."
"\n- StableLM will refuse to participate in anything that "
"could harm a human."
),
"vicuna": (
"A chat between a curious user and an artificial intelligence assistant. "
"The assistant gives helpful, detailed, and polite answers to the user's "
"questions.\n"
),
"vicuna1p3": (
"A chat between a curious user and an artificial intelligence assistant. "
"The assistant gives helpful, detailed, and polite answers to the user's "
"questions.\n"
),
"codegen": "",
}
start_message = """
SHARK DocuChat
Chat with an AI, contextualized with provided files.
"""
def create_prompt(model_name, history):
system_message = start_message[model_name]
def create_prompt(history):
system_message = start_message
if model_name in ["StableLM", "vicuna", "vicuna1p3"]:
conversation = "".join(
[
"".join(["<|USER|>" + item[0], "<|ASSISTANT|>" + item[1]])
for item in history
]
)
else:
conversation = "".join(
["".join([item[0], item[1]]) for item in history]
)
conversation = "".join(["".join([item[0], item[1]]) for item in history])
msg = system_message + conversation
msg = msg.strip()
return msg
def chat(curr_system_message, history, model, device, precision):
def chat(curr_system_message, history, device, precision):
args.run_docuchat_web = True
global sharded_model
global past_key_values
global h2ogpt_model
global h2ogpt_tokenizer
global model_state
global langchain
global userpath_selector
model_name, model_path = list(map(str.strip, model.split("=>")))
print(f"In chat for {model_name}")
if h2ogpt_model == 0:
if "cuda" in device:
shark_device = "cuda"
elif "sync" in device:
shark_device = "cpu"
elif "task" in device:
shark_device = "cpu"
elif "vulkan" in device:
shark_device = "vulkan"
else:
print("unrecognized device")
# if h2ogpt_model == 0:
# if "cuda" in device:
# device = "cuda"
# elif "sync" in device:
# device = "cpu-sync"
# elif "task" in device:
# device = "cpu-task"
# elif "vulkan" in device:
# device = "vulkan"
# else:
# print("unrecognized device")
device = "cpu" if shark_device == "cpu" else "cuda"
# max_toks = 128 if model_name == "codegen" else 512
# h2ogpt_model = UnshardedVicuna(
# model_name,
# hf_model_path=model_path,
# device=device,
# precision=precision,
# max_num_tokens=max_toks,
# )
# prompt = create_prompt(model_name, history)
# print("prompt = ", prompt)
args.device = shark_device
args.precision = precision
# for partial_text in h2ogpt_model.generate(prompt):
# history[-1][1] = partial_text
# yield history
output = gen.evaluate(
None, # model_state
None, # my_db_state
None, # instruction
None, # iinput
history, # context
False, # stream_output
None, # prompt_type
None, # prompt_dict
None, # temperature
None, # top_p
None, # top_k
None, # num_beams
None, # max_new_tokens
None, # min_new_tokens
None, # early_stopping
None, # max_time
None, # repetition_penalty
None, # num_return_sequences
False, # do_sample
False, # chat
None, # instruction_nochat
curr_system_message, # iinput_nochat
"Disabled", # langchain_mode
LangChainAction.QUERY.value, # langchain_action
3, # top_k_docs
True, # chunk
512, # chunk_size
[DocumentChoices.All_Relevant.name], # document_choice
from apps.language_models.langchain.gen import Langchain
langchain = Langchain(device, precision)
h2ogpt_model, h2ogpt_tokenizer, _ = langchain.get_model(
load_4bit=True
if device == "cuda"
else False, # load model in 4bit if device is cuda to save memory
load_gptq="",
use_safetensors=False,
infer_devices=True,
device=device,
base_model="h2oai/h2ogpt-gm-oasst1-en-2048-falcon-7b-v3",
inference_server="",
tokenizer_base_model="h2oai/h2ogpt-gm-oasst1-en-2048-falcon-7b-v3",
lora_weights="",
gpu_id=0,
reward_type=None,
local_files_only=False,
resume_download=True,
use_auth_token=False,
trust_remote_code=True,
offload_folder=None,
compile_model=False,
verbose=False,
)
model_state = dict(
model=h2ogpt_model,
tokenizer=h2ogpt_tokenizer,
device=device,
base_model="h2oai/h2ogpt-gm-oasst1-en-2048-falcon-7b-v3",
tokenizer_base_model="h2oai/h2ogpt-gm-oasst1-en-2048-falcon-7b-v3",
lora_weights="",
inference_server="",
prompt_type=None,
prompt_dict=None,
)
prompt = create_prompt(history)
output = langchain.evaluate(
model_state=model_state,
my_db_state=None,
instruction=prompt,
iinput="",
context="",
stream_output=True,
prompt_type="prompt_answer",
prompt_dict={
"promptA": "",
"promptB": "",
"PreInstruct": "<|prompt|>",
"PreInput": None,
"PreResponse": "<|answer|>",
"terminate_response": [
"<|prompt|>",
"<|answer|>",
"<|endoftext|>",
],
"chat_sep": "<|endoftext|>",
"chat_turn_sep": "<|endoftext|>",
"humanstr": "<|prompt|>",
"botstr": "<|answer|>",
"generates_leading_space": False,
},
temperature=0.1,
top_p=0.75,
top_k=40,
num_beams=1,
max_new_tokens=256,
min_new_tokens=0,
early_stopping=False,
max_time=180,
repetition_penalty=1.07,
num_return_sequences=1,
do_sample=False,
chat=True,
instruction_nochat=prompt,
iinput_nochat="",
langchain_mode="UserData",
langchain_action=LangChainAction.QUERY.value,
top_k_docs=3,
chunk=True,
chunk_size=512,
document_choice=[DocumentChoices.All_Relevant.name],
concurrency_count=1,
memory_restriction_level=2,
raise_generate_gpu_exceptions=False,
@@ -154,9 +159,13 @@ def chat(curr_system_message, history, model, device, precision):
db_type="chroma",
n_jobs=-1,
first_para=False,
max_max_time=60 * 2,
model_state0=model_state,
model_lock=True,
user_path=userpath_selector.value,
)
for partial_text in output:
history[-1][1] = partial_text
history[-1][1] = partial_text["response"]
yield history
return history
@@ -164,14 +173,6 @@ def chat(curr_system_message, history, model, device, precision):
with gr.Blocks(title="H2OGPT") as h2ogpt_web:
with gr.Row():
model_choices = list(
map(lambda x: f"{x[0]: <10} => {x[1]}", model_map.items())
)
model = gr.Dropdown(
label="Select Model",
value=model_choices[0],
choices=model_choices,
)
supported_devices = available_devices
enabled = len(supported_devices) > 0
# show cpu-task device first in list for chatbot
@@ -197,6 +198,14 @@ with gr.Blocks(title="H2OGPT") as h2ogpt_web:
],
visible=True,
)
userpath_selector = gr.Textbox(
label="Document Directory",
value=str(
os.path.abspath("apps/language_models/langchain/user_path/")
),
interactive=True,
container=True,
)
chatbot = gr.Chatbot(height=500)
with gr.Row():
with gr.Column():
@@ -220,7 +229,7 @@ with gr.Blocks(title="H2OGPT") as h2ogpt_web:
fn=user, inputs=[msg, chatbot], outputs=[msg, chatbot], queue=False
).then(
fn=chat,
inputs=[system_msg, chatbot, model, device, precision],
inputs=[system_msg, chatbot, device, precision],
outputs=[chatbot],
queue=True,
)
@@ -228,7 +237,7 @@ with gr.Blocks(title="H2OGPT") as h2ogpt_web:
fn=user, inputs=[msg, chatbot], outputs=[msg, chatbot], queue=False
).then(
fn=chat,
inputs=[system_msg, chatbot, model, device, precision],
inputs=[system_msg, chatbot, device, precision],
outputs=[chatbot],
queue=True,
)
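The reworked `chat` above follows a load-once pattern: the first call constructs the `Langchain` wrapper and caches the model in module globals; every call then streams cumulative text out of `evaluate`. A self-contained sketch of that shape (the stubs replace the real model load and evaluate call):

```python
_model = None  # module-level cache, mirroring the h2ogpt_model global

def _load_model(device, precision):
    print(f"loading model once ({device}, {precision})")
    return object()

def _evaluate(model, prompt):
    text = ""
    for token in ["SHARK ", "DocuChat"]:
        text += token
        yield {"response": text}  # cumulative text, as the UI loop expects

def chat(history, device="cpu", precision="fp16"):
    global _model
    if _model is None:  # first call only
        _model = _load_model(device, precision)
    prompt = "".join(a + b for a, b in history).strip()
    for partial in _evaluate(_model, prompt):
        history[-1][1] = partial["response"]
        yield history

history = [["Hi", ""]]
for _ in chat(history):
    pass
print(history)  # [['Hi', 'SHARK DocuChat']]
```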

View File

@@ -50,7 +50,7 @@ def img2img_inf(
steps: int,
strength: float,
guidance_scale: float,
seed: int,
seed: str | int,
batch_count: int,
batch_size: int,
scheduler: str,
@@ -230,10 +230,12 @@ def img2img_inf(
start_time = time.time()
global_obj.get_sd_obj().log = ""
generated_imgs = []
seeds = []
seeds = utils.batch_seeds(seed, batch_count, repeatable_seeds)
extra_info = {"STRENGTH": strength}
text_output = ""
try:
seeds = utils.batch_seeds(seed, batch_count, repeatable_seeds)
except TypeError as error:
raise gr.Error(str(error)) from None
for current_batch in range(batch_count):
out_imgs = global_obj.get_sd_obj().generate_images(
@@ -617,8 +619,10 @@ with gr.Blocks(title="Image-to-Image") as img2img_web:
visible=False,
)
with gr.Row():
seed = gr.Number(
value=args.seed, precision=0, label="Seed"
seed = gr.Textbox(
value=args.seed,
label="Seed",
info="An integer or a JSON list of integers, -1 for random",
)
device = gr.Dropdown(
elem_id="device",

View File

@@ -49,7 +49,7 @@ def inpaint_inf(
inpaint_full_res_padding: int,
steps: int,
guidance_scale: float,
seed: int,
seed: str | int,
batch_count: int,
batch_size: int,
scheduler: str,
@@ -181,10 +181,13 @@ def inpaint_inf(
start_time = time.time()
global_obj.get_sd_obj().log = ""
generated_imgs = []
seeds = utils.batch_seeds(seed, batch_count, repeatable_seeds)
image = image_dict["image"]
mask_image = image_dict["mask"]
text_output = ""
try:
seeds = utils.batch_seeds(seed, batch_count, repeatable_seeds)
except TypeError as error:
raise gr.Error(str(error)) from None
for current_batch in range(batch_count):
out_imgs = global_obj.get_sd_obj().generate_images(
@@ -514,8 +517,10 @@ with gr.Blocks(title="Inpainting") as inpaint_web:
visible=False,
)
with gr.Row():
seed = gr.Number(
value=args.seed, precision=0, label="Seed"
seed = gr.Textbox(
value=args.seed,
label="Seed",
info="An integer or a JSON list of integers, -1 for random",
)
device = gr.Dropdown(
elem_id="device",

View File

@@ -3,7 +3,7 @@ import os
import gradio as gr
from PIL import Image
from apps.stable_diffusion.scripts import lora_train
from apps.stable_diffusion.src import prompt_examples, args
from apps.stable_diffusion.src import prompt_examples, args, utils
from apps.stable_diffusion.web.ui.utils import (
available_devices,
nodlogo_loc,
@@ -168,7 +168,9 @@ with gr.Blocks(title="Lora Training") as lora_train_web:
stop_batch = gr.Button("Stop Batch")
with gr.Row():
seed = gr.Number(
value=args.seed, precision=0, label="Seed"
value=utils.parse_seed_input(args.seed)[0],
precision=0,
label="Seed",
)
device = gr.Dropdown(
elem_id="device",

View File

@@ -49,7 +49,7 @@ def outpaint_inf(
width: int,
steps: int,
guidance_scale: float,
seed: int,
seed: str,
batch_count: int,
batch_size: int,
scheduler: str,
@@ -178,7 +178,10 @@ def outpaint_inf(
start_time = time.time()
global_obj.get_sd_obj().log = ""
generated_imgs = []
seeds = utils.batch_seeds(seed, batch_count, repeatable_seeds)
try:
seeds = utils.batch_seeds(seed, batch_count, repeatable_seeds)
except TypeError as error:
raise gr.Error(str(error)) from None
left = True if "left" in directions else False
right = True if "right" in directions else False
@@ -542,8 +545,10 @@ with gr.Blocks(title="Outpainting") as outpaint_web:
visible=False,
)
with gr.Row():
seed = gr.Number(
value=args.seed, precision=0, label="Seed"
seed = gr.Textbox(
value=args.seed,
label="Seed",
info="An integer or a JSON list of integers, -1 for random",
)
device = gr.Dropdown(
elem_id="device",

View File

@@ -284,6 +284,13 @@ def llm_chat_api(InputData: dict):
}
def view_json_file(file_obj):
content = ""
with open(file_obj.name, "r") as fopen:
content = fopen.read()
return content
with gr.Blocks(title="Chatbot") as stablelm_chat:
with gr.Row():
model_choices = list(
@@ -319,6 +326,14 @@ with gr.Blocks(title="Chatbot") as stablelm_chat:
],
visible=True,
)
with gr.Row():
with gr.Group():
config_file = gr.File(label="Upload sharding configuration")
json_view_button = gr.Button("View as JSON")
json_view = gr.JSON()
json_view_button.click(
fn=view_json_file, inputs=[config_file], outputs=[json_view]
)
chatbot = gr.Chatbot(height=500)
with gr.Row():
with gr.Column():

View File

@@ -46,7 +46,7 @@ def txt2img_inf(
width: int,
steps: int,
guidance_scale: float,
seed: int,
seed: str | int,
batch_count: int,
batch_size: int,
scheduler: str,
@@ -178,8 +178,11 @@ def txt2img_inf(
start_time = time.time()
global_obj.get_sd_obj().log = ""
generated_imgs = []
seeds = utils.batch_seeds(seed, batch_count, repeatable_seeds)
text_output = ""
try:
seeds = utils.batch_seeds(seed, batch_count, repeatable_seeds)
except TypeError as error:
raise gr.Error(str(error)) from None
for current_batch in range(batch_count):
out_imgs = global_obj.get_sd_obj().generate_images(
@@ -481,8 +484,10 @@ with gr.Blocks(title="Text-to-Image") as txt2img_web:
label="Repeatable Seeds",
)
with gr.Row():
seed = gr.Number(
value=args.seed, precision=0, label="Seed"
seed = gr.Textbox(
value=args.seed,
label="Seed",
info="An integer or a JSON list of integers, -1 for random",
)
device = gr.Dropdown(
elem_id="device",

View File

@@ -42,7 +42,7 @@ def upscaler_inf(
steps: int,
noise_level: int,
guidance_scale: float,
seed: int,
seed: str,
batch_count: int,
batch_size: int,
scheduler: str,
@@ -177,8 +177,11 @@ def upscaler_inf(
start_time = time.time()
global_obj.get_sd_obj().log = ""
generated_imgs = []
seeds = utils.batch_seeds(seed, batch_count, repeatable_seeds)
extra_info = {"NOISE LEVEL": noise_level}
try:
seeds = utils.batch_seeds(seed, batch_count, repeatable_seeds)
except TypeError as error:
raise gr.Error(str(error)) from None
for current_batch in range(batch_count):
low_res_img = image
@@ -534,8 +537,10 @@ with gr.Blocks(title="Upscaler") as upscaler_web:
visible=False,
)
with gr.Row():
seed = gr.Number(
value=args.seed, precision=0, label="Seed"
seed = gr.Textbox(
value=args.seed,
label="Seed",
info="An integer or a JSON list of integers, -1 for random",
)
device = gr.Dropdown(
elem_id="device",

View File

@@ -0,0 +1,14 @@
import os
from sys import executable
import subprocess
from apps.language_models.scripts import vicuna
def test_loop():
precisions = ["fp16", "int8", "int4"]
devices = ["cpu"]
for precision in precisions:
for device in devices:
model = vicuna.UnshardedVicuna(device=device, precision=precision)
model.compile()
del model
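For a single cell of the matrix `test_loop` walks, the equivalent manual invocation would look like this (a sketch assuming the `UnshardedVicuna` constructor used above):

```python
from apps.language_models.scripts import vicuna

# One precision/device combination from test_loop's matrix.
model = vicuna.UnshardedVicuna(device="cpu", precision="fp16")
model.compile()
del model
```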