Mirror of https://github.com/nod-ai/SHARK-Studio.git

Compare commits: 20230720.8...20230725.8 (6 commits)
| Author | SHA1 | Date |
|---|---|---|
| | 289f983f41 | |
| | 453e46562f | |
| | 5497af1f56 | |
| | f3cb63fc9c | |
| | d7092aafaa | |
| | a415f3f70e | |
.github/workflows/test-models.yml (vendored): 1 line changed
@@ -115,6 +115,7 @@ jobs:
pytest --forked --benchmark=native --ci --ci_sha=${SHORT_SHA} --update_tank --tank_url="gs://shark_tank/nightly/" -k cpu
gsutil cp ./bench_results.csv gs://shark-public/builder/bench_results/${DATE}/bench_results_cpu_${SHORT_SHA}.csv
gsutil cp gs://shark-public/builder/bench_results/${DATE}/bench_results_cpu_${SHORT_SHA}.csv gs://shark-public/builder/bench_results/latest/bench_results_cpu_latest.csv
python build_tools/vicuna_testing.py

- name: Validate Models on NVIDIA GPU
  if: matrix.suite == 'cuda'
@@ -5,6 +5,7 @@

1.) Install all the dependencies by running:

```shell
pip install -r apps/language_models/langchain/langchain_requirements.txt
sudo apt-get install -y libmagic-dev poppler-utils tesseract-ocr libtesseract-dev libreoffice
```

2.) Create a folder named `user_path` in the `apps/language_models/langchain/` directory.
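For step 2, a minimal Python sketch of the same setup (the path is taken from the instructions above; a plain `mkdir` works just as well):

```python
import os

# create the folder the langchain DocuChat app expects for user documents
os.makedirs("apps/language_models/langchain/user_path", exist_ok=True)
```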
@@ -2,7 +2,7 @@ import copy
import torch

from evaluate_params import eval_func_param_names
from gen import get_score_model, get_model, evaluate, check_locals
from gen import Langchain
from prompter import non_hf_types
from utils import clear_torch_cache, NullContext, get_kwargs

@@ -87,7 +87,7 @@ def run_cli( # for local function:
# unique to this function:
cli_loop=None,
):
check_locals(**locals())
Langchain.check_locals(**locals())

score_model = "" # FIXME: For now, so user doesn't have to pass
n_gpus = torch.cuda.device_count() if torch.cuda.is_available else 0

@@ -98,16 +98,20 @@ def run_cli( # for local function:
from functools import partial

# get score model
smodel, stokenizer, sdevice = get_score_model(
smodel, stokenizer, sdevice = Langchain.get_score_model(
reward_type=True,
**get_kwargs(
get_score_model, exclude_names=["reward_type"], **locals()
Langchain.get_score_model,
exclude_names=["reward_type"],
**locals()
)
)

model, tokenizer, device = get_model(
model, tokenizer, device = Langchain.get_model(
reward_type=False,
**get_kwargs(get_model, exclude_names=["reward_type"], **locals())
**get_kwargs(
Langchain.get_model, exclude_names=["reward_type"], **locals()
)
)
model_dict = dict(
base_model=base_model,

@@ -121,11 +125,11 @@ def run_cli( # for local function:
model_state.update(model_dict)
my_db_state = [None]
fun = partial(
evaluate,
Langchain.evaluate,
model_state,
my_db_state,
**get_kwargs(
evaluate,
Langchain.evaluate,
exclude_names=["model_state", "my_db_state"]
+ eval_func_param_names,
**locals()
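The hunks above repeatedly point `get_kwargs` at the new `Langchain.*` methods. For readers unfamiliar with the helper, here is a rough, assumed stand-in for what it does; the real implementation lives in the project's utils.py and may differ:

```python
import inspect

def get_kwargs(func, exclude_names=(), **kwargs):
    # keep only keyword arguments that func actually accepts,
    # minus anything explicitly excluded (e.g. "reward_type")
    params = inspect.signature(func).parameters
    return {
        k: v
        for k, v in kwargs.items()
        if k in params and k not in exclude_names
    }
```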
@@ -7,7 +7,7 @@ import torch
from matplotlib import pyplot as plt

from evaluate_params import eval_func_param_names, eval_extra_columns
from gen import get_context, get_score_model, get_model, evaluate, check_locals
from gen import Langchain
from prompter import Prompter
from utils import clear_torch_cache, NullContext, get_kwargs

@@ -94,7 +94,7 @@ def run_eval( # for local function:
force_langchain_evaluate=None,
model_state_none=None,
):
check_locals(**locals())
Langchain.check_locals(**locals())

if eval_prompts_only_num > 0:
np.random.seed(eval_prompts_only_seed)

@@ -144,7 +144,7 @@ def run_eval( # for local function:
] = "" # no input
examplenew[
eval_func_param_names.index("context")
] = get_context(chat_context, prompt_type)
] = Langchain.get_context(chat_context, prompt_type)
examples.append(examplenew)
responses.append(output)
else:

@@ -170,7 +170,7 @@ def run_eval( # for local function:
] = "" # no input
examplenew[
eval_func_param_names.index("context")
] = get_context(chat_context, prompt_type)
] = Langchain.get_context(chat_context, prompt_type)
examples.append(examplenew)
responses.append(output)

@@ -210,18 +210,22 @@ def run_eval( # for local function:
from functools import partial

# get score model
smodel, stokenizer, sdevice = get_score_model(
smodel, stokenizer, sdevice = Langchain.get_score_model(
reward_type=True,
**get_kwargs(
get_score_model, exclude_names=["reward_type"], **locals()
Langchain.get_score_model,
exclude_names=["reward_type"],
**locals()
)
)

if not eval_as_output:
model, tokenizer, device = get_model(
model, tokenizer, device = Langchain.get_model(
reward_type=False,
**get_kwargs(
get_model, exclude_names=["reward_type"], **locals()
Langchain.get_model,
exclude_names=["reward_type"],
**locals()
)
)
model_dict = dict(

@@ -236,11 +240,11 @@ def run_eval( # for local function:
model_state.update(model_dict)
my_db_state = [None]
fun = partial(
evaluate,
Langchain.evaluate,
model_state,
my_db_state,
**get_kwargs(
evaluate,
Langchain.evaluate,
exclude_names=["model_state", "my_db_state"]
+ eval_func_param_names,
**locals()
File diff suppressed because it is too large
@@ -34,7 +34,7 @@ from enums import (
LangChainMode,
)
from evaluate_params import gen_hyper
from gen import get_model, SEED
from gen import Langchain, SEED
from prompter import non_hf_types, PromptType, Prompter
from utils import (
wrapped_partial,

@@ -44,7 +44,6 @@ from utils import (
makedirs,
get_url,
flatten_list,
get_device,
ProgressParallel,
remove,
hash_file,

@@ -92,6 +91,7 @@ from langchain.chains.question_answering import load_qa_chain
from langchain.docstore.document import Document
from langchain import PromptTemplate, HuggingFaceTextGenInference
from langchain.vectorstores import Chroma
from apps.stable_diffusion.src import args


def get_db(

@@ -371,8 +371,8 @@ def get_embedding(
# to ensure can fork without deadlock
from langchain.embeddings import HuggingFaceEmbeddings

device, torch_dtype, context_class = get_device_dtype()
model_kwargs = dict(device=device)
torch_dtype, context_class = get_dtype()
model_kwargs = dict(device=args.device)
if "instructor" in hf_embedding_model:
encode_kwargs = {"normalize_embeddings": True}
embedding = HuggingFaceInstructEmbeddings(

@@ -907,7 +907,7 @@ def get_llm(
# model_name = 'h2oai/h2ogpt-oig-oasst1-512-6_9b'
# model_name = 'h2oai/h2ogpt-oasst1-512-20b'
inference_server = ""
model, tokenizer, device = get_model(
model, tokenizer, _ = Langchain.get_model(
load_8bit=True,
base_model=model_name,
inference_server=inference_server,

@@ -974,17 +974,15 @@ def get_llm(
return llm, model_name, streamer, prompt_type


def get_device_dtype():
def get_dtype():
# torch.device("cuda") leads to cuda:x cuda:y mismatches for multi-GPU consistently
import torch

n_gpus = torch.cuda.device_count() if torch.cuda.is_available else 0
device = "cpu" if n_gpus == 0 else "cuda"
# from utils import NullContext
# context_class = NullContext if n_gpus > 1 or n_gpus == 0 else context_class
context_class = torch.device
torch_dtype = torch.float16 if device == "cuda" else torch.float32
return device, torch_dtype, context_class
torch_dtype = torch.float16 if args.device == "cuda" else torch.float32
return torch_dtype, context_class


def get_wiki_data(
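Pulling the added lines together, the reworked helper now keys everything off the globally parsed `args.device` instead of probing CUDA locally. A minimal sketch of the new shape, assembled from this hunk (assuming `args` is the `apps.stable_diffusion.src` namespace imported above):

```python
import torch
from apps.stable_diffusion.src import args

def get_dtype():
    # device selection happens once at argument-parsing time; this helper only
    # picks the dtype and the context-manager class that match args.device
    context_class = torch.device
    torch_dtype = torch.float16 if args.device == "cuda" else torch.float32
    return torch_dtype, context_class
```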
@@ -1715,7 +1713,7 @@ def path_to_docs(
caption_loader
and not isinstance(caption_loader, (bool, str))
and caption_loader.device != "cpu"
or get_device() == "cuda"
or args.device == "cuda"
):
# to avoid deadlocks, presume was preloaded and so can't fork due to cuda context
n_jobs_image = 1

@@ -2549,15 +2547,15 @@ def _run_qa_db(
# context stuff similar to used in evaluate()
import torch

device, torch_dtype, context_class = get_device_dtype()
torch_dtype, context_class = get_dtype()
with torch.no_grad():
have_lora_weights = lora_weights not in [no_lora_str, "", None]
context_class_cast = (
NullContext
if device == "cpu" or have_lora_weights
if args.device == "cpu" or have_lora_weights
else torch.autocast
)
with context_class_cast(device):
with context_class_cast(args.device):
answer = chain()

if not use_context:
@@ -28,18 +28,18 @@ global_precision = "fp16"
if not args.run_docuchat_web:
args.device = global_device
args.precision = global_precision
tensor_device = "cpu" if args.device == "cpu" else "cuda"


class H2OGPTSHARKModel(torch.nn.Module):
def __init__(self):
super().__init__()
model_name = "h2ogpt_falcon_7b"
path_str = (
model_name + "_" + args.precision + "_" + args.device + ".vmfb"
extended_model_name = (
model_name + "_" + args.precision + "_" + args.device
)
vmfb_path = Path(path_str)
path_str = model_name + "_" + args.precision + ".mlir"
mlir_path = Path(path_str)
vmfb_path = Path(extended_model_name + ".vmfb")
mlir_path = Path(model_name + "_" + args.precision + ".mlir")
shark_module = None

if not vmfb_path.exists():
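Worked through with the defaults visible in this hunk (`model_name = "h2ogpt_falcon_7b"`), the new naming scheme produces, for example:

```python
from pathlib import Path

# illustrative values; args.precision and args.device come from the app's CLI flags
model_name = "h2ogpt_falcon_7b"
precision, device = "fp16", "cpu"

extended_model_name = model_name + "_" + precision + "_" + device
vmfb_path = Path(extended_model_name + ".vmfb")            # h2ogpt_falcon_7b_fp16_cpu.vmfb
mlir_path = Path(model_name + "_" + precision + ".mlir")   # h2ogpt_falcon_7b_fp16.mlir
```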
@@ -50,7 +50,7 @@ class H2OGPTSHARKModel(torch.nn.Module):
# Downloading VMFB from shark_tank
print("Downloading vmfb from shark tank.")
download_public_file(
"gs://shark_tank/langchain/" + path_str,
"gs://shark_tank/langchain/" + str(vmfb_path),
vmfb_path.absolute(),
single_file=True,
)

@@ -61,11 +61,7 @@ class H2OGPTSHARKModel(torch.nn.Module):
else:
# Downloading MLIR from shark_tank
download_public_file(
"gs://shark_tank/langchain/"
+ model_name
+ "_"
+ args.precision
+ ".mlir",
"gs://shark_tank/langchain/" + str(mlir_path),
mlir_path.absolute(),
single_file=True,
)

@@ -83,16 +79,18 @@ class H2OGPTSHARKModel(torch.nn.Module):
mlir_dialect="linalg",
)
print(f"[DEBUG] generating vmfb.")
shark_module = _compile_module(shark_module, vmfb_path, [])
shark_module = _compile_module(
shark_module, extended_model_name, []
)
print("Saved newly generated vmfb.")

if shark_module is None:
if vmfb_path.exists():
print("Compiled vmfb found. Loading it from: ", vmfb_path)
shark_module = SharkInference(
None, device=global_device, mlir_dialect="linalg"
None, device=args.device, mlir_dialect="linalg"
)
shark_module.load_module(vmfb_path)
shark_module.load_module(str(vmfb_path))
print("Compiled vmfb loaded successfully.")
else:
raise ValueError("Unable to download/generate a vmfb.")

@@ -105,7 +103,7 @@ class H2OGPTSHARKModel(torch.nn.Module):
"forward",
(input_ids.to(device="cpu"), attention_mask.to(device="cpu")),
)
).to(device=global_device)
).to(device=tensor_device)
return result


@@ -121,14 +119,14 @@ def pad_or_truncate_inputs(
num_add_token = max_padding_length - inp_shape[1]
padded_input_ids = torch.cat(
[
torch.tensor([[11] * num_add_token]).to(device=global_device),
torch.tensor([[11] * num_add_token]).to(device=tensor_device),
input_ids,
],
dim=1,
)
padded_attention_mask = torch.cat(
[
torch.tensor([[0] * num_add_token]).to(device=global_device),
torch.tensor([[0] * num_add_token]).to(device=tensor_device),
attention_mask,
],
dim=1,

@@ -331,7 +329,7 @@ class H2OTextGenerationPipeline(TextGenerationPipeline):
model_inputs["input_ids"], model_inputs["attention_mask"]
)

if global_precision == "fp16":
if args.precision == "fp16":
outputs = outputs.to(dtype=torch.float32)
next_token_logits = outputs

@@ -458,7 +456,7 @@ class H2OTextGenerationPipeline(TextGenerationPipeline):
if isinstance(eos_token_id, int):
eos_token_id = [eos_token_id]
self.eos_token_id_tensor = (
torch.tensor(eos_token_id).to(device=global_device)
torch.tensor(eos_token_id).to(device=tensor_device)
if eos_token_id is not None
else None
)

@@ -536,7 +534,7 @@ class H2OTextGenerationPipeline(TextGenerationPipeline):
self.input_ids = torch.cat(
[
torch.tensor(self.truncated_input_ids)
.to(device=global_device)
.to(device=tensor_device)
.unsqueeze(dim=0),
self.input_ids,
],

@@ -615,22 +613,9 @@ class H2OTextGenerationPipeline(TextGenerationPipeline):
**generate_kwargs,
)
out_b = generated_sequence.shape[0]
if self.framework == "pt":
generated_sequence = generated_sequence.reshape(
in_b, out_b // in_b, *generated_sequence.shape[1:]
)
elif self.framework == "tf":
from transformers import is_tf_available

if is_tf_available():
import tensorflow as tf

generated_sequence = tf.reshape(
generated_sequence,
(in_b, out_b // in_b, *generated_sequence.shape[1:]),
)
else:
raise ValueError("TF not avaialble.")
generated_sequence = generated_sequence.reshape(
in_b, out_b // in_b, *generated_sequence.shape[1:]
)
return {
"generated_sequence": generated_sequence,
"input_ids": input_ids,
@@ -1,11 +1,12 @@
# for generate (gradio server) and finetune
datasets==2.13.0
sentencepiece==0.1.99
gradio==3.35.2
huggingface_hub==0.15.1
# gradio==3.37.0
huggingface_hub==0.16.4
appdirs==1.4.4
fire==0.5.0
docutils==0.20.1
# torch==2.0.1; sys_platform != "darwin" and platform_machine != "arm64"
evaluate==0.4.0
rouge_score==0.1.2
sacrebleu==2.3.1

@@ -18,7 +19,9 @@ matplotlib==3.7.1
loralib==0.1.1
bitsandbytes==0.39.0
accelerate==0.20.3
git+https://github.com/huggingface/peft.git@0b62b4378b4ce9367932c73540349da9a41bdea8
peft==0.4.0
# 4.31.0+ breaks load_in_8bit=True (https://github.com/huggingface/transformers/issues/25026)
# transformers==4.30.2
tokenizers==0.13.3
APScheduler==3.10.1

@@ -33,7 +36,7 @@ tensorboard==2.13.0
neptune==1.2.0

# for gradio client
gradio_client==0.2.7
gradio_client==0.2.10
beautifulsoup4==4.12.2
markdown==3.4.3

@@ -43,8 +46,9 @@ pytest-xdist==3.2.1
nltk==3.8.1
textstat==0.7.3
# pandoc==2.3
#pypandoc==1.11
pypandoc_binary==1.11
pypandoc==1.11; sys_platform == "darwin" and platform_machine == "arm64"
pypandoc_binary==1.11; platform_machine == "x86_64"
pypandoc_binary==1.11; sys_platform == "win32"
openpyxl==3.1.2
lm_dataformat==0.0.20
bioc==2.0

@@ -104,3 +108,15 @@ pip-licenses==4.3.0

# weaviate vector db
weaviate-client==3.22.1

gpt4all==1.0.5
llama-cpp-python==0.1.73

arxiv==1.4.8
pymupdf==1.22.5 # AGPL license
# extract-msg==0.41.1 # GPL3

# sometimes unstructured fails, these work in those cases. See https://github.com/h2oai/h2ogpt/issues/320
playwright==1.36.0
# requires Chrome binary to be in path
selenium==4.10.0
File diff suppressed because it is too large
@@ -28,6 +28,7 @@ from apps.stable_diffusion.src.utils.utils import (
fetch_and_update_base_model_id,
get_path_to_diffusers_checkpoint,
sanitize_seed,
parse_seed_input,
batch_seeds,
get_path_stem,
get_extended_name,
@@ -66,9 +66,9 @@ p.add_argument(

p.add_argument(
"--seed",
type=int,
type=str,
default=-1,
help="The seed to use. -1 for a random one.",
help="The seed or list of seeds to use. -1 for a random one.",
)

p.add_argument(
@@ -727,7 +727,8 @@ def fetch_and_update_base_model_id(model_to_run, base_model=""):

# Generate and return a new seed if the provided one is not in the
# supported range (including -1)
def sanitize_seed(seed):
def sanitize_seed(seed: int | str):
seed = int(seed)
uint32_info = np.iinfo(np.uint32)
uint32_min, uint32_max = uint32_info.min, uint32_info.max
if seed < uint32_min or seed >= uint32_max:

@@ -735,20 +736,48 @@ def sanitize_seed(seed):
return seed


# Generate a set of seeds, using as the first seed of the set,
# optionally using it as the rng seed for subsequent seeds in the set
def batch_seeds(seed, batch_count, repeatable=False):
# use the passed seed as the initial seed of the batch
seeds = [sanitize_seed(seed)]
# take a seed expression in an input format and convert it to
# a list of integers, where possible
def parse_seed_input(seed_input: str | list | int):
if isinstance(seed_input, str):
try:
seed_input = json.loads(seed_input)
except (ValueError, TypeError):
seed_input = None

if isinstance(seed_input, int):
return [seed_input]

if isinstance(seed_input, list) and all(
type(seed) is int for seed in seed_input
):
return seed_input

raise TypeError(
"Seed input must be an integer or an array of integers in JSON format"
)


# Generate a set of seeds from an input expression for batch_count batches,
# optionally using that input as the rng seed for any randomly generated seeds.
def batch_seeds(
seed_input: str | list | int, batch_count: int, repeatable=False
):
# turn the input into a list if possible
seeds = parse_seed_input(seed_input)

# slice or pad the list to be of batch_count length
seeds = seeds[:batch_count] + [-1] * (batch_count - len(seeds))

if repeatable:
# use the initial seed as the rng generator seed
# set seed for the rng based on what we have so far
saved_random_state = random_getstate()
seed_random(seed)
if all(seed < 0 for seed in seeds):
seeds[0] = sanitize_seed(seeds[0])
seed_random(str(seeds))

# generate the additional seeds
for i in range(1, batch_count):
seeds.append(sanitize_seed(-1))
# generate any seeds that are unspecified
seeds = [sanitize_seed(seed) for seed in seeds]

if repeatable:
# reset the rng back to normal
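A quick usage sketch of the new seed plumbing, with expected results inferred from the code above (`parse_seed_input` accepts an int, a JSON string, or a list of ints; `batch_seeds` pads or truncates to `batch_count` and fills any remaining `-1` entries with freshly sanitized random seeds):

```python
parse_seed_input(123)        # [123]
parse_seed_input("-1")       # [-1]
parse_seed_input("[42, 7]")  # [42, 7]
parse_seed_input("abc")      # raises TypeError, surfaced to the web UI as gr.Error

# three seeds for three batches: the first two come from the input, the third is
# generated; repeatable=True seeds the rng from the input list before generating
seeds = batch_seeds("[42, 7]", batch_count=3, repeatable=True)
```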
@@ -21,129 +21,134 @@ def user(message, history):


sharkModel = 0
sharded_model = 0
h2ogpt_model = 0

past_key_values = None

model_map = {
"codegen": "Salesforce/codegen25-7b-multi",
"vicuna1p3": "lmsys/vicuna-7b-v1.3",
"vicuna": "TheBloke/vicuna-7B-1.1-HF",
"StableLM": "stabilityai/stablelm-tuned-alpha-3b",
}

# NOTE: Each `model_name` should have its own start message
start_message = {
"StableLM": (
"<|SYSTEM|># StableLM Tuned (Alpha version)"
"\n- StableLM is a helpful and harmless open-source AI language model "
"developed by StabilityAI."
"\n- StableLM is excited to be able to help the user, but will refuse "
"to do anything that could be considered harmful to the user."
"\n- StableLM is more than just an information source, StableLM is also "
"able to write poetry, short stories, and make jokes."
"\n- StableLM will refuse to participate in anything that "
"could harm a human."
),
"vicuna": (
"A chat between a curious user and an artificial intelligence assistant. "
"The assistant gives helpful, detailed, and polite answers to the user's "
"questions.\n"
),
"vicuna1p3": (
"A chat between a curious user and an artificial intelligence assistant. "
"The assistant gives helpful, detailed, and polite answers to the user's "
"questions.\n"
),
"codegen": "",
}
start_message = """
SHARK DocuChat
Chat with an AI, contextualized with provided files.
"""


def create_prompt(model_name, history):
system_message = start_message[model_name]
def create_prompt(history):
system_message = start_message

if model_name in ["StableLM", "vicuna", "vicuna1p3"]:
conversation = "".join(
[
"".join(["<|USER|>" + item[0], "<|ASSISTANT|>" + item[1]])
for item in history
]
)
else:
conversation = "".join(
["".join([item[0], item[1]]) for item in history]
)
conversation = "".join(["".join([item[0], item[1]]) for item in history])

msg = system_message + conversation
msg = msg.strip()
return msg


def chat(curr_system_message, history, model, device, precision):
def chat(curr_system_message, history, device, precision):
args.run_docuchat_web = True
global sharded_model
global past_key_values
global h2ogpt_model
global h2ogpt_tokenizer
global model_state
global langchain
global userpath_selector

model_name, model_path = list(map(str.strip, model.split("=>")))
print(f"In chat for {model_name}")
if h2ogpt_model == 0:
if "cuda" in device:
shark_device = "cuda"
elif "sync" in device:
shark_device = "cpu"
elif "task" in device:
shark_device = "cpu"
elif "vulkan" in device:
shark_device = "vulkan"
else:
print("unrecognized device")

# if h2ogpt_model == 0:
#     if "cuda" in device:
#         device = "cuda"
#     elif "sync" in device:
#         device = "cpu-sync"
#     elif "task" in device:
#         device = "cpu-task"
#     elif "vulkan" in device:
#         device = "vulkan"
#     else:
#         print("unrecognized device")
device = "cpu" if shark_device == "cpu" else "cuda"

# max_toks = 128 if model_name == "codegen" else 512
# h2ogpt_model = UnshardedVicuna(
#     model_name,
#     hf_model_path=model_path,
#     device=device,
#     precision=precision,
#     max_num_tokens=max_toks,
# )
# prompt = create_prompt(model_name, history)
# print("prompt = ", prompt)
args.device = shark_device
args.precision = precision

# for partial_text in h2ogpt_model.generate(prompt):
#     history[-1][1] = partial_text
#     yield history
output = gen.evaluate(
None, # model_state
None, # my_db_state
None, # instruction
None, # iinput
history, # context
False, # stream_output
None, # prompt_type
None, # prompt_dict
None, # temperature
None, # top_p
None, # top_k
None, # num_beams
None, # max_new_tokens
None, # min_new_tokens
None, # early_stopping
None, # max_time
None, # repetition_penalty
None, # num_return_sequences
False, # do_sample
False, # chat
None, # instruction_nochat
curr_system_message, # iinput_nochat
"Disabled", # langchain_mode
LangChainAction.QUERY.value, # langchain_action
3, # top_k_docs
True, # chunk
512, # chunk_size
[DocumentChoices.All_Relevant.name], # document_choice
from apps.language_models.langchain.gen import Langchain

langchain = Langchain(device, precision)
h2ogpt_model, h2ogpt_tokenizer, _ = langchain.get_model(
load_4bit=True
if device == "cuda"
else False, # load model in 4bit if device is cuda to save memory
load_gptq="",
use_safetensors=False,
infer_devices=True,
device=device,
base_model="h2oai/h2ogpt-gm-oasst1-en-2048-falcon-7b-v3",
inference_server="",
tokenizer_base_model="h2oai/h2ogpt-gm-oasst1-en-2048-falcon-7b-v3",
lora_weights="",
gpu_id=0,
reward_type=None,
local_files_only=False,
resume_download=True,
use_auth_token=False,
trust_remote_code=True,
offload_folder=None,
compile_model=False,
verbose=False,
)
model_state = dict(
model=h2ogpt_model,
tokenizer=h2ogpt_tokenizer,
device=device,
base_model="h2oai/h2ogpt-gm-oasst1-en-2048-falcon-7b-v3",
tokenizer_base_model="h2oai/h2ogpt-gm-oasst1-en-2048-falcon-7b-v3",
lora_weights="",
inference_server="",
prompt_type=None,
prompt_dict=None,
)

prompt = create_prompt(history)
output = langchain.evaluate(
model_state=model_state,
my_db_state=None,
instruction=prompt,
iinput="",
context="",
stream_output=True,
prompt_type="prompt_answer",
prompt_dict={
"promptA": "",
"promptB": "",
"PreInstruct": "<|prompt|>",
"PreInput": None,
"PreResponse": "<|answer|>",
"terminate_response": [
"<|prompt|>",
"<|answer|>",
"<|endoftext|>",
],
"chat_sep": "<|endoftext|>",
"chat_turn_sep": "<|endoftext|>",
"humanstr": "<|prompt|>",
"botstr": "<|answer|>",
"generates_leading_space": False,
},
temperature=0.1,
top_p=0.75,
top_k=40,
num_beams=1,
max_new_tokens=256,
min_new_tokens=0,
early_stopping=False,
max_time=180,
repetition_penalty=1.07,
num_return_sequences=1,
do_sample=False,
chat=True,
instruction_nochat=prompt,
iinput_nochat="",
langchain_mode="UserData",
langchain_action=LangChainAction.QUERY.value,
top_k_docs=3,
chunk=True,
chunk_size=512,
document_choice=[DocumentChoices.All_Relevant.name],
concurrency_count=1,
memory_restriction_level=2,
raise_generate_gpu_exceptions=False,

@@ -154,9 +159,13 @@ def chat(curr_system_message, history, model, device, precision):
db_type="chroma",
n_jobs=-1,
first_para=False,
max_max_time=60 * 2,
model_state0=model_state,
model_lock=True,
user_path=userpath_selector.value,
)
for partial_text in output:
history[-1][1] = partial_text
history[-1][1] = partial_text["response"]
yield history

return history

@@ -164,14 +173,6 @@ def chat(curr_system_message, history, model, device, precision):

with gr.Blocks(title="H2OGPT") as h2ogpt_web:
with gr.Row():
model_choices = list(
map(lambda x: f"{x[0]: <10} => {x[1]}", model_map.items())
)
model = gr.Dropdown(
label="Select Model",
value=model_choices[0],
choices=model_choices,
)
supported_devices = available_devices
enabled = len(supported_devices) > 0
# show cpu-task device first in list for chatbot

@@ -197,6 +198,14 @@ with gr.Blocks(title="H2OGPT") as h2ogpt_web:
],
visible=True,
)
userpath_selector = gr.Textbox(
label="Document Directory",
value=str(
os.path.abspath("apps/language_models/langchain/user_path/")
),
interactive=True,
container=True,
)
chatbot = gr.Chatbot(height=500)
with gr.Row():
with gr.Column():

@@ -220,7 +229,7 @@ with gr.Blocks(title="H2OGPT") as h2ogpt_web:
fn=user, inputs=[msg, chatbot], outputs=[msg, chatbot], queue=False
).then(
fn=chat,
inputs=[system_msg, chatbot, model, device, precision],
inputs=[system_msg, chatbot, device, precision],
outputs=[chatbot],
queue=True,
)

@@ -228,7 +237,7 @@ with gr.Blocks(title="H2OGPT") as h2ogpt_web:
fn=user, inputs=[msg, chatbot], outputs=[msg, chatbot], queue=False
).then(
fn=chat,
inputs=[system_msg, chatbot, model, device, precision],
inputs=[system_msg, chatbot, device, precision],
outputs=[chatbot],
queue=True,
)
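The net effect of this rewrite is that the DocuChat tab drives h2oGPT through the new `Langchain` wrapper and streams dict-shaped partial results. A condensed sketch of the consumption side, using the names from the hunk above (illustrative only, not the verbatim handler):

```python
def stream_to_history(output, history):
    # Langchain.evaluate yields dicts; the chat history keeps only the "response" text
    for partial_text in output:
        history[-1][1] = partial_text["response"]
        yield history
```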
@@ -50,7 +50,7 @@ def img2img_inf(
steps: int,
strength: float,
guidance_scale: float,
seed: int,
seed: str | int,
batch_count: int,
batch_size: int,
scheduler: str,

@@ -230,10 +230,12 @@ def img2img_inf(
start_time = time.time()
global_obj.get_sd_obj().log = ""
generated_imgs = []
seeds = []
seeds = utils.batch_seeds(seed, batch_count, repeatable_seeds)
extra_info = {"STRENGTH": strength}
text_output = ""
try:
seeds = utils.batch_seeds(seed, batch_count, repeatable_seeds)
except TypeError as error:
raise gr.Error(str(error)) from None

for current_batch in range(batch_count):
out_imgs = global_obj.get_sd_obj().generate_images(

@@ -617,8 +619,10 @@ with gr.Blocks(title="Image-to-Image") as img2img_web:
visible=False,
)
with gr.Row():
seed = gr.Number(
value=args.seed, precision=0, label="Seed"
seed = gr.Textbox(
value=args.seed,
label="Seed",
info="An integer or a JSON list of integers, -1 for random",
)
device = gr.Dropdown(
elem_id="device",
@@ -49,7 +49,7 @@ def inpaint_inf(
inpaint_full_res_padding: int,
steps: int,
guidance_scale: float,
seed: int,
seed: str | int,
batch_count: int,
batch_size: int,
scheduler: str,

@@ -181,10 +181,13 @@ def inpaint_inf(
start_time = time.time()
global_obj.get_sd_obj().log = ""
generated_imgs = []
seeds = utils.batch_seeds(seed, batch_count, repeatable_seeds)
image = image_dict["image"]
mask_image = image_dict["mask"]
text_output = ""
try:
seeds = utils.batch_seeds(seed, batch_count, repeatable_seeds)
except TypeError as error:
raise gr.Error(str(error)) from None

for current_batch in range(batch_count):
out_imgs = global_obj.get_sd_obj().generate_images(

@@ -514,8 +517,10 @@ with gr.Blocks(title="Inpainting") as inpaint_web:
visible=False,
)
with gr.Row():
seed = gr.Number(
value=args.seed, precision=0, label="Seed"
seed = gr.Textbox(
value=args.seed,
label="Seed",
info="An integer or a JSON list of integers, -1 for random",
)
device = gr.Dropdown(
elem_id="device",

@@ -3,7 +3,7 @@ import os
import gradio as gr
from PIL import Image
from apps.stable_diffusion.scripts import lora_train
from apps.stable_diffusion.src import prompt_examples, args
from apps.stable_diffusion.src import prompt_examples, args, utils
from apps.stable_diffusion.web.ui.utils import (
available_devices,
nodlogo_loc,

@@ -168,7 +168,9 @@ with gr.Blocks(title="Lora Training") as lora_train_web:
stop_batch = gr.Button("Stop Batch")
with gr.Row():
seed = gr.Number(
value=args.seed, precision=0, label="Seed"
value=utils.parse_seed_input(args.seed)[0],
precision=0,
label="Seed",
)
device = gr.Dropdown(
elem_id="device",

@@ -49,7 +49,7 @@ def outpaint_inf(
width: int,
steps: int,
guidance_scale: float,
seed: int,
seed: str,
batch_count: int,
batch_size: int,
scheduler: str,

@@ -178,7 +178,10 @@ def outpaint_inf(
start_time = time.time()
global_obj.get_sd_obj().log = ""
generated_imgs = []
seeds = utils.batch_seeds(seed, batch_count, repeatable_seeds)
try:
seeds = utils.batch_seeds(seed, batch_count, repeatable_seeds)
except TypeError as error:
raise gr.Error(str(error)) from None

left = True if "left" in directions else False
right = True if "right" in directions else False

@@ -542,8 +545,10 @@ with gr.Blocks(title="Outpainting") as outpaint_web:
visible=False,
)
with gr.Row():
seed = gr.Number(
value=args.seed, precision=0, label="Seed"
seed = gr.Textbox(
value=args.seed,
label="Seed",
info="An integer or a JSON list of integers, -1 for random",
)
device = gr.Dropdown(
elem_id="device",

@@ -284,6 +284,13 @@ def llm_chat_api(InputData: dict):
}


def view_json_file(file_obj):
content = ""
with open(file_obj.name, "r") as fopen:
content = fopen.read()
return content


with gr.Blocks(title="Chatbot") as stablelm_chat:
with gr.Row():
model_choices = list(

@@ -319,6 +326,14 @@ with gr.Blocks(title="Chatbot") as stablelm_chat:
],
visible=True,
)
with gr.Row():
with gr.Group():
config_file = gr.File(label="Upload sharding configuration")
json_view_button = gr.Button("View as JSON")
json_view = gr.JSON()
json_view_button.click(
fn=view_json_file, inputs=[config_file], outputs=[json_view]
)
chatbot = gr.Chatbot(height=500)
with gr.Row():
with gr.Column():

@@ -46,7 +46,7 @@ def txt2img_inf(
width: int,
steps: int,
guidance_scale: float,
seed: int,
seed: str | int,
batch_count: int,
batch_size: int,
scheduler: str,

@@ -178,8 +178,11 @@ def txt2img_inf(
start_time = time.time()
global_obj.get_sd_obj().log = ""
generated_imgs = []
seeds = utils.batch_seeds(seed, batch_count, repeatable_seeds)
text_output = ""
try:
seeds = utils.batch_seeds(seed, batch_count, repeatable_seeds)
except TypeError as error:
raise gr.Error(str(error)) from None

for current_batch in range(batch_count):
out_imgs = global_obj.get_sd_obj().generate_images(

@@ -481,8 +484,10 @@ with gr.Blocks(title="Text-to-Image") as txt2img_web:
label="Repeatable Seeds",
)
with gr.Row():
seed = gr.Number(
value=args.seed, precision=0, label="Seed"
seed = gr.Textbox(
value=args.seed,
label="Seed",
info="An integer or a JSON list of integers, -1 for random",
)
device = gr.Dropdown(
elem_id="device",

@@ -42,7 +42,7 @@ def upscaler_inf(
steps: int,
noise_level: int,
guidance_scale: float,
seed: int,
seed: str,
batch_count: int,
batch_size: int,
scheduler: str,

@@ -177,8 +177,11 @@ def upscaler_inf(
start_time = time.time()
global_obj.get_sd_obj().log = ""
generated_imgs = []
seeds = utils.batch_seeds(seed, batch_count, repeatable_seeds)
extra_info = {"NOISE LEVEL": noise_level}
try:
seeds = utils.batch_seeds(seed, batch_count, repeatable_seeds)
except TypeError as error:
raise gr.Error(str(error)) from None

for current_batch in range(batch_count):
low_res_img = image

@@ -534,8 +537,10 @@ with gr.Blocks(title="Upscaler") as upscaler_web:
visible=False,
)
with gr.Row():
seed = gr.Number(
value=args.seed, precision=0, label="Seed"
seed = gr.Textbox(
value=args.seed,
label="Seed",
info="An integer or a JSON list of integers, -1 for random",
)
device = gr.Dropdown(
elem_id="device",
build_tools/vicuna_testing.py (new file, 14 lines)
@@ -0,0 +1,14 @@
import os
from sys import executable
import subprocess
from apps.language_models.scripts import vicuna


def test_loop():
    precisions = ["fp16", "int8", "int4"]
    devices = ["cpu"]
    for precision in precisions:
        for device in devices:
            model = vicuna.UnshardedVicuna(device=device, precision=precision)
            model.compile()
            del model
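Note that the CI step added above invokes this file directly (`python build_tools/vicuna_testing.py`). A hypothetical entry point of the following shape would be needed for `test_loop` to run in that mode; it is not part of the file as shown here:

```python
if __name__ == "__main__":
    test_loop()
```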