tinychat
diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
index d1aadc9498..4865ffbd2d 100644
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@@ -455,8 +455,8 @@ jobs:
     steps:
     - name: Checkout Code
       uses: actions/checkout@v4
-    - name: Insert amdgpu
-      run: sudo modprobe amdgpu
+    - name: Remove amdgpu
+      run: sudo rmmod amdgpu || true
     - name: Symlink models and datasets
       run: |
         mkdir -p weights
@@ -474,10 +474,6 @@ jobs:
         rm -f /tmp/staging.db /tmp/staging.db-shm /tmp/staging.db-wal
     - name: reset process replay
       run: test/external/process_replay/reset.py
-    - name: setup perflevel
-      run: |
-        examples/mlperf/training_submission_v4.1/tinycorp/benchmarks/bert/implementations/tinybox_red/setup.sh
-        rocm-smi
    - name: Train MNIST
      run: time PYTHONPATH=. AMD=1 TARGET_EVAL_ACC_PCT=96.0 python3 examples/beautiful_mnist.py | tee beautiful_mnist.txt
    - name: Run 10 CIFAR training steps
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 4f980faf94..82120bd264 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -323,8 +323,8 @@ jobs:
       run: awk '/```python/{flag=1;next}/```/{flag=0}flag' README.md > README.py && PYTHONPATH=. python README.py
     - name: Run unit tests
       run: PYTHONPATH="." python -m pytest -n=auto test/unit/
-    - name: Repo line count < 11300 lines
-      run: MAX_LINE_COUNT=11300 python sz.py
+    - name: Repo line count < 11500 lines
+      run: MAX_LINE_COUNT=11500 python sz.py

   fuzzing:
     name: Fuzzing
@@ -347,7 +347,7 @@ jobs:

   testgpuimage:
     name: 'GPU IMAGE Tests'
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-22.04
     timeout-minutes: 10
     steps:
     - name: Checkout Code
@@ -371,7 +371,7 @@ jobs:

   testopenpilot:
     name: 'openpilot Compile Tests'
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-22.04
     timeout-minutes: 10
     steps:
     - name: Checkout Code
@@ -644,7 +644,7 @@ jobs:
         backend: [metal, llvm, cpu]
     name: MacOS (${{ matrix.backend }})
     runs-on: macos-15
-    timeout-minutes: 10
+    timeout-minutes: 20
     steps:
     - name: Checkout Code
       uses: actions/checkout@v4
@@ -664,6 +664,9 @@ jobs:
       run: python3 -m pytest -n=auto test/ --ignore=test/models --ignore=test/unit --durations=20
     - name: Run process replay tests
       uses: ./.github/actions/process-replay
+    - name: Run macOS-specific unit test
+      if: matrix.backend == 'cpu'
+      run: python3 -m pytest test/unit/test_disk_tensor.py::TestDiskTensor::test_copy_to_cpu_not_truncated

   # ****** Windows Tests ******
diff --git a/.gitignore b/.gitignore
index 80e74a6072..e94ef15d7e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,6 +10,7 @@ notebooks
 *.so
 *.txt
 build
+!examples/tinychat/assets/cdn.jsdelivr.net/npm/purecss@3.0.0/build/
 /dist
 *.egg-info
 /env
diff --git a/autogen_stubs.sh b/autogen_stubs.sh
index 60010312eb..9074c858b7 100755
--- a/autogen_stubs.sh
+++ b/autogen_stubs.sh
@@ -171,6 +171,7 @@ generate_amd() {
     extra/hip_gpu_driver/sdma_v6_0_0_pkt_open.h \
     extra/hip_gpu_driver/gc_11_0_0_offset.h \
     extra/hip_gpu_driver/gc_10_3_0_offset.h \
+    extra/hip_gpu_driver/sienna_cichlid_ip_offset.h \
     --clang-args="-I/opt/rocm/include -x c++" \
     -o $BASE/amd_gpu.py

@@ -353,6 +354,12 @@ generate_am() {
     extra/amdpci/headers/amdgpu_smu.h \
     -o $BASE/am/smu_v13_0_0.py
   fixup $BASE/am/smu_v13_0_0.py
+
+  clang2py -k cdefstum \
+    extra/amdpci/headers/hdp_6_0_0_offset.h \
+    extra/amdpci/headers/hdp_6_0_0_sh_mask.h \
+    -o $BASE/am/hdp_6_0_0.py
+  fixup $BASE/am/hdp_6_0_0.py
 }

 generate_webgpu() {
diff --git a/examples/mlperf/model_train.py b/examples/mlperf/model_train.py
index fb36ca6660..bd594e1734 100644
--- a/examples/mlperf/model_train.py
+++ b/examples/mlperf/model_train.py
@@ -851,7 +851,9 @@ def train_step_bert(model, optimizer, scheduler, loss_scaler:float, input_ids:Te
   optimizer.step()
   scheduler.step()
-  return loss.realize(), global_norm.realize()
+  # TODO: no to("CPU") here because it blocks and messes the python time
+  Tensor.realize(loss, global_norm, optimizer.optimizers[0].lr)
+  return loss, global_norm, optimizer.optimizers[0].lr

 @TinyJit
 def eval_step_bert(model, input_ids:Tensor, segment_ids:Tensor, attention_mask:Tensor, masked_positions:Tensor, masked_lm_ids:Tensor,
@@ -862,7 +864,10 @@ def eval_step_bert(model, input_ids:Tensor, segment_ids:Tensor, attention_mask:T
   lm_logits, seq_relationship_logits = model(input_ids, attention_mask, masked_positions, segment_ids)
   masked_lm_accuracy, seq_relationship_accuracy, masked_lm_loss, next_sentence_loss = \
     model.accuracy(lm_logits, seq_relationship_logits, masked_lm_ids, masked_lm_weights, next_sentence_labels)
-  return masked_lm_accuracy.realize(), seq_relationship_accuracy.realize(), masked_lm_loss.realize(), next_sentence_loss.realize()
+  for t in [masked_lm_accuracy, seq_relationship_accuracy, masked_lm_loss, next_sentence_loss]:
+    t.to_("CPU")
+  Tensor.realize(masked_lm_accuracy, seq_relationship_accuracy, masked_lm_loss, next_sentence_loss)
+  return masked_lm_accuracy, seq_relationship_accuracy, masked_lm_loss, next_sentence_loss

 def train_bert():
   # NOTE: pip install tensorflow, wandb required
@@ -1031,47 +1036,49 @@ def train_bert():
       MLLOGGER.start(key=mllog_constants.EPOCH_START, value=i*BS, metadata={"epoch_num": i*BS})

     while train_data is not None and i < train_steps and not achieved:
-      Tensor.training = True
-      BEAM.value = TRAIN_BEAM
-      st = time.perf_counter()
-      GlobalCounters.reset()
-      loss, global_norm = train_step_bert(model, optimizer_group, scheduler_group, loss_scaler,
-        train_data["input_ids"], train_data["segment_ids"], train_data["input_mask"], train_data["masked_lm_positions"], \
-        train_data["masked_lm_ids"], train_data["masked_lm_weights"], train_data["next_sentence_labels"], GPUS)
+      if getenv("TRAIN", 1):
+        Tensor.training = True
+        BEAM.value = TRAIN_BEAM
+        st = time.perf_counter()
+        GlobalCounters.reset()
+        loss, global_norm, lr = train_step_bert(model, optimizer_group, scheduler_group, loss_scaler,
+          train_data["input_ids"], train_data["segment_ids"], train_data["input_mask"], train_data["masked_lm_positions"], \
+          train_data["masked_lm_ids"], train_data["masked_lm_weights"], train_data["next_sentence_labels"], GPUS)

-      pt = time.perf_counter()
+        pt = time.perf_counter()

-      try:
-        next_data = next(train_it)
-      except StopIteration:
-        next_data = None
+        try:
+          next_data = next(train_it)
+        except StopIteration:
+          next_data = None

-      dt = time.perf_counter()
+        dt = time.perf_counter()

-      device_str = loss.device if isinstance(loss.device, str) else f"{loss.device[0]} * {len(loss.device)}"
-      loss = loss.item()
+        device_str = parameters[0].device if isinstance(parameters[0].device, str) else f"{parameters[0].device[0]} * {len(parameters[0].device)}"
+        loss = loss.item()
+        lr = lr.item()

-      cl = time.perf_counter()
-      if BENCHMARK: step_times.append(cl - st)
+        cl = time.perf_counter()
+        if BENCHMARK: step_times.append(cl - st)
-      tqdm.write(
-        f"{i:5} {((cl - st)) * 1000.0:7.2f} ms run, {(pt - st) * 1000.0:7.2f} ms python, {(dt - pt) * 1000.0:6.2f} ms fetch data, "
-        f"{(cl - dt) * 1000.0:7.2f} ms {device_str}, {loss:5.2f} loss, {optimizer_wd.lr.numpy()[0]:.6f} LR, "
-        f"{GlobalCounters.mem_used / 1e9:.2f} GB used, {GlobalCounters.global_ops * 1e-9 / (cl - st):9.2f} GFLOPS")
-      if WANDB:
-        wandb.log({"lr": optimizer_wd.lr.numpy(), "train/loss": loss, "train/global_norm": global_norm.item(), "train/step_time": cl - st,
-          "train/python_time": pt - st, "train/data_time": dt - pt, "train/cl_time": cl - dt,
-          "train/GFLOPS": GlobalCounters.global_ops * 1e-9 / (cl - st), "epoch": (i+1)*BS})
+        tqdm.write(
+          f"{i:5} {((cl - st)) * 1000.0:7.2f} ms run, {(pt - st) * 1000.0:7.2f} ms python, {(dt - pt) * 1000.0:6.2f} ms fetch data, "
+          f"{(cl - dt) * 1000.0:7.2f} ms {device_str}, {loss:5.2f} loss, {lr:.6f} LR, "
+          f"{GlobalCounters.mem_used / 1e9:.2f} GB used, {GlobalCounters.global_ops * 1e-9 / (cl - st):9.2f} GFLOPS")
+        if WANDB:
+          wandb.log({"lr": lr, "train/loss": loss, "train/global_norm": global_norm.item(), "train/step_time": cl - st,
+            "train/python_time": pt - st, "train/data_time": dt - pt, "train/cl_time": cl - dt,
+            "train/GFLOPS": GlobalCounters.global_ops * 1e-9 / (cl - st), "epoch": (i+1)*BS})

-      train_data, next_data = next_data, None
-      i += 1
+        train_data, next_data = next_data, None
+        i += 1

-      if i == BENCHMARK:
-        median_step_time = sorted(step_times)[(BENCHMARK + 1) // 2]  # in seconds
-        estimated_total_minutes = int(median_step_time * train_steps / 60)
-        print(f"Estimated training time: {estimated_total_minutes // 60}h{estimated_total_minutes % 60}m")
-        print(f"epoch global_ops: {train_steps * GlobalCounters.global_ops:_}, "
-          f"epoch global_mem: {train_steps * GlobalCounters.global_mem:_}")
+        if i == BENCHMARK:
+          median_step_time = sorted(step_times)[(BENCHMARK + 1) // 2]  # in seconds
+          estimated_total_minutes = int(median_step_time * train_steps / 60)
+          print(f"Estimated training time: {estimated_total_minutes // 60}h{estimated_total_minutes % 60}m")
+          print(f"epoch global_ops: {train_steps * GlobalCounters.global_ops:_}, "
+            f"epoch global_mem: {train_steps * GlobalCounters.global_mem:_}")

       # ** eval loop **
       if i % eval_step_freq == 0 or (BENCHMARK and i == BENCHMARK) or i == train_steps:
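The train_step_bert change above swaps per-tensor .realize() calls for one batched Tensor.realize(...) and deliberately skips the device-to-host copy, so the python/data timestamps are not polluted by a blocking transfer; the sync only happens later at .item(). A minimal sketch of the pattern, with made-up tensors standing in for loss and global_norm:

```python
# Illustrative sketch of the realize-then-item pattern used in train_step_bert.
# Tensor.realize(...) schedules all results in one go without a blocking
# device->host copy; the copy (and the wait) only happens at .item() below.
from tinygrad import Tensor

a = Tensor.rand(256)
loss, norm = (a * a).sum(), a.abs().max()
Tensor.realize(loss, norm)        # kernels launched; no host copy yet
# ... "python time" timestamps would be taken here, unaffected by the copy ...
print(loss.item(), norm.item())   # the blocking transfers happen here
```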
diff --git a/examples/tinychat/assets/cdn.jsdelivr.net/npm/purecss@3.0.0/build/base-min.css b/examples/tinychat/assets/cdn.jsdelivr.net/npm/purecss@3.0.0/build/base-min.css
new file mode 100644
index 0000000000..df4fbc0557
--- /dev/null
+++ b/examples/tinychat/assets/cdn.jsdelivr.net/npm/purecss@3.0.0/build/base-min.css
@@ -0,0 +1,11 @@
+/*!
+Pure v3.0.0
+Copyright 2013 Yahoo!
+Licensed under the BSD License.
+https://github.com/pure-css/pure/blob/master/LICENSE
+*/
+/*!
+normalize.css v | MIT License | https://necolas.github.io/normalize.css/
+Copyright (c) Nicolas Gallagher and Jonathan Neal
+*/
+/*! normalize.css v8.0.1 | MIT License | github.com/necolas/normalize.css */html{line-height:1.15;-webkit-text-size-adjust:100%}body{margin:0}main{display:block}h1{font-size:2em;margin:.67em 0}hr{box-sizing:content-box;height:0;overflow:visible}pre{font-family:monospace,monospace;font-size:1em}a{background-color:transparent}abbr[title]{border-bottom:none;text-decoration:underline;-webkit-text-decoration:underline dotted;text-decoration:underline dotted}b,strong{font-weight:bolder}code,kbd,samp{font-family:monospace,monospace;font-size:1em}small{font-size:80%}sub,sup{font-size:75%;line-height:0;position:relative;vertical-align:baseline}sub{bottom:-.25em}sup{top:-.5em}img{border-style:none}button,input,optgroup,select,textarea{font-family:inherit;font-size:100%;line-height:1.15;margin:0}button,input{overflow:visible}button,select{text-transform:none}[type=button],[type=reset],[type=submit],button{-webkit-appearance:button}[type=button]::-moz-focus-inner,[type=reset]::-moz-focus-inner,[type=submit]::-moz-focus-inner,button::-moz-focus-inner{border-style:none;padding:0}[type=button]:-moz-focusring,[type=reset]:-moz-focusring,[type=submit]:-moz-focusring,button:-moz-focusring{outline:1px dotted ButtonText}fieldset{padding:.35em .75em .625em}legend{box-sizing:border-box;color:inherit;display:table;max-width:100%;padding:0;white-space:normal}progress{vertical-align:baseline}textarea{overflow:auto}[type=checkbox],[type=radio]{box-sizing:border-box;padding:0}[type=number]::-webkit-inner-spin-button,[type=number]::-webkit-outer-spin-button{height:auto}[type=search]{-webkit-appearance:textfield;outline-offset:-2px}[type=search]::-webkit-search-decoration{-webkit-appearance:none}::-webkit-file-upload-button{-webkit-appearance:button;font:inherit}details{display:block}summary{display:list-item}template{display:none}[hidden]{display:none}html{font-family:sans-serif}.hidden,[hidden]{display:none!important}.pure-img{max-width:100%;height:auto;display:block}
\ No newline at end of file
diff --git a/examples/tinychat/tinychat-browser/.gitignore b/examples/tinychat/tinychat-browser/.gitignore
new file mode 100644
index 0000000000..eddee94585
--- /dev/null
+++ b/examples/tinychat/tinychat-browser/.gitignore
@@ -0,0 +1,5 @@
+net_*
+llama3-2.tiktoken
+tiktoken.js
+tiktoken_bg.wasm
+transformer*
\ No newline at end of file
diff --git a/examples/tinychat/tinychat-browser/README.md b/examples/tinychat/tinychat-browser/README.md
new file mode 100644
index 0000000000..2051cdfbda
--- /dev/null
+++ b/examples/tinychat/tinychat-browser/README.md
@@ -0,0 +1,8 @@
+# How to build and run tinychat in browser (WebGPU and WASM)
+- `PYTHONPATH=. python examples/tinychat/tinychat-browser/compile.py`
+- `./examples/tinychat/tinychat-browser/compile_wasm.sh`
+  - Prerequisite: [install emscripten](https://emscripten.org/docs/getting_started/downloads.html). This script looks for `~/emsdk/emsdk_env.sh`, adjust this based on your installation.
+- `./examples/tinychat/tinychat-browser/make_tiktoken_js.sh`
+  - Prerequisite: install `npm`, `webpack`.
+- `cd examples/tinychat && python -m http.server 7776`
+- In browser: open either `localhost:7776/tinychat-browser` (WebGPU), or `localhost:7776/tinychat-browser/?backend=wasm` (WASM)
\ No newline at end of file
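If the served page fails to load something, it is usually a missing build artifact from the steps above. A quick pre-serve sanity check (a hypothetical helper, not part of the repo; filenames taken from compile.py, compile_wasm.sh, and the .gitignore above):

```python
# Hypothetical pre-serve sanity check (not part of the repo): verify the build steps
# above produced everything the page will fetch. Filenames come from compile.py,
# compile_wasm.sh, and tinychat-browser/.gitignore.
import glob, os

BROWSER_DIR = "examples/tinychat/tinychat-browser"
expected = ["net.js", "net_clang.js", "net_metadata.json", "transformer.js",
            "transformer.wasm", "llama3-2.tiktoken", "tiktoken.js", "tiktoken_bg.wasm"]
missing = [f for f in expected if not os.path.exists(os.path.join(BROWSER_DIR, f))]
chunks = glob.glob(os.path.join(BROWSER_DIR, "net_part*.chunk"))
print(f"{len(chunks)} weight chunks; missing: {missing if missing else 'none'}")
```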
diff --git a/examples/tinychat/tinychat-browser/compile.py b/examples/tinychat/tinychat-browser/compile.py
new file mode 100644
index 0000000000..8b898ec3da
--- /dev/null
+++ b/examples/tinychat/tinychat-browser/compile.py
@@ -0,0 +1,149 @@
+import os, json, hashlib, math
+from extra.export_model import export_model
+from examples.llama3 import build_transformer, Tokenizer
+from tinygrad.nn.state import get_state_dict, load_state_dict
+from tinygrad import Device, Variable, Tensor, dtypes, TinyJit
+from tinygrad.helpers import fetch, Context
+from tiktoken.load import load_tiktoken_bpe, dump_tiktoken_bpe
+
+def prepare_browser_chunks(model):
+  # split weights into browser-friendly chunks
+  state_dict = get_state_dict(model)
+  del state_dict['output.weight'], state_dict['output.scale'] # same as tok_embeddings; ensures consistency with model export
+  chunk_size = 16 * 1024 * 1024 # small chunks based on iphone browser constraints
+  metadata = {}
+  # We won't export cache_kv bytes (because we start inference on client at start_pos=0), but we will tell the client how big cache_kv needs to be
+  t_infos = [(v.lazydata.base.realized.nbytes, k, v.dtype) for k,v in state_dict.items() if "cache_kv" not in k]
+  empty_t_infos = [(v.lazydata.base.realized.nbytes, k, v.dtype) for k,v in state_dict.items() if "cache_kv" in k]
+
+  split_t_infos = []
+  for size, name, dtype in t_infos:
+    if size <= chunk_size:
+      split_t_infos.append((size, name, dtype, ()))
+    else: # split large weights into multiple parts
+      for i in range(0, size, chunk_size):
+        split_t_infos.append((min(chunk_size, size-i), f"{name}_part{math.ceil(i/chunk_size)}", dtype, (i, min(i+chunk_size, size))))
+
+  files = []
+  # pack weights into files with FFD bin packing
+  split_t_infos = sorted(split_t_infos, reverse=True)
+  for info in split_t_infos:
+    placed = False
+    for file in files:
+      if sum(i[0] for i in file) + info[0] <= chunk_size:
+        if info[3] and any(i[3] for i in file): continue # no two split tensors can touch the same file, due to wasm loading constraints
+        file.append(info)
+        placed = True
+        break
+    if not placed:
+      files.append([info])
+
+  tinygrad_dtypes = {dtypes.float32: "float32", dtypes.float16: "float16", dtypes.int8: "int8", dtypes.int32: "int32"}
+  for i, file in enumerate(files):
+    cursor = 0
+    with open(os.path.join(os.path.dirname(__file__), f'./net_part{i}.chunk'), "wb+") as writer:
+      for size, name, dtype, offsets in file:
+        name, part_num = (name, 0) if "_part" not in name else (name.split("_part")[0], int(name.split("_part")[1]))
+        default = {"parts": {}, "dtype": tinygrad_dtypes[dtype]}
+        weight_metadata = metadata.get(name, default)
+        weight_metadata["parts"][part_num] = {"file": i, "file_start_pos": cursor, "size": size}
+        metadata[name] = weight_metadata
+        data = bytes(state_dict[name].lazydata.base.realized.as_buffer())
+        data = data if not offsets else data[offsets[0]:offsets[1]]
+        writer.write(data)
+        cursor += size
+
+  metadata.update({name: {"parts": {0: {"empty": True, "size": size}}, "dtype": tinygrad_dtypes[dtype]} for size, name, dtype in empty_t_infos})
+
+  for k in metadata:
+    metadata[k]["parts"] = [part for part_num, part in sorted(metadata[k]["parts"].items(), key = lambda x: x[0])]
+    cursor = 0
+    for i, part in enumerate(metadata[k]["parts"]):
metadata[k]["parts"][i]["target_start_pos"] = cursor + cursor += part["size"] + metadata[k]["size"] = cursor + + # compute hashes, which client app will check to determine whether to update with new weights and/or detect integrity issues + state_dict_hash = hashlib.sha256(json.dumps(metadata, sort_keys=True).encode("utf-8")).hexdigest() + metadata = {"state_dict": metadata, "state_dict_hash": state_dict_hash, "files": []} + hashes = set() + for i in range(len(files)): + with open(os.path.join(os.path.dirname(__file__), f'./net_part{i}.chunk'), "rb") as reader: + hash = hashlib.sha256(reader.read()).hexdigest() + hashes.add(hash) + metadata["files"].append({"name": f'net_part{i}.chunk', "hash": hash}) + if len(hashes) != len(files): print(f"WARNING: {len(files)} files were exported, but only {len(hashes)} are unique: something may have gone wrong") + metadata_hash = hashlib.sha256(json.dumps(metadata, sort_keys=True).encode("utf-8")).hexdigest() + metadata = {"metadata": metadata, "metadata_hash": metadata_hash} + + with open(os.path.join(os.path.dirname(__file__), f'./net_metadata.json'), "w") as writer: json.dump(metadata, writer, indent=4) + return metadata + +def validate_model(model, tokenizer): + prompt = "yo" + toks = [tokenizer.bos_id] + toks += [tokenizer.special_tokens["<|start_header_id|>"]] + tokenizer.encode("user") + [tokenizer.special_tokens["<|end_header_id|>"]] + tokenizer.encode("\n\n") + toks += tokenizer.encode(prompt) + [tokenizer.special_tokens["<|eot_id|>"]] + toks += [tokenizer.special_tokens["<|start_header_id|>"]] + tokenizer.encode("assistant") + [tokenizer.special_tokens["<|end_header_id|>"]] + tokenizer.encode("\n\n") + start_pos = 0 + run = TinyJit(model.forward) + for tok in toks[:-1]: + run(Tensor([[tok]]), Variable("start_pos", 0, model.max_context).bind(start_pos), 0.0, 0, 0.0, 0.0, 0.0).realize() + start_pos += 1 + tok = toks[-1] + result = "" + expected = "How's it going?" 
+  while True:
+    tok = run(Tensor([[tok]]), Variable("start_pos", 0, model.max_context).bind(start_pos), 0.0, 0, 0.0, 0.0, 0.0).item()
+    start_pos += 1
+    if tok in tokenizer.stop_tokens or len(result) > len(expected): break
+    result += tokenizer.decode([tok])
+  assert result == expected, f"Model validation failed, expected output: {expected}, actual output: {result}"
+
+if __name__=="__main__":
+  # Export BPE data for use with tiktoken.js
+  tokenizer_path = fetch("https://huggingface.co/bofenghuang/Meta-Llama-3-8B/resolve/main/original/tokenizer.model", "tokenizer.model", subdir="llama3-1b-instruct")
+  mergeable_ranks = load_tiktoken_bpe(str(tokenizer_path))
+  bpe_path = os.path.join(os.path.dirname(__file__), "llama3-2.tiktoken")
+  dump_tiktoken_bpe(mergeable_ranks, bpe_path)
+  tokenizer = Tokenizer(str(tokenizer_path))
+
+  model_path = fetch("https://huggingface.co/bartowski/Llama-3.2-1B-Instruct-GGUF/resolve/main/Llama-3.2-1B-Instruct-f16.gguf", "Llama-3.2-1B-Instruct-f16.gguf", subdir="llama3-1b-instruct")
+  Tensor.no_grad = True
+  max_context=1024
+  tok = 128000
+  TEMPERATURE, TOP_K, TOP_P, ALPHA_F, ALPHA_P = 0.95, 0, 0.0, 0.0, 0.0
+  start_pos = Variable("start_pos", 0, max_context).bind(0)
+  model_input = lambda: [Tensor([[tok]]), start_pos, TEMPERATURE, TOP_K, TOP_P, ALPHA_F, ALPHA_P]
+
+  Device.DEFAULT="CPU"
+  model = build_transformer(model_path, model_size="1B", quantize="int8", scale_dtype=dtypes.float32, device=Device.DEFAULT, max_context=max_context)
+  state_dict = get_state_dict(model)
+  validate_model(model, tokenizer)
+  model_name = "transformer"
+
+  with Context(BEAM=3):
+    cprog, js_wrapper = export_model(model, "wasm", *model_input(), model_name=model_name)
+    # ensure consistency with exported weights
+    js_wrapper = js_wrapper.replace("output.weight", "tok_embeddings.weight").replace("output.scale", "tok_embeddings.scale")
+
+  with open(os.path.join(os.path.dirname(__file__), f"{model_name}.c"), "w") as f: f.write(cprog)
+  with open(os.path.join(os.path.dirname(__file__), "net_clang.js"), "w") as f: f.write(js_wrapper)
+
+  Device.DEFAULT="WEBGPU"
+  # float16 is not yet supported for dawn/Vulkan/NVIDIA stack, see: https://issues.chromium.org/issues/42251215
+  # therefore for now, we used CLANG to quantize the float16 llama to int8 with float32 scales, then load to WEBGPU
+  model = build_transformer(model_path, model_size="1B", quantize="int8", max_context=max_context, load_weights=False)
+  load_state_dict(model, state_dict)
+  # these were the same before load_state_dict
+  model.output.weight, model.output.scale = model.tok_embeddings.weight, model.tok_embeddings.scale
+
+  validate_model(model, tokenizer)
+  metadata = prepare_browser_chunks(model) # export weights to disk
+
+  with Context(BEAM=3):
+    prg, input_sizes, output_sizes, state = export_model(model, "webgpu", *model_input(), model_name=model_name, stream_weights=True)
+    # ensure consistency with exported weights
+    prg = prg.replace("output.weight", "tok_embeddings.weight").replace("output.scale", "tok_embeddings.scale")
+
+  with open(os.path.join(os.path.dirname(__file__), "net.js"), "w") as f: f.write(prg)
\ No newline at end of file
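The net_metadata.json layout written by prepare_browser_chunks is easiest to understand from the consumer side: each tensor is described by ordered parts, each recording which chunk file holds its bytes, where they start in that file, and where they land in the reassembled buffer. A minimal offline sketch of that reassembly (hypothetical; the real consumer is the browser app):

```python
# Hypothetical offline consumer of net_metadata.json; shows how "parts" with
# file/file_start_pos/size/target_start_pos fit together.
import json, hashlib, os

def load_tensor_bytes(browser_dir: str, name: str) -> bytes:
  with open(os.path.join(browser_dir, "net_metadata.json")) as f:
    meta = json.load(f)["metadata"]
  # integrity check, mirroring what the browser client is meant to do
  for entry in meta["files"]:
    with open(os.path.join(browser_dir, entry["name"]), "rb") as f:
      assert hashlib.sha256(f.read()).hexdigest() == entry["hash"], f"corrupt {entry['name']}"
  info = meta["state_dict"][name]
  out = bytearray(info["size"])
  for part in info["parts"]:
    if part.get("empty"): continue  # cache_kv parts are allocated client-side, never shipped
    with open(os.path.join(browser_dir, f"net_part{part['file']}.chunk"), "rb") as f:
      f.seek(part["file_start_pos"])
      out[part["target_start_pos"]:part["target_start_pos"]+part["size"]] = f.read(part["size"])
  return bytes(out)

print(len(load_tensor_bytes("examples/tinychat/tinychat-browser", "tok_embeddings.weight")))
```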
diff --git a/examples/tinychat/tinychat-browser/compile_wasm.sh b/examples/tinychat/tinychat-browser/compile_wasm.sh
new file mode 100755
index 0000000000..fda21b0c4a
--- /dev/null
+++ b/examples/tinychat/tinychat-browser/compile_wasm.sh
@@ -0,0 +1,23 @@
+#!/usr/bin/env bash
+cd "$(dirname "$0")"
+
+# prereq: install emscripten: https://emscripten.org/docs/getting_started/downloads.html
+EMSCRIPTEN_PATH=~/emsdk/emsdk_env.sh
+source $EMSCRIPTEN_PATH
+step="transformer"
+initial_memory=6553600
+max_memory=1500053504
+exported_functions='["_net", "_malloc", "_free", "_set_buf"]'
+
+emcc "${step}.c" \
+  -O3 -msimd128 -ffast-math -flto \
+  -o "${step}.js" \
+  -s MODULARIZE=1 \
+  -s EXPORT_ES6=1 \
+  -s EXPORTED_FUNCTIONS="${exported_functions}" \
+  -s ENVIRONMENT='worker' \
+  -s FILESYSTEM=0 \
+  -s EVAL_CTORS \
+  -s ALLOW_MEMORY_GROWTH=1 \
+  -s INITIAL_MEMORY="$initial_memory" \
+  -s MAXIMUM_MEMORY="$max_memory"
\ No newline at end of file
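The two memory constants passed to emcc are wasm linear-memory sizes, and wasm memory is allocated in 64 KiB pages, so both must be page-aligned. A quick check of the values above:

```python
# Wasm linear memory is allocated in 64 KiB pages, so emcc's INITIAL_MEMORY and
# MAXIMUM_MEMORY must be page-aligned; the constants above check out.
PAGE = 64 * 1024
initial_memory, max_memory = 6_553_600, 1_500_053_504
assert initial_memory % PAGE == 0 and max_memory % PAGE == 0
print(initial_memory // PAGE, max_memory // PAGE)  # 100 pages (6.25 MiB), 22889 pages (~1.4 GiB)
```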
diff --git a/examples/tinychat/tinychat-browser/index.css b/examples/tinychat/tinychat-browser/index.css
new file mode 100644
index 0000000000..9be6635450
--- /dev/null
+++ b/examples/tinychat/tinychat-browser/index.css
@@ -0,0 +1,322 @@
+/* define colors */
+:root {
+  --primary-color: #fff;
+  --secondary-color: #2a2a2a;
+  --secondary-color-transparent: #ffffff66;
+  --primary-bg-color: #1a1a1a;
+  --foreground-color: #f0f0f0;
+}
+
+main {
+  width: 100%;
+  height: 100%;
+
+  display: flex;
+  flex-direction: column;
+
+  place-items: center;
+}
+
+.home {
+  width: 100%;
+  height: 90%;
+
+  margin-bottom: 10rem;
+}
+
+.title {
+  font-size: 3rem;
+  margin: 1rem 0;
+  margin-top: 3rem;
+}
+
+.histories-container-container {
+  width: 100%;
+  max-height: 75%;
+
+  position: relative;
+}
+
+.histories-container {
+  overflow-y: auto;
+  overflow-x: hidden;
+  width: 100%;
+  height: 100%;
+
+  display: flex;
+  flex-direction: column;
+  gap: 1rem;
+  align-items: center;
+
+  margin: 0;
+  padding: 3rem 1rem;
+}
+
+.histories-start {
+  height: 3rem;
+  width: 100%;
+
+  z-index: 999;
+  top: 0;
+  position: absolute;
+
+  background: linear-gradient(
+    180deg,
+    var(--primary-bg-color) 0%,
+    transparent 100%
+  );
+}
+.histories-end {
+  height: 3rem;
+  width: 100%;
+
+  z-index: 999;
+  bottom: 0;
+  position: absolute;
+
+  background: linear-gradient(
+    0deg,
+    var(--primary-bg-color) 0%,
+    transparent 100%
+  );
+}
+
+.history {
+  padding: 1rem;
+  width: 100%;
+  max-width: 40rem;
+
+  background-color: var(--secondary-color);
+  border-radius: 10px;
+  border-left: 2px solid var(--primary-color);
+
+  cursor: pointer;
+
+  transform: translateX(calc(1px * var(--tx, 0)));
+  opacity: var(--opacity, 1);
+}
+.history:hover {
+  background-color: var(--secondary-color);
+}
+
+.history-delete-button {
+  position: absolute;
+  top: 0;
+  right: 0;
+  padding: 0.5rem;
+  margin: 0;
+  outline: none;
+  border: none;
+  background-color: var(--secondary-color);
+  color: var(--foreground-color);
+  border-radius: 0 0 0 10px;
+  cursor: pointer;
+  transition: 0.2s;
+}
+.history-delete-button:hover {
+  background-color: var(--secondary-color);
+  padding: 0.75rem;
+}
+
+.messages {
+  overflow-y: auto;
+  height: 100%;
+  width: 100%;
+  max-width: 1200px;
+
+  display: flex;
+  flex-direction: column;
+  gap: 1rem;
+  align-items: center;
+  padding-top: 1rem;
+  padding-bottom: 11rem;
+}
+
+.message {
+  max-width: 75%;
+  padding: 0.5rem 1rem;
+  border-radius: 20px;
+}
+.message-role-assistant {
+  background-color: var(--secondary-color);
+  margin-right: auto;
+  color: #fff;
+}
+.message-role-user {
+  margin-left: auto;
+  background-color: var(--primary-color);
+  color: #000;
+}
+
+.message > pre {
+  white-space: pre-wrap;
+}
+
+.hljs {
+  width: 100%;
+  position: relative;
+  border-radius: 10px;
+  /* wrap code blocks */
+  white-space: pre-wrap;
+}
+/* put clipboard button in the top right corner of the code block */
+.clipboard-button {
+  position: absolute;
+  top: 0;
+  right: 0;
+  padding: 0.5rem;
+  margin: 0;
+  outline: none;
+  border: none;
+  background-color: var(--secondary-color);
+  color: var(--foreground-color);
+  border-radius: 0 0 0 10px;
+  cursor: pointer;
+  transition: 0.2s;
+}
+.clipboard-button:hover {
+  background-color: var(--secondary-color);
+  padding: 0.75rem;
+}
+
+.input-container {
+  position: absolute;
+  bottom: 0;
+
+  /* linear gradient from background-color to transparent on the top */
+  background: linear-gradient(
+    0deg,
+    var(--primary-bg-color) 55%,
+    transparent 100%
+  );
+
+  width: 100%;
+  max-width: 1200px;
+  display: flex;
+  flex-direction: column;
+  justify-content: center;
+  align-items: center;
+  z-index: 999;
+}
+
+.input-performance {
+  margin-top: 4rem;
+
+  display: flex;
+  flex-direction: row;
+  gap: 1rem;
+}
+
+.input-performance-point {
+  display: flex;
+  flex-direction: row;
+  place-items: center;
+  gap: 0.5rem;
+}
+.input-performance-point > p {
+  height: 1rem;
+  line-height: normal;
+}
+
+.input {
+  width: 90%;
+  min-height: 3rem;
+  flex-shrink: 0;
+
+  display: flex;
+  flex-direction: row;
+  justify-content: center;
+  gap: 0.5rem;
+
+  align-items: flex-end;
+  margin-bottom: 2rem;
+}
+
+.input-form {
+  width: 100%;
+  padding: 1rem;
+  min-height: 3rem;
+  max-height: 8rem;
+
+  background-color: var(--secondary-color);
+  color: var(--foreground-color);
+  border-radius: 10px;
+  border: none;
+  resize: none;
+  outline: none;
+}
+.mobile .input-form { /* prevent auto-zoom on touching prompt box */
+  font-size: 16px;
+}
+
+.input-button {
+  height: 3rem;
+  width: 4rem;
+
+  background-color: var(--primary-color);
+  color: var(--secondary-color);
+  border-radius: 10px;
+  padding: 0.5rem;
+  cursor: pointer;
+}
+.input-button:hover {
+  background-color: var(--secondary-color-transparent);
+}
+.input-button:disabled {
+  background-color: var(--secondary-color);
+  cursor: not-allowed;
+}
+
+/* wrap text */
+p {
+  white-space: pre-wrap;
+}
+
+/* fonts */
+.megrim-regular {
+  font-family: monospace;
+  font-weight: 400;
+  font-style: normal;
+}
+
+.monospace {
+  font-family: monospace;
+}
+
+.loading-bar {
+  display: flex;
+  flex-direction: row;
+  align-items: center;
+  gap: 0.5rem;
+  width: 100%;
+  min-height: 3rem;
+  margin-bottom: 2rem;
+}
+
+.loading-text {
+  color: var(--foreground-color);
+  font-size: 1rem;
+  white-space: nowrap;
+}
+
+#progress-percentage {
+  color: var(--foreground-color);
+  font-size: 1rem;
+  white-space: nowrap;
+}
+
+.progress-bar {
+  flex-grow: 1;
+  height: 0.5rem;
+  background-color: var(--secondary-color);
+  border-radius: 5px;
+  overflow: hidden;
+  position: relative;
+}
+
+.progress {
+  width: 0%;
+  height: 100%;
+  background-color: var(--primary-color);
+  transition: width 0.2s ease-in-out;
+}
\ No newline at end of file
diff --git a/examples/tinychat/tinychat-browser/index.html b/examples/tinychat/tinychat-browser/index.html
new file mode 100644
index 0000000000..8d43624c29
--- /dev/null
+++ b/examples/tinychat/tinychat-browser/index.html
@@ -0,0 +1,182 @@
[The 182 added lines of index.html were lost in extraction; only the text of the page's performance indicators survives: "SEC TO FIRST TOKEN", "TOKENS/SEC", and "TOKENS".]