tinychat
+SEC TO FIRST TOKEN
+ + + +TOKENS/SEC
+ + + +TOKENS
+ +diff --git a/.gitignore b/.gitignore index 80e74a6072..e94ef15d7e 100644 --- a/.gitignore +++ b/.gitignore @@ -10,6 +10,7 @@ notebooks *.so *.txt build +!examples/tinychat/assets/cdn.jsdelivr.net/npm/purecss@3.0.0/build/ /dist *.egg-info /env diff --git a/examples/tinychat/assets/cdn.jsdelivr.net/npm/purecss@3.0.0/build/base-min.css b/examples/tinychat/assets/cdn.jsdelivr.net/npm/purecss@3.0.0/build/base-min.css new file mode 100644 index 0000000000..df4fbc0557 --- /dev/null +++ b/examples/tinychat/assets/cdn.jsdelivr.net/npm/purecss@3.0.0/build/base-min.css @@ -0,0 +1,11 @@ +/*! +Pure v3.0.0 +Copyright 2013 Yahoo! +Licensed under the BSD License. +https://github.com/pure-css/pure/blob/master/LICENSE +*/ +/*! +normalize.css v | MIT License | https://necolas.github.io/normalize.css/ +Copyright (c) Nicolas Gallagher and Jonathan Neal +*/ +/*! normalize.css v8.0.1 | MIT License | github.com/necolas/normalize.css */html{line-height:1.15;-webkit-text-size-adjust:100%}body{margin:0}main{display:block}h1{font-size:2em;margin:.67em 0}hr{box-sizing:content-box;height:0;overflow:visible}pre{font-family:monospace,monospace;font-size:1em}a{background-color:transparent}abbr[title]{border-bottom:none;text-decoration:underline;-webkit-text-decoration:underline dotted;text-decoration:underline dotted}b,strong{font-weight:bolder}code,kbd,samp{font-family:monospace,monospace;font-size:1em}small{font-size:80%}sub,sup{font-size:75%;line-height:0;position:relative;vertical-align:baseline}sub{bottom:-.25em}sup{top:-.5em}img{border-style:none}button,input,optgroup,select,textarea{font-family:inherit;font-size:100%;line-height:1.15;margin:0}button,input{overflow:visible}button,select{text-transform:none}[type=button],[type=reset],[type=submit],button{-webkit-appearance:button}[type=button]::-moz-focus-inner,[type=reset]::-moz-focus-inner,[type=submit]::-moz-focus-inner,button::-moz-focus-inner{border-style:none;padding:0}[type=button]:-moz-focusring,[type=reset]:-moz-focusring,[type=submit]:-moz-focusring,button:-moz-focusring{outline:1px dotted ButtonText}fieldset{padding:.35em .75em .625em}legend{box-sizing:border-box;color:inherit;display:table;max-width:100%;padding:0;white-space:normal}progress{vertical-align:baseline}textarea{overflow:auto}[type=checkbox],[type=radio]{box-sizing:border-box;padding:0}[type=number]::-webkit-inner-spin-button,[type=number]::-webkit-outer-spin-button{height:auto}[type=search]{-webkit-appearance:textfield;outline-offset:-2px}[type=search]::-webkit-search-decoration{-webkit-appearance:none}::-webkit-file-upload-button{-webkit-appearance:button;font:inherit}details{display:block}summary{display:list-item}template{display:none}[hidden]{display:none}html{font-family:sans-serif}.hidden,[hidden]{display:none!important}.pure-img{max-width:100%;height:auto;display:block} \ No newline at end of file diff --git a/examples/tinychat/tinychat-browser/.gitignore b/examples/tinychat/tinychat-browser/.gitignore new file mode 100644 index 0000000000..eddee94585 --- /dev/null +++ b/examples/tinychat/tinychat-browser/.gitignore @@ -0,0 +1,5 @@ +net_* +llama3-2.tiktoken +tiktoken.js +tiktoken_bg.wasm +transformer* \ No newline at end of file diff --git a/examples/tinychat/tinychat-browser/compile.py b/examples/tinychat/tinychat-browser/compile.py index b9544dfeff..8b898ec3da 100644 --- a/examples/tinychat/tinychat-browser/compile.py +++ b/examples/tinychat/tinychat-browser/compile.py @@ -1,8 +1,8 @@ import os, json, hashlib, math from extra.export_model import export_model -from examples.llama3 import build_transformer +from examples.llama3 import build_transformer, Tokenizer from tinygrad.nn.state import get_state_dict, load_state_dict -from tinygrad import Device, Variable, Tensor, dtypes +from tinygrad import Device, Variable, Tensor, dtypes, TinyJit from tinygrad.helpers import fetch, Context from tiktoken.load import load_tiktoken_bpe, dump_tiktoken_bpe @@ -66,21 +66,47 @@ def prepare_browser_chunks(model): # compute hashes, which client app will check to determine whether to update with new weights and/or detect integrity issues state_dict_hash = hashlib.sha256(json.dumps(metadata, sort_keys=True).encode("utf-8")).hexdigest() metadata = {"state_dict": metadata, "state_dict_hash": state_dict_hash, "files": []} + hashes = set() for i in range(len(files)): with open(os.path.join(os.path.dirname(__file__), f'./net_part{i}.chunk'), "rb") as reader: - metadata["files"].append({"name": f'net_part{i}.chunk', "hash": hashlib.sha256(reader.read()).hexdigest()}) + hash = hashlib.sha256(reader.read()).hexdigest() + hashes.add(hash) + metadata["files"].append({"name": f'net_part{i}.chunk', "hash": hash}) + if len(hashes) != len(files): print(f"WARNING: {len(files)} files were exported, but only {len(hashes)} are unique: something may have gone wrong") metadata_hash = hashlib.sha256(json.dumps(metadata, sort_keys=True).encode("utf-8")).hexdigest() metadata = {"metadata": metadata, "metadata_hash": metadata_hash} with open(os.path.join(os.path.dirname(__file__), f'./net_metadata.json'), "w") as writer: json.dump(metadata, writer, indent=4) return metadata +def validate_model(model, tokenizer): + prompt = "yo" + toks = [tokenizer.bos_id] + toks += [tokenizer.special_tokens["<|start_header_id|>"]] + tokenizer.encode("user") + [tokenizer.special_tokens["<|end_header_id|>"]] + tokenizer.encode("\n\n") + toks += tokenizer.encode(prompt) + [tokenizer.special_tokens["<|eot_id|>"]] + toks += [tokenizer.special_tokens["<|start_header_id|>"]] + tokenizer.encode("assistant") + [tokenizer.special_tokens["<|end_header_id|>"]] + tokenizer.encode("\n\n") + start_pos = 0 + run = TinyJit(model.forward) + for tok in toks[:-1]: + run(Tensor([[tok]]), Variable("start_pos", 0, model.max_context).bind(start_pos), 0.0, 0, 0.0, 0.0, 0.0).realize() + start_pos += 1 + tok = toks[-1] + result = "" + expected = "How's it going?" + while True: + tok = run(Tensor([[tok]]), Variable("start_pos", 0, model.max_context).bind(start_pos), 0.0, 0, 0.0, 0.0, 0.0).item() + start_pos += 1 + if tok in tokenizer.stop_tokens or len(result) > len(expected): break + result += tokenizer.decode([tok]) + assert result == expected, f"Model validation failed, expected output: {expected}, actual output: {result}" + if __name__=="__main__": # Export BPE data for use with tiktoken.js tokenizer_path = fetch("https://huggingface.co/bofenghuang/Meta-Llama-3-8B/resolve/main/original/tokenizer.model", "tokenizer.model", subdir="llama3-1b-instruct") mergeable_ranks = load_tiktoken_bpe(str(tokenizer_path)) bpe_path = os.path.join(os.path.dirname(__file__), "llama3-2.tiktoken") dump_tiktoken_bpe(mergeable_ranks, bpe_path) + tokenizer = Tokenizer(str(tokenizer_path)) model_path = fetch("https://huggingface.co/bartowski/Llama-3.2-1B-Instruct-GGUF/resolve/main/Llama-3.2-1B-Instruct-f16.gguf", "Llama-3.2-1B-Instruct-f16.gguf", subdir="llama3-1b-instruct") Tensor.no_grad = True @@ -93,7 +119,7 @@ if __name__=="__main__": Device.DEFAULT="CPU" model = build_transformer(model_path, model_size="1B", quantize="int8", scale_dtype=dtypes.float32, device=Device.DEFAULT, max_context=max_context) state_dict = get_state_dict(model) - out = model.forward(*model_input()) + validate_model(model, tokenizer) model_name = "transformer" with Context(BEAM=3): @@ -112,7 +138,7 @@ if __name__=="__main__": # these were the same before load_state_dict model.output.weight, model.output.scale = model.tok_embeddings.weight, model.tok_embeddings.scale - out = model.forward(*model_input()) + validate_model(model, tokenizer) metadata = prepare_browser_chunks(model) # export weights to disk with Context(BEAM=3): diff --git a/examples/tinychat/tinychat-browser/index.css b/examples/tinychat/tinychat-browser/index.css new file mode 100644 index 0000000000..9be6635450 --- /dev/null +++ b/examples/tinychat/tinychat-browser/index.css @@ -0,0 +1,322 @@ +/* define colors */ +:root { + --primary-color: #fff; + --secondary-color: #2a2a2a; + --secondary-color-transparent: #ffffff66; + --primary-bg-color: #1a1a1a; + --foreground-color: #f0f0f0; +} + +main { + width: 100%; + height: 100%; + + display: flex; + flex-direction: column; + + place-items: center; +} + +.home { + width: 100%; + height: 90%; + + margin-bottom: 10rem; +} + +.title { + font-size: 3rem; + margin: 1rem 0; + margin-top: 3rem; +} + +.histories-container-container { + width: 100%; + max-height: 75%; + + position: relative; +} + +.histories-container { + overflow-y: auto; + overflow-x: hidden; + width: 100%; + height: 100%; + + display: flex; + flex-direction: column; + gap: 1rem; + align-items: center; + + margin: 0; + padding: 3rem 1rem; +} + +.histories-start { + height: 3rem; + width: 100%; + + z-index: 999; + top: 0; + position: absolute; + + background: linear-gradient( + 180deg, + var(--primary-bg-color) 0%, + transparent 100% + ); +} +.histories-end { + height: 3rem; + width: 100%; + + z-index: 999; + bottom: 0; + position: absolute; + + background: linear-gradient( + 0deg, + var(--primary-bg-color) 0%, + transparent 100% + ); +} + +.history { + padding: 1rem; + width: 100%; + max-width: 40rem; + + background-color: var(--secondary-color); + border-radius: 10px; + border-left: 2px solid var(--primary-color); + + cursor: pointer; + + transform: translateX(calc(1px * var(--tx, 0))); + opacity: var(--opacity, 1); +} +.history:hover { + background-color: var(--secondary-color); +} + +.history-delete-button { + position: absolute; + top: 0; + right: 0; + padding: 0.5rem; + margin: 0; + outline: none; + border: none; + background-color: var(--secondary-color); + color: var(--foreground-color); + border-radius: 0 0 0 10px; + cursor: pointer; + transition: 0.2s; +} +.history-delete-button:hover { + background-color: var(--secondary-color); + padding: 0.75rem; +} + +.messages { + overflow-y: auto; + height: 100%; + width: 100%; + max-width: 1200px; + + display: flex; + flex-direction: column; + gap: 1rem; + align-items: center; + padding-top: 1rem; + padding-bottom: 11rem; +} + +.message { + max-width: 75%; + padding: 0.5rem 1rem; + border-radius: 20px; +} +.message-role-assistant { + background-color: var(--secondary-color); + margin-right: auto; + color: #fff; +} +.message-role-user { + margin-left: auto; + background-color: var(--primary-color); + color: #000; +} + +.message > pre { + white-space: pre-wrap; +} + +.hljs { + width: 100%; + position: relative; + border-radius: 10px; + /* wrap code blocks */ + white-space: pre-wrap; +} +/* put clipboard button in the top right corner of the code block */ +.clipboard-button { + position: absolute; + top: 0; + right: 0; + padding: 0.5rem; + margin: 0; + outline: none; + border: none; + background-color: var(--secondary-color); + color: var(--foreground-color); + border-radius: 0 0 0 10px; + cursor: pointer; + transition: 0.2s; +} +.clipboard-button:hover { + background-color: var(--secondary-color); + padding: 0.75rem; +} + +.input-container { + position: absolute; + bottom: 0; + + /* linear gradient from background-color to transparent on the top */ + background: linear-gradient( + 0deg, + var(--primary-bg-color) 55%, + transparent 100% + ); + + width: 100%; + max-width: 1200px; + display: flex; + flex-direction: column; + justify-content: center; + align-items: center; + z-index: 999; +} + +.input-performance { + margin-top: 4rem; + + display: flex; + flex-direction: row; + gap: 1rem; +} + +.input-performance-point { + display: flex; + flex-direction: row; + place-items: center; + gap: 0.5rem; +} +.input-performance-point > p { + height: 1rem; + line-height: normal; +} + +.input { + width: 90%; + min-height: 3rem; + flex-shrink: 0; + + display: flex; + flex-direction: row; + justify-content: center; + gap: 0.5rem; + + align-items: flex-end; + margin-bottom: 2rem; +} + +.input-form { + width: 100%; + padding: 1rem; + min-height: 3rem; + max-height: 8rem; + + background-color: var(--secondary-color); + color: var(--foreground-color); + border-radius: 10px; + border: none; + resize: none; + outline: none; +} +.mobile .input-form { /* prevent auto-zoom on touching prompt box */ + font-size: 16px; +} + +.input-button { + height: 3rem; + width: 4rem; + + background-color: var(--primary-color); + color: var(--secondary-color); + border-radius: 10px; + padding: 0.5rem; + cursor: pointer; +} +.input-button:hover { + background-color: var(--secondary-color-transparent); +} +.input-button:disabled { + background-color: var(--secondary-color); + cursor: not-allowed; +} + +/* wrap text */ +p { + white-space: pre-wrap; +} + +/* fonts */ +.megrim-regular { + font-family: monospace; + font-weight: 400; + font-style: normal; +} + +.monospace { + font-family: monospace; +} + +.loading-bar { + display: flex; + flex-direction: row; + align-items: center; + gap: 0.5rem; + width: 100%; + min-height: 3rem; + margin-bottom: 2rem; +} + +.loading-text { + color: var(--foreground-color); + font-size: 1rem; + white-space: nowrap; +} + +#progress-percentage { + color: var(--foreground-color); + font-size: 1rem; + white-space: nowrap; +} + +.progress-bar { + flex-grow: 1; + height: 0.5rem; + background-color: var(--secondary-color); + border-radius: 5px; + overflow: hidden; + position: relative; +} + +.progress { + width: 0%; + height: 100%; + background-color: var(--primary-color); + transition: width 0.2s ease-in-out; +} \ No newline at end of file diff --git a/examples/tinychat/tinychat-browser/index.html b/examples/tinychat/tinychat-browser/index.html new file mode 100644 index 0000000000..8d43624c29 --- /dev/null +++ b/examples/tinychat/tinychat-browser/index.html @@ -0,0 +1,182 @@ + + +
+SEC TO FIRST TOKEN
+ + + +TOKENS/SEC
+ + + +TOKENS
+ +