mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-01-09 15:08:02 -05:00
download llama3 70B (#7868)
Use the "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF" weights. ``` PYTHONPATH=. JITBEAM=2 python3 examples/llama3.py --download_model --size 70B --quantize int8 --benchmark ``` On an M4 Max it takes 40 sec to load the model, and benchmarks as: ``` enqueue in 165.15 ms total 328.54 ms, 3.04 tok/s, 247.46 GB/s, param 221.20 GB/s enqueue in 5.31 ms total 168.48 ms, 5.94 tok/s, 482.54 GB/s, param 431.34 GB/s enqueue in 5.32 ms total 168.77 ms, 5.93 tok/s, 481.71 GB/s, param 430.60 GB/s enqueue in 5.69 ms total 169.51 ms, 5.90 tok/s, 479.61 GB/s, param 428.72 GB/s enqueue in 5.41 ms total 168.60 ms, 5.93 tok/s, 482.20 GB/s, param 431.04 GB/s enqueue in 5.18 ms total 168.98 ms, 5.92 tok/s, 481.12 GB/s, param 430.08 GB/s enqueue in 5.43 ms total 168.82 ms, 5.92 tok/s, 481.59 GB/s, param 430.49 GB/s enqueue in 5.27 ms total 168.94 ms, 5.92 tok/s, 481.23 GB/s, param 430.17 GB/s ```
This commit is contained in:
@@ -246,6 +246,12 @@ if __name__ == "__main__":
|
||||
fetch("https://huggingface.co/TriAiExperiments/SFR-Iterative-DPO-LLaMA-3-8B-R/resolve/main/model-00003-of-00004.safetensors", "model-00003-of-00004.safetensors", subdir="llama3-8b-sfr")
|
||||
fetch("https://huggingface.co/TriAiExperiments/SFR-Iterative-DPO-LLaMA-3-8B-R/resolve/main/model-00004-of-00004.safetensors", "model-00004-of-00004.safetensors", subdir="llama3-8b-sfr")
|
||||
args.model = fetch("https://huggingface.co/TriAiExperiments/SFR-Iterative-DPO-LLaMA-3-8B-R/raw/main/model.safetensors.index.json", "model.safetensors.index.json", subdir="llama3-8b-sfr")
|
||||
elif args.size == "70B":
|
||||
subdir = "Llama-3.1-Nemotron-70B-Instruct-HF"
|
||||
args.model = fetch("https://huggingface.co/nvidia/Llama-3.1-Nemotron-70B-Instruct-HF/resolve/main/model.safetensors.index.json?download=true", "model.safetensors.index.json", subdir=subdir)
|
||||
fetch("https://huggingface.co/bofenghuang/Meta-Llama-3-8B/resolve/main/original/tokenizer.model", "tokenizer.model", subdir=subdir)
|
||||
for i in range(30):
|
||||
fetch(f"https://huggingface.co/nvidia/Llama-3.1-Nemotron-70B-Instruct-HF/resolve/main/model-{i+1:05d}-of-00030.safetensors?download=true", f"model-{i+1:05d}-of-00030.safetensors", subdir=subdir)
|
||||
|
||||
assert args.model is not None, "please provide --model option"
|
||||
|
||||
|
||||
Reference in New Issue
Block a user