fix llama shard convo mode (#3716)

2026-01-10 07:28:15 -05:00 · 2024-03-13 12:07:02 -04:00
parent 337cd53444
commit ad1d873f8d
1 changed file with 1 addition and 1 deletion
--- a/examples/llama.py
+++ b/examples/llama.py
@@ -392,7 +392,7 @@ After you are done speaking, output [EOS]. You are not Chad.

    print(f"Preparing KV cache for chatbot with personality {args.personality}...")
    with Timing():
-      llama.model(Tensor([toks]), 0, args.temperature).realize()  # NOTE: outputs are not used
+      llama.model(Tensor([toks], device=device), 0, args.temperature).realize()  # NOTE: outputs are not used
    start_pos = len(toks)
  else:
    # non chat bot mode