From ad1d873f8d1fc280e0bdbf0b2981d8504fed4f89 Mon Sep 17 00:00:00 2001
From: chenyu <chenyu@fastmail.com>
Date: Wed, 13 Mar 2024 12:07:02 -0400
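Subject: [PATCH] fix llama shard convo mode (#3716)

In chatbot mode, the KV-cache warm-up call built its prompt tensor as
`Tensor([toks])`, with no explicit device. Pass `device=device` so the
prompt tokens are created on the `device` value in scope at the call
site (defined outside this hunk; presumably the device or device set
the model was loaded and sharded onto).
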
---
 examples/llama.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/llama.py b/examples/llama.py
index 3c0ff897a7..aacbfbe709 100755
--- a/examples/llama.py
+++ b/examples/llama.py
@@ -392,7 +392,7 @@ After you are done speaking, output [EOS]. You are not Chad.
 
     print(f"Preparing KV cache for chatbot with personality {args.personality}...")
     with Timing():
-      llama.model(Tensor([toks]), 0, args.temperature).realize()  # NOTE: outputs are not used
+      llama.model(Tensor([toks], device=device), 0, args.temperature).realize()  # NOTE: outputs are not used
     start_pos = len(toks)
   else:
     # non chat bot mode