From ad1d873f8d1fc280e0bdbf0b2981d8504fed4f89 Mon Sep 17 00:00:00 2001 From: chenyu Date: Wed, 13 Mar 2024 12:07:02 -0400 Subject: [PATCH] fix llama shard convo mode (#3716) --- examples/llama.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/llama.py b/examples/llama.py index 3c0ff897a7..aacbfbe709 100755 --- a/examples/llama.py +++ b/examples/llama.py @@ -392,7 +392,7 @@ After you are done speaking, output [EOS]. You are not Chad. print(f"Preparing KV cache for chatbot with personality {args.personality}...") with Timing(): - llama.model(Tensor([toks]), 0, args.temperature).realize() # NOTE: outputs are not used + llama.model(Tensor([toks], device=device), 0, args.temperature).realize() # NOTE: outputs are not used start_pos = len(toks) else: # non chat bot mode