fix llama shard convo mode (#3716)

This commit is contained in:
chenyu
2024-03-13 12:07:02 -04:00
committed by GitHub
parent 337cd53444
commit ad1d873f8d

View File

@@ -392,7 +392,7 @@ After you are done speaking, output [EOS]. You are not Chad.
print(f"Preparing KV cache for chatbot with personality {args.personality}...")
with Timing():
-llama.model(Tensor([toks]), 0, args.temperature).realize() # NOTE: outputs are not used
+llama.model(Tensor([toks], device=device), 0, args.temperature).realize() # NOTE: outputs are not used
start_pos = len(toks)
else:
# non chat bot mode