From ccd073775e80e3d8a39c9a5c9d7abba8d2ffcf8b Mon Sep 17 00:00:00 2001
From: Fixt
Date: Fri, 31 Jan 2025 08:54:42 -0500
Subject: [PATCH] Remove Think Blocks to enhance support for chain-of-thought
 language models (#247)

---
 custom_components/llama_conversation/const.py        | 10 ++++++++++
 custom_components/llama_conversation/conversation.py |  3 +++
 2 files changed, 13 insertions(+)

diff --git a/custom_components/llama_conversation/const.py b/custom_components/llama_conversation/const.py
index d73af54..9c662e6 100644
--- a/custom_components/llama_conversation/const.py
+++ b/custom_components/llama_conversation/const.py
@@ -146,59 +146,69 @@ PROMPT_TEMPLATE_DESCRIPTIONS = {
         "user": { "prefix": "<|im_start|>user\n", "suffix": "<|im_end|>" },
         "assistant": { "prefix": "<|im_start|>assistant\n", "suffix": "<|im_end|>" },
         "tool": { "prefix": "<|im_start|>tool", "suffix": "<|im_end|>" },
+        "chain_of_thought": { "prefix": "", "suffix": ""},
         "generation_prompt": "<|im_start|>assistant"
     },
     PROMPT_TEMPLATE_COMMAND_R: {
         "system": { "prefix": "<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>", "suffix": "<|END_OF_TURN_TOKEN|>" },
         "user": { "prefix": "<|START_OF_TURN_TOKEN|><|USER_TOKEN|>", "suffix": "<|END_OF_TURN_TOKEN|>" },
         "assistant": { "prefix": "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>", "suffix": "<|END_OF_TURN_TOKEN|>" },
+        "chain_of_thought": { "prefix": "", "suffix": ""},
         "generation_prompt": "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>"
     },
     PROMPT_TEMPLATE_ALPACA: {
         "system": { "prefix": "", "suffix": "\n" },
         "user": { "prefix": "### Instruction:\n", "suffix": "\n" },
         "assistant": { "prefix": "### Response:\n", "suffix": "\n" },
+        "chain_of_thought": { "prefix": "", "suffix": ""},
         "generation_prompt": "### Response:"
     },
     PROMPT_TEMPLATE_VICUNA: {
         "system": { "prefix": "", "suffix": "\n" },
         "user": { "prefix": "USER: ", "suffix": "" },
         "assistant": { "prefix": "ASSISTANT: ", "suffix": "" },
+        "chain_of_thought": { "prefix": "", "suffix": ""},
         "generation_prompt": "ASSISTANT:"
     },
     PROMPT_TEMPLATE_NONE: {
         "system": { "prefix": "", "suffix": "" },
         "user": { "prefix": "", "suffix": "" },
         "assistant": { "prefix": "", "suffix": "" },
+        "chain_of_thought": { "prefix": "", "suffix": ""},
         "generation_prompt": ""
     },
     PROMPT_TEMPLATE_MISTRAL: {
         "user": { "prefix": "[INST] ", "suffix": " [/INST] " },
         "assistant": { "prefix": "", "suffix": "" },
+        "chain_of_thought": { "prefix": "", "suffix": ""},
         "generation_prompt": ""
     },
     PROMPT_TEMPLATE_ZEPHYR: {
         "system": { "prefix": "<|system|>\n", "suffix": "<|endoftext|>" },
         "user": { "prefix": "<|user|>\n", "suffix": "<|endoftext|>" },
         "assistant": { "prefix": "<|assistant|>\n", "suffix": "<|endoftext|>" },
+        "chain_of_thought": { "prefix": "", "suffix": ""},
         "generation_prompt": "<|assistant|>\n"
     },
     PROMPT_TEMPLATE_ZEPHYR2: {
         "system": { "prefix": "<|system|>\n", "suffix": "" },
         "user": { "prefix": "<|user|>\n", "suffix": "" },
         "assistant": { "prefix": "<|assistant|>\n", "suffix": "" },
+        "chain_of_thought": { "prefix": "", "suffix": ""},
         "generation_prompt": "<|assistant|>\n"
     },
     PROMPT_TEMPLATE_ZEPHYR3: {
         "system": { "prefix": "<|system|>\n", "suffix": "<|end|>" },
         "user": { "prefix": "<|user|>\n", "suffix": "<|end|>" },
         "assistant": { "prefix": "<|assistant|>\n", "suffix": "<|end|>" },
+        "chain_of_thought": { "prefix": "", "suffix": ""},
         "generation_prompt": "<|assistant|>\n"
     },
     PROMPT_TEMPLATE_LLAMA3: {
         "system": { "prefix": "<|start_header_id|>system<|end_header_id|>\n\n", "suffix": "<|eot_id|>"},
         "user": { "prefix": "<|start_header_id|>user<|end_header_id|>\n\n", "suffix": "<|eot_id|>"},
"<|eot_id|>"}, "assistant": { "prefix": "<|start_header_id|>assistant<|end_header_id|>\n\n", "suffix": "<|eot_id|>"}, + "chain_of_thought": { "prefix": "", "suffix": ""}, "generation_prompt": "<|start_header_id|>assistant<|end_header_id|>\n\n" } } diff --git a/custom_components/llama_conversation/conversation.py b/custom_components/llama_conversation/conversation.py index 6d891b4..443a3fa 100644 --- a/custom_components/llama_conversation/conversation.py +++ b/custom_components/llama_conversation/conversation.py @@ -406,6 +406,9 @@ class LocalLLMAgent(ConversationEntity, AbstractConversationAgent): # remove end of text token if it was returned response = response.replace(template_desc["assistant"]["suffix"], "") + # remove think blocks + response = re.sub(rf"^.*?{template_desc["chain_of_thought"]["suffix"]}", "", response, flags=re.DOTALL) + conversation.append({"role": "assistant", "message": response}) if remember_conversation: if remember_num_interactions and len(conversation) > (remember_num_interactions * 2) + 1: