mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-01-08 22:18:10 -05:00
fix: merge system messages into user messages for Ollama models
Some Ollama models (like Olmo, Nemotron-3-nano) don't properly respect system messages, causing them to ignore tool-format instructions. This fix modifies _format_messages_for_provider() to merge system message content into the first user message for Ollama models, ensuring the instructions are visible to the model. Changes: - Accumulate all system messages and merge into first user message - Handle edge cases: system-only messages, system then assistant - Preserve conversation history after system message merge - Add comprehensive tests for all scenarios Fixes #4117 Co-Authored-By: João <joao@crewai.com>
This commit is contained in:
@@ -1919,14 +1919,50 @@ class LLM(BaseLLM):
|
||||
return [*messages, {"role": "user", "content": "Please continue."}] # type: ignore[list-item]
|
||||
return messages # type: ignore[return-value]
|
||||
|
||||
# TODO: Remove this code after merging PR https://github.com/BerriAI/litellm/pull/10917
|
||||
# Ollama doesn't support the last message being 'assistant'
|
||||
if (
|
||||
"ollama" in self.model.lower()
|
||||
and messages
|
||||
and messages[-1]["role"] == "assistant"
|
||||
):
|
||||
return [*messages, {"role": "user", "content": ""}] # type: ignore[list-item]
|
||||
# Handle Ollama models - merge system messages into user messages
|
||||
# Some Ollama models (like Olmo, Nemotron-3-nano) don't properly respect
|
||||
# system messages, causing them to ignore tool-format instructions.
|
||||
# By merging system content into the first user message, we ensure
|
||||
# the instructions are visible to the model.
|
||||
if self._get_custom_llm_provider() == "ollama":
|
||||
formatted_messages: list[dict[str, str]] = []
|
||||
system_contents: list[str] = []
|
||||
|
||||
for msg in messages:
|
||||
if msg["role"] == "system":
|
||||
# Accumulate all system message contents
|
||||
system_contents.append(str(msg["content"]))
|
||||
else:
|
||||
# For the first non-system message, prepend accumulated system content
|
||||
if system_contents and not formatted_messages:
|
||||
merged_content = "\n\n".join(system_contents)
|
||||
if msg["role"] == "user":
|
||||
# Merge system content into the first user message
|
||||
formatted_messages.append({
|
||||
"role": "user",
|
||||
"content": f"{merged_content}\n\n{msg['content']}"
|
||||
})
|
||||
else:
|
||||
# If first non-system message isn't user, prepend a user message
|
||||
formatted_messages.append({
|
||||
"role": "user",
|
||||
"content": merged_content
|
||||
})
|
||||
formatted_messages.append({"role": msg["role"], "content": str(msg["content"])})
|
||||
system_contents = []
|
||||
else:
|
||||
formatted_messages.append({"role": msg["role"], "content": str(msg["content"])})
|
||||
|
||||
# Handle case where there are only system messages
|
||||
if system_contents and not formatted_messages:
|
||||
merged_content = "\n\n".join(system_contents)
|
||||
formatted_messages.append({"role": "user", "content": merged_content})
|
||||
|
||||
# Ollama doesn't support last message being 'assistant'
|
||||
if formatted_messages and formatted_messages[-1]["role"] == "assistant":
|
||||
formatted_messages.append({"role": "user", "content": ""})
|
||||
|
||||
return formatted_messages
|
||||
|
||||
# Handle Anthropic models
|
||||
if not self.is_anthropic:
|
||||
|
||||
@@ -705,7 +705,121 @@ def test_ollama_does_not_modify_when_last_is_user(ollama_llm):
|
||||
|
||||
formatted = ollama_llm._format_messages_for_provider(original_messages)
|
||||
|
||||
assert formatted == original_messages
|
||||
# Ollama formatting should preserve user-only messages
|
||||
assert len(formatted) == 1
|
||||
assert formatted[0]["role"] == "user"
|
||||
assert formatted[0]["content"] == "Tell me a joke."
|
||||
|
||||
|
||||
def test_ollama_merges_system_message_into_first_user_message(ollama_llm):
    """Test that system messages are merged into the first user message for Ollama models.

    This ensures that tool-format instructions in system messages are visible to
    models that don't properly respect system messages (like Olmo, Nemotron-3-nano).
    """
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello!"},
    ]

    result = ollama_llm._format_messages_for_provider(messages)

    # The system prompt should now live inside a single user message.
    assert len(result) == 1
    (merged,) = result
    assert merged["role"] == "user"
    assert "You are a helpful assistant." in merged["content"]
    assert "Hello!" in merged["content"]
|
||||
|
||||
|
||||
def test_ollama_merges_multiple_system_messages(ollama_llm):
    """Test that multiple system messages are accumulated and merged."""
    conversation = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "system", "content": "Use the following tools:"},
        {"role": "user", "content": "What's the weather?"},
    ]

    result = ollama_llm._format_messages_for_provider(conversation)

    # Every system prompt plus the user text ends up in one user message.
    assert len(result) == 1
    first = result[0]
    assert first["role"] == "user"
    for expected_fragment in (
        "You are a helpful assistant.",
        "Use the following tools:",
        "What's the weather?",
    ):
        assert expected_fragment in first["content"]
|
||||
|
||||
|
||||
def test_ollama_handles_system_only_messages(ollama_llm):
    """Test that system-only messages are converted to user messages."""
    result = ollama_llm._format_messages_for_provider(
        [{"role": "system", "content": "You are a helpful assistant."}]
    )

    # With no user turn available, the system prompt becomes one itself.
    assert len(result) == 1
    only = result[0]
    assert only["role"] == "user"
    assert only["content"] == "You are a helpful assistant."
|
||||
|
||||
|
||||
def test_ollama_handles_system_then_assistant_messages(ollama_llm):
    """Test that system messages are prepended when first non-system is assistant."""
    history = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "assistant", "content": "Hello!"},
    ]

    result = ollama_llm._format_messages_for_provider(history)

    # Expected shape: system-as-user, original assistant, trailing empty user.
    assert len(result) == 3
    first, second, third = result
    assert (first["role"], first["content"]) == ("user", "You are a helpful assistant.")
    assert (second["role"], second["content"]) == ("assistant", "Hello!")
    assert (third["role"], third["content"]) == ("user", "")
|
||||
|
||||
|
||||
def test_ollama_preserves_conversation_after_system_merge(ollama_llm):
    """Test that conversation history is preserved after system message merge."""
    history = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello!"},
        {"role": "assistant", "content": "Hi there!"},
        {"role": "user", "content": "How are you?"},
    ]

    result = ollama_llm._format_messages_for_provider(history)

    assert len(result) == 3
    merged, reply, followup = result
    # First turn absorbs the system prompt alongside the original greeting.
    assert merged["role"] == "user"
    assert "You are a helpful assistant." in merged["content"]
    assert "Hello!" in merged["content"]
    # The remainder of the exchange is untouched.
    assert (reply["role"], reply["content"]) == ("assistant", "Hi there!")
    assert (followup["role"], followup["content"]) == ("user", "How are you?")
|
||||
|
||||
|
||||
def test_non_ollama_model_preserves_system_messages():
    """Test that non-Ollama models preserve system messages as-is."""
    llm = LLM(model="gpt-4o-mini", is_litellm=True)
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello!"},
    ]

    result = llm._format_messages_for_provider(messages)

    # No merging for non-Ollama providers: both roles survive verbatim.
    assert len(result) == 2
    system_msg, user_msg = result
    assert (system_msg["role"], system_msg["content"]) == (
        "system",
        "You are a helpful assistant.",
    )
    assert (user_msg["role"], user_msg["content"]) == ("user", "Hello!")
|
||||
|
||||
|
||||
def test_native_provider_raises_error_when_supported_but_fails():
|
||||
|
||||
Reference in New Issue
Block a user