From be3e7998ae3f877920d4a9a6122ddf0d955f06c9 Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Thu, 18 Dec 2025 11:39:51 +0000
Subject: [PATCH] fix: merge system messages into user messages for Ollama
 models
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some Ollama models (like Olmo, Nemotron-3-nano) don't properly respect
system messages, causing them to ignore tool-format instructions.

This fix modifies _format_messages_for_provider() to merge system
message content into the first user message for Ollama models, ensuring
the instructions are visible to the model.

Changes:
- Accumulate all system messages and merge into first user message
- Handle edge cases: system-only messages, system then assistant
- Preserve conversation history after system message merge
- Add comprehensive tests for all scenarios

Fixes #4117

Co-Authored-By: João
---
 lib/crewai/src/crewai/llm.py |  52 +++++++++++++---
 lib/crewai/tests/test_llm.py | 116 ++++++++++++++++++++++++++++++++++-
 2 files changed, 159 insertions(+), 9 deletions(-)

diff --git a/lib/crewai/src/crewai/llm.py b/lib/crewai/src/crewai/llm.py
index 77053deeb..4e860f7c6 100644
--- a/lib/crewai/src/crewai/llm.py
+++ b/lib/crewai/src/crewai/llm.py
@@ -1919,14 +1919,50 @@ class LLM(BaseLLM):
             return [*messages, {"role": "user", "content": "Please continue."}]  # type: ignore[list-item]
         return messages  # type: ignore[return-value]
 
-        # TODO: Remove this code after merging PR https://github.com/BerriAI/litellm/pull/10917
-        # Ollama doesn't supports last message to be 'assistant'
-        if (
-            "ollama" in self.model.lower()
-            and messages
-            and messages[-1]["role"] == "assistant"
-        ):
-            return [*messages, {"role": "user", "content": ""}]  # type: ignore[list-item]
+        # Handle Ollama models - merge system messages into user messages
+        # Some Ollama models (like Olmo, Nemotron-3-nano) don't properly respect
+        # system messages, causing them to ignore tool-format instructions.
+        # By merging system content into the first user message, we ensure
+        # the instructions are visible to the model.
+        if self._get_custom_llm_provider() == "ollama":
+            formatted_messages: list[dict[str, str]] = []
+            system_contents: list[str] = []
+
+            for msg in messages:
+                if msg["role"] == "system":
+                    # Accumulate all system message contents
+                    system_contents.append(str(msg["content"]))
+                else:
+                    # For the first non-system message, prepend accumulated system content
+                    if system_contents and not formatted_messages:
+                        merged_content = "\n\n".join(system_contents)
+                        if msg["role"] == "user":
+                            # Merge system content into the first user message
+                            formatted_messages.append({
+                                "role": "user",
+                                "content": f"{merged_content}\n\n{msg['content']}"
+                            })
+                        else:
+                            # If first non-system message isn't user, prepend a user message
+                            formatted_messages.append({
+                                "role": "user",
+                                "content": merged_content
+                            })
+                            formatted_messages.append({"role": msg["role"], "content": str(msg["content"])})
+                        system_contents = []
+                    else:
+                        formatted_messages.append({"role": msg["role"], "content": str(msg["content"])})
+
+            # Handle case where there are only system messages
+            if system_contents and not formatted_messages:
+                merged_content = "\n\n".join(system_contents)
+                formatted_messages.append({"role": "user", "content": merged_content})
+
+            # Ollama doesn't support last message being 'assistant'
+            if formatted_messages and formatted_messages[-1]["role"] == "assistant":
+                formatted_messages.append({"role": "user", "content": ""})
+
+            return formatted_messages
 
         # Handle Anthropic models
         if not self.is_anthropic:
diff --git a/lib/crewai/tests/test_llm.py b/lib/crewai/tests/test_llm.py
index 6f3bcd70a..7b75e4b4c 100644
--- a/lib/crewai/tests/test_llm.py
+++ b/lib/crewai/tests/test_llm.py
@@ -705,7 +705,121 @@ def test_ollama_does_not_modify_when_last_is_user(ollama_llm):
 
     formatted = ollama_llm._format_messages_for_provider(original_messages)
 
-    assert formatted == original_messages
+    # Ollama formatting should preserve user-only messages
+    assert len(formatted) == 1
+    assert formatted[0]["role"] == "user"
+    assert formatted[0]["content"] == "Tell me a joke."
+
+
+def test_ollama_merges_system_message_into_first_user_message(ollama_llm):
+    """Test that system messages are merged into the first user message for Ollama models.
+
+    This ensures that tool-format instructions in system messages are visible to
+    models that don't properly respect system messages (like Olmo, Nemotron-3-nano).
+    """
+    original_messages = [
+        {"role": "system", "content": "You are a helpful assistant."},
+        {"role": "user", "content": "Hello!"},
+    ]
+
+    formatted = ollama_llm._format_messages_for_provider(original_messages)
+
+    # System message should be merged into user message
+    assert len(formatted) == 1
+    assert formatted[0]["role"] == "user"
+    assert "You are a helpful assistant." in formatted[0]["content"]
+    assert "Hello!" in formatted[0]["content"]
+
+
+def test_ollama_merges_multiple_system_messages(ollama_llm):
+    """Test that multiple system messages are accumulated and merged."""
+    original_messages = [
+        {"role": "system", "content": "You are a helpful assistant."},
+        {"role": "system", "content": "Use the following tools:"},
+        {"role": "user", "content": "What's the weather?"},
+    ]
+
+    formatted = ollama_llm._format_messages_for_provider(original_messages)
+
+    # Both system messages should be merged into user message
+    assert len(formatted) == 1
+    assert formatted[0]["role"] == "user"
+    assert "You are a helpful assistant." in formatted[0]["content"]
+    assert "Use the following tools:" in formatted[0]["content"]
+    assert "What's the weather?" in formatted[0]["content"]
+
+
+def test_ollama_handles_system_only_messages(ollama_llm):
+    """Test that system-only messages are converted to user messages."""
+    original_messages = [
+        {"role": "system", "content": "You are a helpful assistant."},
+    ]
+
+    formatted = ollama_llm._format_messages_for_provider(original_messages)
+
+    # System message should be converted to user message
+    assert len(formatted) == 1
+    assert formatted[0]["role"] == "user"
+    assert formatted[0]["content"] == "You are a helpful assistant."
+
+
+def test_ollama_handles_system_then_assistant_messages(ollama_llm):
+    """Test that system messages are prepended when first non-system is assistant."""
+    original_messages = [
+        {"role": "system", "content": "You are a helpful assistant."},
+        {"role": "assistant", "content": "Hello!"},
+    ]
+
+    formatted = ollama_llm._format_messages_for_provider(original_messages)
+
+    # System should be prepended as user, assistant kept, then empty user appended
+    assert len(formatted) == 3
+    assert formatted[0]["role"] == "user"
+    assert formatted[0]["content"] == "You are a helpful assistant."
+    assert formatted[1]["role"] == "assistant"
+    assert formatted[1]["content"] == "Hello!"
+    assert formatted[2]["role"] == "user"
+    assert formatted[2]["content"] == ""
+
+
+def test_ollama_preserves_conversation_after_system_merge(ollama_llm):
+    """Test that conversation history is preserved after system message merge."""
+    original_messages = [
+        {"role": "system", "content": "You are a helpful assistant."},
+        {"role": "user", "content": "Hello!"},
+        {"role": "assistant", "content": "Hi there!"},
+        {"role": "user", "content": "How are you?"},
+    ]
+
+    formatted = ollama_llm._format_messages_for_provider(original_messages)
+
+    # System merged into first user, then rest of conversation preserved
+    assert len(formatted) == 3
+    assert formatted[0]["role"] == "user"
+    assert "You are a helpful assistant." in formatted[0]["content"]
+    assert "Hello!" in formatted[0]["content"]
+    assert formatted[1]["role"] == "assistant"
+    assert formatted[1]["content"] == "Hi there!"
+    assert formatted[2]["role"] == "user"
+    assert formatted[2]["content"] == "How are you?"
+
+
+def test_non_ollama_model_preserves_system_messages():
+    """Test that non-Ollama models preserve system messages as-is."""
+    llm = LLM(model="gpt-4o-mini", is_litellm=True)
+    original_messages = [
+        {"role": "system", "content": "You are a helpful assistant."},
+        {"role": "user", "content": "Hello!"},
+    ]
+
+    formatted = llm._format_messages_for_provider(original_messages)
+
+    # Non-Ollama models should preserve system messages
+    assert len(formatted) == 2
+    assert formatted[0]["role"] == "system"
+    assert formatted[0]["content"] == "You are a helpful assistant."
+    assert formatted[1]["role"] == "user"
+    assert formatted[1]["content"] == "Hello!"
 
 
 def test_native_provider_raises_error_when_supported_but_fails():
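
Note on the intended behavior: the snippet below is a minimal, standalone
sketch of the transformation this patch applies, so it can be tried without
installing crewai. merge_system_into_first_user is a hypothetical helper
written for this note that mirrors the patched
_format_messages_for_provider() logic for the Ollama branch; it is not part
of the crewai API. It assumes Python 3.9+ for the builtin generic
annotations.

def merge_system_into_first_user(messages: list[dict[str, str]]) -> list[dict[str, str]]:
    """Fold leading system messages into the first user message."""
    formatted: list[dict[str, str]] = []
    pending_system: list[str] = []

    for msg in messages:
        if msg["role"] == "system":
            # Accumulate system content until the first non-system message
            pending_system.append(str(msg["content"]))
        elif pending_system and not formatted:
            merged = "\n\n".join(pending_system)
            if msg["role"] == "user":
                # Prepend the accumulated system content to the first user turn
                formatted.append({"role": "user", "content": f"{merged}\n\n{msg['content']}"})
            else:
                # First non-system message isn't a user turn: emit the system
                # content as its own user message, then keep the original message
                formatted.append({"role": "user", "content": merged})
                formatted.append({"role": msg["role"], "content": str(msg["content"])})
            pending_system = []
        else:
            formatted.append({"role": msg["role"], "content": str(msg["content"])})

    if pending_system and not formatted:
        # Conversation contained only system messages
        formatted.append({"role": "user", "content": "\n\n".join(pending_system)})
    if formatted and formatted[-1]["role"] == "assistant":
        # Ollama doesn't support the last message being 'assistant'
        formatted.append({"role": "user", "content": ""})
    return formatted


if __name__ == "__main__":
    result = merge_system_into_first_user([
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello!"},
    ])
    # Expected: one user message carrying both the system prompt and the turn
    assert result == [
        {"role": "user", "content": "You are a helpful assistant.\n\nHello!"}
    ]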