fix: handle Pydantic V1/V2 compatibility in ChromaDBConfig

Fixes #4095

When users pass a chromadb.config.Settings object (Pydantic V1 model) to
ChromaDBConfig (Pydantic V2 dataclass), Pydantic V2 would attempt to
validate it and fail with: TypeError: BaseModel.validate() takes 2
positional arguments but 3 were given

This fix:
- Uses SkipValidation to prevent Pydantic V2 from validating the V1 Settings object
- Uses BeforeValidator to handle dict-to-Settings conversion
- Adds arbitrary_types_allowed=True to the config for extra safety

The settings field now accepts either:
- A chromadb.config.Settings instance (passed through unchanged)
- A dictionary of settings parameters (converted to Settings internally)

Co-Authored-By: João <joao@crewai.com>
This commit is contained in:
Devin AI
2025-12-16 09:18:33 +00:00
parent 88d3c0fa97
commit 34a33fc287
2 changed files with 174 additions and 4 deletions

View File

@@ -2,10 +2,11 @@
from dataclasses import field
import os
from typing import Literal, cast
from typing import Annotated, Any, Literal, cast
import warnings
from chromadb.config import Settings
from pydantic import BeforeValidator, ConfigDict, SkipValidation
from pydantic.dataclasses import dataclass as pyd_dataclass
from crewai.rag.chromadb.constants import (
@@ -31,6 +32,40 @@ warnings.filterwarnings(
)
def _coerce_settings(value: Any) -> Settings:
    """Normalize the ``settings`` input into a chromadb ``Settings`` object.

    Used as a "before" validator so that a ``Settings`` object (a Pydantic V1
    model) never has to be validated by Pydantic V2:

    - a ``Settings`` instance is handed back as the very same object;
    - a ``dict`` is expanded into keyword arguments for ``Settings``.

    Args:
        value: A ``Settings`` instance or a dict of settings parameters.

    Returns:
        The ``Settings`` instance to store on the config.

    Raises:
        TypeError: If ``value`` is neither a ``Settings`` instance nor a dict.
    """
    # Already the target type: return it untouched so identity is preserved.
    if isinstance(value, Settings):
        return value

    # Anything that is not a dict at this point cannot be converted.
    if not isinstance(value, dict):
        received = type(value).__name__
        raise TypeError(
            "settings must be a chromadb.config.Settings instance or a dict, "
            f"got {received}"
        )

    return Settings(**value)
# Annotated type used for the ``settings`` field of ChromaDBConfig.
# chromadb's Settings is a Pydantic V1 model, so SkipValidation stops
# Pydantic V2 from trying to validate it, and the BeforeValidator runs
# _coerce_settings on the raw input (Settings passed through, dict converted).
# NOTE(review): SkipValidation is listed before BeforeValidator; the order of
# Annotated metadata appears to matter here - confirm against the pydantic
# docs before reordering.
ChromaSettings = Annotated[
SkipValidation[Settings],
BeforeValidator(_coerce_settings),
]
def _default_settings() -> Settings:
"""Create default ChromaDB settings.
@@ -64,14 +99,19 @@ def _default_embedding_function() -> ChromaEmbeddingFunctionWrapper:
)
@pyd_dataclass(frozen=True)
@pyd_dataclass(frozen=True, config=ConfigDict(arbitrary_types_allowed=True))
class ChromaDBConfig(BaseRagConfig):
"""Configuration for ChromaDB client."""
"""Configuration for ChromaDB client.
The settings field accepts either a chromadb.config.Settings instance
or a dictionary of settings parameters. This handles the Pydantic V1/V2
compatibility issue where ChromaDB uses Pydantic V1 for its Settings class.
"""
provider: Literal["chromadb"] = field(default="chromadb", init=False)
tenant: str = DEFAULT_TENANT
database: str = DEFAULT_DATABASE
settings: Settings = field(default_factory=_default_settings)
settings: ChromaSettings = field(default_factory=_default_settings)
embedding_function: ChromaEmbeddingFunctionWrapper = field(
default_factory=_default_embedding_function
)

View File

@@ -0,0 +1,130 @@
"""Tests for ChromaDBConfig Pydantic V1/V2 compatibility."""
import pytest
from chromadb.config import Settings
from crewai.rag.chromadb.config import ChromaDBConfig, _coerce_settings
class TestCoerceSettings:
    """Test suite for the ``_coerce_settings`` validator function."""

    def test_coerce_settings_passes_through_settings_instance(self):
        """An existing Settings instance is returned as the same object."""
        settings = Settings(
            persist_directory="./test_db",
            allow_reset=True,
            is_persistent=False,
        )

        result = _coerce_settings(settings)

        # Identity, not just equality: the object must not be copied.
        assert result is settings
        assert result.persist_directory == "./test_db"
        assert result.allow_reset is True
        assert result.is_persistent is False

    def test_coerce_settings_converts_dict_to_settings(self):
        """A dict input is converted into a Settings instance."""
        settings_dict = {
            "persist_directory": "./my_custom_db",
            "allow_reset": True,
            "is_persistent": True,
        }

        result = _coerce_settings(settings_dict)

        assert isinstance(result, Settings)
        assert result.persist_directory == "./my_custom_db"
        assert result.allow_reset is True
        assert result.is_persistent is True

    # One parametrized case per input so a failure on one type does not
    # hide the outcome for the others.
    @pytest.mark.parametrize(
        ("invalid_value", "type_name"),
        [
            ("invalid_string", "str"),
            (123, "int"),
            (["list", "of", "items"], "list"),
            (None, "NoneType"),
        ],
        ids=["str", "int", "list", "none"],
    )
    def test_coerce_settings_raises_type_error_for_invalid_input(
        self, invalid_value, type_name
    ):
        """Inputs that are neither Settings nor dict raise TypeError.

        The ``match`` argument is a regular expression, so the dots of the
        module path are escaped; the pattern also pins the reported type name.
        """
        expected_message = (
            r"settings must be a chromadb\.config\.Settings instance or a dict, "
            rf"got {type_name}$"
        )
        with pytest.raises(TypeError, match=expected_message):
            _coerce_settings(invalid_value)
class TestChromaDBConfigPydanticCompatibility:
    """Regression tests for ChromaDBConfig Pydantic V1/V2 compatibility.

    Covers the fix for GitHub issue #4095: Pydantic V1/V2 Compatibility Crash
    in RagTool when passing custom ChromaDB Settings.
    """

    def test_chromadb_config_accepts_settings_instance(self):
        """A chromadb Settings instance is accepted and stored as-is.

        Main regression test for issue #4095, where passing a Settings
        instance failed with: TypeError: BaseModel.validate() takes 2
        positional arguments but 3 were given
        """
        provided = Settings(
            persist_directory="./my_db",
            allow_reset=True,
            is_persistent=False,
        )

        stored = ChromaDBConfig(settings=provided).settings

        # The very same object must end up on the config.
        assert stored is provided
        assert stored.persist_directory == "./my_db"
        assert stored.allow_reset is True
        assert stored.is_persistent is False

    def test_chromadb_config_accepts_settings_dict(self):
        """A plain dict passed as settings is converted to a Settings object."""
        raw_settings = dict(
            persist_directory="./dict_db",
            allow_reset=False,
            is_persistent=True,
        )

        stored = ChromaDBConfig(settings=raw_settings).settings

        assert isinstance(stored, Settings)
        assert stored.persist_directory == "./dict_db"
        assert stored.allow_reset is False
        assert stored.is_persistent is True

    def test_chromadb_config_uses_default_settings_when_not_provided(self):
        """Omitting settings falls back to the default Settings object."""
        stored = ChromaDBConfig().settings

        assert isinstance(stored, Settings)
        assert stored.allow_reset is True
        assert stored.is_persistent is True

    def test_chromadb_config_with_all_parameters(self):
        """Custom settings work together with every other config parameter."""
        custom_settings = Settings(
            persist_directory="./full_test_db",
            allow_reset=True,
            is_persistent=True,
        )
        other_fields = {
            "tenant": "test_tenant",
            "database": "test_database",
            "limit": 10,
            "score_threshold": 0.8,
            "batch_size": 50,
        }

        config = ChromaDBConfig(settings=custom_settings, **other_fields)

        assert config.settings is custom_settings
        # Every remaining field must round-trip with the value passed in.
        for field_name, expected in other_fields.items():
            assert getattr(config, field_name) == expected

    def test_chromadb_config_provider_is_chromadb(self):
        """The provider field of a default config is 'chromadb'."""
        assert ChromaDBConfig().provider == "chromadb"

    def test_chromadb_config_is_frozen(self):
        """Assigning to a field of a ChromaDBConfig raises AttributeError."""
        frozen_config = ChromaDBConfig()

        with pytest.raises(AttributeError):
            frozen_config.tenant = "new_tenant"