Compare commits

...

9 Commits

Author SHA1 Message Date
Xingyao Wang
c79350fa67 increase timeout 2025-03-06 04:22:24 +00:00
Xingyao Wang
0177cb3d21 update litellm to latest main 2025-03-06 01:44:05 +00:00
Xingyao Wang
764cd72e45 attempt 128k thinking budget 2025-03-05 19:50:02 +00:00
Xingyao Wang
f0339e24cc improve dataset shuffled 2025-03-04 16:34:05 +00:00
Xingyao Wang
4a07029318 remove the annoying print 2025-03-04 15:12:57 +00:00
Xingyao Wang
b1398f2d03 remove the annoying print 2025-03-04 15:12:50 +00:00
Xingyao Wang
be4dec238f pass thinking block to the next request 2025-03-04 14:27:40 +00:00
Xingyao Wang
9abeeb8008 bump litellm ver 2025-03-04 13:48:42 +00:00
Xingyao Wang
af9e1896e9 bump litellm version 2025-03-03 18:28:09 +00:00
7 changed files with 61 additions and 20 deletions

View File

@@ -247,12 +247,12 @@ def prepare_dataset(
f'Starting evaluation with skipping first {skip_num} instances ({len(dataset)} instances to run).'
)
if eval_n_limit and eval_n_limit > 0:
# Use fixed random seed 42 for sampling without replacement
dataset = dataset.sample(
min(eval_n_limit, len(dataset)), random_state=42, replace=False
)
# First shuffle the entire dataset with a fixed seed
shuffled_dataset = dataset.sample(frac=1.0, random_state=42, replace=False)
# Then take the first eval_n_limit rows
dataset = shuffled_dataset.iloc[:eval_n_limit]
logger.info(
f'Randomly sampling {eval_n_limit} unique instances with random seed 42.'
f'Taking first {eval_n_limit} instances from randomly shuffled dataset (seed 42).'
)
elif eval_n_limit and eval_n_limit > 0:
# Use fixed random seed 42 for sampling without replacement

View File

@@ -65,7 +65,9 @@ class LLMConfig(BaseModel):
retry_multiplier: float = Field(default=2)
retry_min_wait: int = Field(default=5)
retry_max_wait: int = Field(default=30)
timeout: int | None = Field(default=None)
timeout: int | None = Field(
default=1200
) # 20 minutes, extended thinking can take a while
max_message_chars: int = Field(
default=30_000
) # maximum number of characters in an observation's content when sent to the llm

View File

@@ -67,6 +67,10 @@ class Message(BaseModel):
# force string serializer
force_string_serializer: bool = False
# This is unique to Anthropic's claude-3-7-sonnet
# it is required to pass it back to the model to produce the next action
thinking_blocks: list[dict[str, str]] | None = None
@property
def contains_image(self) -> bool:
return any(isinstance(content, ImageContent) for content in self.content)
@@ -96,6 +100,9 @@ class Message(BaseModel):
def _list_serializer(self) -> dict:
content: list[dict] = []
if self.thinking_blocks is not None:
content.extend(self.thinking_blocks)
role_tool_with_prompt_caching = False
for item in self.content:
d = item.model_dump()

View File

@@ -139,8 +139,25 @@ class LLM(RetryMixin, DebugMixin):
# set up the completion function
kwargs: dict[str, Any] = {
'temperature': self.config.temperature,
'top_p': self.config.top_p,
'max_completion_tokens': self.config.max_output_tokens,
'extra_headers': {'anthropic-beta': 'output-128k-2025-02-19'},
}
# if 'claude-3-7-sonnet' in self.config.model:
kwargs['thinking'] = {
'type': 'enabled',
# 'budget_tokens': 30720, # 32768 - 2048
# 'budget_tokens': 62000, # 64000 - 2000
'budget_tokens': 120000, # 128000 - 8000
}
# kwargs['max_completion_tokens'] = 32768
# kwargs['max_completion_tokens'] = 64000
kwargs['max_completion_tokens'] = 128000
kwargs.pop('temperature')
kwargs.pop('top_p')
logger.info(f'Setting thinking for {self.config.model} with kwargs: {kwargs}')
if (
self.config.model.lower() in REASONING_EFFORT_SUPPORTED_MODELS
or self.config.model.split('/')[-1] in REASONING_EFFORT_SUPPORTED_MODELS
@@ -164,7 +181,6 @@ class LLM(RetryMixin, DebugMixin):
api_version=self.config.api_version,
custom_llm_provider=self.config.custom_llm_provider,
timeout=self.config.timeout,
top_p=self.config.top_p,
drop_params=self.config.drop_params,
**kwargs,
)

View File

@@ -205,6 +205,9 @@ class ConversationMemory:
if assistant_msg.content is not None
else [],
tool_calls=assistant_msg.tool_calls,
thinking_blocks=llm_response.choices[0].message.thinking_blocks
if hasattr(llm_response.choices[0].message, 'thinking_blocks')
else None,
)
return []
elif isinstance(action, AgentFinishAction):
@@ -236,6 +239,15 @@ class ConversationMemory:
Message(
role=role, # type: ignore[arg-type]
content=[TextContent(text=action.thought)],
thinking_blocks=tool_metadata.model_response.choices[
0
].message.thinking_blocks
if tool_metadata is not None
and hasattr(
tool_metadata.model_response.choices[0].message,
'thinking_blocks',
)
else None,
)
]
elif isinstance(action, MessageAction):

28
poetry.lock generated
View File

@@ -1,4 +1,4 @@
# This file is automatically @generated by Poetry 2.0.1 and should not be changed by hand.
# This file is automatically @generated by Poetry 2.0.0 and should not be changed by hand.
[[package]]
name = "aiohappyeyeballs"
@@ -4185,25 +4185,23 @@ types-tqdm = "*"
[[package]]
name = "litellm"
version = "1.61.19"
version = "1.62.4"
description = "Library to easily interface with LLM API providers"
optional = false
python-versions = "!=2.7.*,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,!=3.7.*,>=3.8"
python-versions = ">=3.8.1,<4.0, !=3.9.7"
groups = ["main"]
files = [
{file = "litellm-1.61.19-py3-none-any.whl", hash = "sha256:dc8e4d820f37f042a9ed9572ad0ece6a6b6790d160fdf4c61f4d1b877fba6f67"},
{file = "litellm-1.61.19.tar.gz", hash = "sha256:b6726dc8dece7b1a55252685092a0c8039700472bf72d79e5c81d0f63d145e02"},
]
files = []
develop = false
[package.dependencies]
aiohttp = "*"
click = "*"
httpx = ">=0.23.0"
importlib-metadata = ">=6.8.0"
jinja2 = ">=3.1.2,<4.0.0"
jsonschema = ">=4.22.0,<5.0.0"
jinja2 = "^3.1.2"
jsonschema = "^4.22.0"
openai = ">=1.61.0"
pydantic = ">=2.0.0,<3.0.0"
pydantic = "^2.0.0"
python-dotenv = ">=0.2.0"
tiktoken = ">=0.7.0"
tokenizers = "*"
@@ -4212,6 +4210,12 @@ tokenizers = "*"
extra-proxy = ["azure-identity (>=1.15.0,<2.0.0)", "azure-keyvault-secrets (>=4.8.0,<5.0.0)", "google-cloud-kms (>=2.21.3,<3.0.0)", "prisma (==0.11.0)", "resend (>=0.8.0,<0.9.0)"]
proxy = ["PyJWT (>=2.8.0,<3.0.0)", "apscheduler (>=3.10.4,<4.0.0)", "backoff", "cryptography (>=43.0.1,<44.0.0)", "fastapi (>=0.115.5,<0.116.0)", "fastapi-sso (>=0.16.0,<0.17.0)", "gunicorn (>=22.0.0,<23.0.0)", "orjson (>=3.9.7,<4.0.0)", "pynacl (>=1.5.0,<2.0.0)", "python-multipart (>=0.0.18,<0.0.19)", "pyyaml (>=6.0.1,<7.0.0)", "rq", "uvicorn (>=0.29.0,<0.30.0)", "uvloop (>=0.21.0,<0.22.0)"]
[package.source]
type = "git"
url = "https://github.com/BerriAI/litellm.git"
reference = "main"
resolved_reference = "17efbf0ee94d21d9e8fc8a8ce24b5c854490e0ff"
[[package]]
name = "llama-cloud"
version = "0.1.12"
@@ -8938,7 +8942,7 @@ files = [
[package.dependencies]
greenlet = [
{version = "!=0.4.17", optional = true, markers = "python_version < \"3.14\" and (platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\") or extra == \"asyncio\""},
{version = "!=0.4.17", markers = "python_version < \"3.14\" and (platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\")"},
{version = "!=0.4.17", optional = true, markers = "python_version < \"3.14\" and (platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\") or extra == \"asyncio\""},
]
typing-extensions = ">=4.6.0"
@@ -10855,4 +10859,4 @@ testing = ["coverage[toml]", "zope.event", "zope.testing"]
[metadata]
lock-version = "2.1"
python-versions = "^3.12"
content-hash = "83da0b681253a79417c9842862cdd102c1ab6e8770d9dd9e0c42bc7994be2cd0"
content-hash = "2a2c34de062d9292dcbf1e4d2af9cda764b666b654a6d62e7423b01f1966ea61"

View File

@@ -16,7 +16,7 @@ packages = [
python = "^3.12"
datasets = "*"
pandas = "*"
litellm = "^1.60.0"
litellm = { git = "https://github.com/BerriAI/litellm.git", branch = "main" }
google-generativeai = "*" # To use litellm with Gemini Pro API
google-api-python-client = "*" # For Google Sheets API
google-auth-httplib2 = "*" # For Google Sheets authentication