fix: Disable prompt caching in default condenser (#7781)

Co-authored-by: openhands <openhands@all-hands.dev>
Co-authored-by: Calvin Smith <calvin@all-hands.dev>
Co-authored-by: Xingyao Wang <xingyao@all-hands.dev>
Authored by Calvin Smith on 2025-04-11 10:09:23 -06:00, committed by GitHub
parent e2bb69908a
commit 36e092e0ac
4 changed files with 36 additions and 7 deletions


@@ -114,8 +114,14 @@ class LLMAttentionCondenser(RollingCondenser):
 
     @classmethod
     def from_config(cls, config: LLMAttentionCondenserConfig) -> LLMAttentionCondenser:
+        # This condenser cannot take advantage of prompt caching. If it happens
+        # to be set, we'll pay for the cache writes but never get a chance to
+        # save on a read.
+        llm_config = config.llm_config.model_copy()
+        llm_config.caching_prompt = False
+
         return LLMAttentionCondenser(
-            llm=LLM(config=config.llm_config),
+            llm=LLM(config=llm_config),
             max_size=config.max_size,
             keep_first=config.keep_first,
         )
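
For context on the pattern each of these factory methods now uses: LLMConfig is a pydantic model, so model_copy() gives the condenser its own copy of the config, and flipping caching_prompt on that copy never mutates the (possibly shared) config object the agent itself keeps using. A minimal runnable sketch of the idea, with a hypothetical FakeLLMConfig stand-in for the real LLMConfig and assuming pydantic v2:

from pydantic import BaseModel


class FakeLLMConfig(BaseModel):
    # Hypothetical stand-in for the project's LLMConfig.
    model: str = 'gpt-4o'
    caching_prompt: bool = True  # the user may have prompt caching enabled


def condenser_llm_config(user_config: FakeLLMConfig) -> FakeLLMConfig:
    # model_copy() returns a new instance, so disabling caching here leaves
    # the caller's config untouched.
    llm_config = user_config.model_copy()
    llm_config.caching_prompt = False
    return llm_config


user_config = FakeLLMConfig()
condenser_config = condenser_llm_config(user_config)
assert user_config.caching_prompt is True  # the agent's own config still caches
assert condenser_config.caching_prompt is False  # the condenser's copy never will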


@@ -155,8 +155,14 @@ CURRENT_STATE: Last flip: Heads, Haiku count: 15/20"""
     def from_config(
         cls, config: LLMSummarizingCondenserConfig
     ) -> LLMSummarizingCondenser:
+        # This condenser cannot take advantage of prompt caching. If it happens
+        # to be set, we'll pay for the cache writes but never get a chance to
+        # save on a read.
+        llm_config = config.llm_config.model_copy()
+        llm_config.caching_prompt = False
+
         return LLMSummarizingCondenser(
-            llm=LLM(config=config.llm_config),
+            llm=LLM(config=llm_config),
            max_size=config.max_size,
            keep_first=config.keep_first,
            max_event_length=config.max_event_length,


@@ -311,8 +311,14 @@ Capture all relevant information, especially:
     def from_config(
         cls, config: StructuredSummaryCondenserConfig
     ) -> StructuredSummaryCondenser:
+        # This condenser cannot take advantage of prompt caching. If it happens
+        # to be set, we'll pay for the cache writes but never get a chance to
+        # save on a read.
+        llm_config = config.llm_config.model_copy()
+        llm_config.caching_prompt = False
+
         return StructuredSummaryCondenser(
-            llm=LLM(config=config.llm_config),
+            llm=LLM(config=llm_config),
            max_size=config.max_size,
            keep_first=config.keep_first,
            max_event_length=config.max_event_length,


@@ -331,10 +331,7 @@ def test_llm_summarizing_condenser_from_config():
     config = LLMSummarizingCondenserConfig(
         max_size=50,
         keep_first=10,
-        llm_config=LLMConfig(
-            model='gpt-4o',
-            api_key='test_key',
-        ),
+        llm_config=LLMConfig(model='gpt-4o', api_key='test_key', caching_prompt=True),
     )
 
     condenser = Condenser.from_config(config)
@@ -344,6 +341,10 @@ def test_llm_summarizing_condenser_from_config():
     assert condenser.max_size == 50
     assert condenser.keep_first == 10
+
+    # Since this condenser can't take advantage of caching, we intercept the
+    # passed config and manually flip the caching prompt to False.
+    assert not condenser.llm.config.caching_prompt
 
 
 def test_llm_summarizing_condenser_invalid_config():
     """Test that LLMSummarizingCondenser raises error when keep_first > max_size."""
@@ -474,6 +475,7 @@ def test_llm_attention_condenser_from_config():
         llm_config=LLMConfig(
             model='gpt-4o',
             api_key='test_key',
+            caching_prompt=True,
         ),
     )
     condenser = Condenser.from_config(config)
@@ -484,6 +486,10 @@ def test_llm_attention_condenser_from_config():
     assert condenser.max_size == 50
     assert condenser.keep_first == 10
+
+    # Since this condenser can't take advantage of caching, we intercept the
+    # passed config and manually flip the caching prompt to False.
+    assert not condenser.llm.config.caching_prompt
 
 
 def test_llm_attention_condenser_invalid_config():
     """Test that LLMAttentionCondenser raises an error if the configured LLM doesn't support response schema."""
@@ -614,6 +620,7 @@ def test_structured_summary_condenser_from_config():
         llm_config=LLMConfig(
             model='gpt-4o',
             api_key='test_key',
+            caching_prompt=True,
         ),
     )
     condenser = Condenser.from_config(config)
@@ -624,6 +631,10 @@ def test_structured_summary_condenser_from_config():
     assert condenser.max_size == 50
     assert condenser.keep_first == 10
+
+    # Since this condenser can't take advantage of caching, we intercept the
+    # passed config and manually flip the caching prompt to False.
+    assert not condenser.llm.config.caching_prompt
 
 
 def test_structured_summary_condenser_invalid_config():
     """Test that StructuredSummaryCondenser raises error when keep_first > max_size."""