Mirror of https://github.com/All-Hands-AI/OpenHands.git (synced 2026-01-09 14:57:59 -05:00)
fix: Disable prompt caching in default condenser (#7781)
Co-authored-by: openhands <openhands@all-hands.dev>
Co-authored-by: Calvin Smith <calvin@all-hands.dev>
Co-authored-by: Xingyao Wang <xingyao@all-hands.dev>
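The diff below applies the same fix to three condensers (LLMAttentionCondenser, LLMSummarizingCondenser, StructuredSummaryCondenser) and updates their tests. The reasoning, per the new code comments: condenser prompts are one-shot, so a cached prefix would be written once and never read back. A back-of-the-envelope sketch of the cost (not from the commit; the base price and the Anthropic-style multipliers of roughly 1.25x for cache writes and 0.1x for cache reads are assumptions for illustration):

    # Illustrative numbers only: the base price and multipliers are assumptions.
    BASE = 3.00                  # $ per million input tokens
    CACHE_WRITE = 1.25 * BASE    # assumed cache-write premium
    CACHE_READ = 0.10 * BASE     # assumed cache-read discount (never earned here)

    tokens_m = 0.05              # a 50k-token condenser prompt, in millions

    cost_with_caching = tokens_m * CACHE_WRITE  # 0.1875: pay the write premium
    cost_without = tokens_m * BASE              # 0.1500: plain input pricing

    # The one-shot prompt is never re-sent, so no cache read offsets the write.
    assert cost_with_caching > cost_without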
@@ -114,8 +114,14 @@ class LLMAttentionCondenser(RollingCondenser):
 
     @classmethod
     def from_config(cls, config: LLMAttentionCondenserConfig) -> LLMAttentionCondenser:
+        # This condenser cannot take advantage of prompt caching. If it happens
+        # to be set, we'll pay for the cache writes but never get a chance to
+        # save on a read.
+        llm_config = config.llm_config.model_copy()
+        llm_config.caching_prompt = False
+
         return LLMAttentionCondenser(
-            llm=LLM(config=config.llm_config),
+            llm=LLM(config=llm_config),
             max_size=config.max_size,
             keep_first=config.keep_first,
         )
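The fix copies the config before flipping the flag because `model_copy()` (Pydantic v2) returns an independent model instance; mutating `config.llm_config` in place would silently disable caching for the caller's config object too. A minimal sketch of that behavior, using a stand-in config class (the real LLMConfig has many more fields):

    from pydantic import BaseModel


    class LLMConfig(BaseModel):
        # Stand-in for OpenHands' LLMConfig; two fields are enough here.
        model: str = 'gpt-4o'
        caching_prompt: bool = True


    original = LLMConfig()
    copy = original.model_copy()
    copy.caching_prompt = False

    assert original.caching_prompt  # the caller's config is untouched
    assert not copy.caching_prompt  # the condenser's LLM sees caching off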
@@ -155,8 +155,14 @@ CURRENT_STATE: Last flip: Heads, Haiku count: 15/20"""
     def from_config(
         cls, config: LLMSummarizingCondenserConfig
     ) -> LLMSummarizingCondenser:
+        # This condenser cannot take advantage of prompt caching. If it happens
+        # to be set, we'll pay for the cache writes but never get a chance to
+        # save on a read.
+        llm_config = config.llm_config.model_copy()
+        llm_config.caching_prompt = False
+
         return LLMSummarizingCondenser(
-            llm=LLM(config=config.llm_config),
+            llm=LLM(config=llm_config),
             max_size=config.max_size,
             keep_first=config.keep_first,
             max_event_length=config.max_event_length,
@@ -311,8 +311,14 @@ Capture all relevant information, especially:
     def from_config(
         cls, config: StructuredSummaryCondenserConfig
     ) -> StructuredSummaryCondenser:
+        # This condenser cannot take advantage of prompt caching. If it happens
+        # to be set, we'll pay for the cache writes but never get a chance to
+        # save on a read.
+        llm_config = config.llm_config.model_copy()
+        llm_config.caching_prompt = False
+
         return StructuredSummaryCondenser(
-            llm=LLM(config=config.llm_config),
+            llm=LLM(config=llm_config),
             max_size=config.max_size,
             keep_first=config.keep_first,
             max_event_length=config.max_event_length,
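The copy-and-disable stanza is now repeated verbatim in all three condensers. Purely as an illustration of an alternative design, not something this commit does, the pattern could be hoisted into a shared helper:

    # Hypothetical helper, not in the commit; the LLMConfig import path is assumed.
    from openhands.core.config import LLMConfig


    def _without_prompt_caching(llm_config: LLMConfig) -> LLMConfig:
        """Return a copy of the config with prompt caching forced off.

        Condensers send one-shot prompts: a cache write is never paid
        back by a later cache read.
        """
        config = llm_config.model_copy()
        config.caching_prompt = False
        return config

Each from_config would then read `llm=LLM(config=_without_prompt_caching(config.llm_config))`.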
@@ -331,10 +331,7 @@ def test_llm_summarizing_condenser_from_config():
     config = LLMSummarizingCondenserConfig(
         max_size=50,
         keep_first=10,
-        llm_config=LLMConfig(
-            model='gpt-4o',
-            api_key='test_key',
-        ),
+        llm_config=LLMConfig(model='gpt-4o', api_key='test_key', caching_prompt=True),
     )
     condenser = Condenser.from_config(config)
 
@@ -344,6 +341,10 @@ def test_llm_summarizing_condenser_from_config():
     assert condenser.max_size == 50
     assert condenser.keep_first == 10
 
+    # Since this condenser can't take advantage of caching, we intercept the
+    # passed config and manually flip the caching prompt to False.
+    assert not condenser.llm.config.caching_prompt
+
 
 def test_llm_summarizing_condenser_invalid_config():
     """Test that LLMSummarizingCondenser raises error when keep_first > max_size."""
@@ -474,6 +475,7 @@ def test_llm_attention_condenser_from_config():
         llm_config=LLMConfig(
             model='gpt-4o',
             api_key='test_key',
+            caching_prompt=True,
         ),
     )
     condenser = Condenser.from_config(config)
@@ -484,6 +486,10 @@ def test_llm_attention_condenser_from_config():
     assert condenser.max_size == 50
     assert condenser.keep_first == 10
 
+    # Since this condenser can't take advantage of caching, we intercept the
+    # passed config and manually flip the caching prompt to False.
+    assert not condenser.llm.config.caching_prompt
+
 
 def test_llm_attention_condenser_invalid_config():
     """Test that LLMAttentionCondenser raises an error if the configured LLM doesn't support response schema."""
@@ -614,6 +620,7 @@ def test_structured_summary_condenser_from_config():
         llm_config=LLMConfig(
             model='gpt-4o',
             api_key='test_key',
+            caching_prompt=True,
         ),
     )
     condenser = Condenser.from_config(config)
@@ -624,6 +631,10 @@ def test_structured_summary_condenser_from_config():
     assert condenser.max_size == 50
     assert condenser.keep_first == 10
 
+    # Since this condenser can't take advantage of caching, we intercept the
+    # passed config and manually flip the caching prompt to False.
+    assert not condenser.llm.config.caching_prompt
+
 
 def test_structured_summary_condenser_invalid_config():
     """Test that StructuredSummaryCondenser raises error when keep_first > max_size."""
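All three test updates share one shape: the input config explicitly requests caching (`caching_prompt=True`), and the test asserts that the constructed condenser's LLM has it off, proving that from_config overrides the caller's setting rather than inheriting it. An illustrative consolidation as a single parametrized test (the diff deliberately keeps three separate tests; the import paths here are assumptions):

    import pytest

    # Assumed import locations; the class names are the ones used in the diff.
    from openhands.core.config import LLMConfig
    from openhands.core.config.condenser_config import (
        LLMAttentionCondenserConfig,
        LLMSummarizingCondenserConfig,
        StructuredSummaryCondenserConfig,
    )
    from openhands.memory.condenser import Condenser


    @pytest.mark.parametrize(
        'config_cls',
        [
            LLMSummarizingCondenserConfig,
            LLMAttentionCondenserConfig,
            StructuredSummaryCondenserConfig,
        ],
    )
    def test_condenser_disables_prompt_caching(config_cls):
        config = config_cls(
            max_size=50,
            keep_first=10,
            llm_config=LLMConfig(model='gpt-4o', api_key='test_key', caching_prompt=True),
        )
        condenser = Condenser.from_config(config)
        # from_config must actively flip the flag off on its own LLM.
        assert not condenser.llm.config.caching_prompt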