feat: introduce ModelClientStreamingChunkEvent for streaming model output and update handling in agents and console (#5208)

Resolves #3983

* introduce a `model_client_stream` parameter on `AssistantAgent` to
enable token-level streaming output.
* introduce `ModelClientStreamingChunkEvent` as a type of `AgentEvent`
that passes streaming chunks to the application via `run_stream` and
`on_messages_stream`. These chunks do not affect the inner messages
list in the final `Response` or `TaskResult`.
* handle the new message type in `Console` (see the usage sketch below).
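
A minimal usage sketch of the new streaming mode (assuming autogen-agentchat
and autogen-ext 0.4+ with the OpenAI extension installed; the model name and
task are illustrative):

```python
import asyncio

from autogen_agentchat.agents import AssistantAgent
from autogen_agentchat.messages import ModelClientStreamingChunkEvent
from autogen_ext.models.openai import OpenAIChatCompletionClient


async def main() -> None:
    # Illustrative model; any streaming-capable chat completion client works.
    model_client = OpenAIChatCompletionClient(model="gpt-4o")
    agent = AssistantAgent(
        name="assistant",
        model_client=model_client,
        model_client_stream=True,  # emit ModelClientStreamingChunkEvent per chunk
    )
    async for message in agent.run_stream(task="Write a haiku about autumn."):
        if isinstance(message, ModelClientStreamingChunkEvent):
            print(message.content, end="", flush=True)  # partial model output


asyncio.run(main())
```

Alternatively, pass the stream straight to the console UI, which now renders
chunks as they arrive: `await Console(agent.run_stream(task=...))`, with
`Console` imported from `autogen_agentchat.ui`.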
Author: Eric Zhu
Date: 2025-01-28 18:49:02 -08:00
Committed by: GitHub
Parent: 8a0daf8285
Commit: 225eb9d0b2
13 changed files with 330 additions and 32 deletions


@@ -107,14 +107,14 @@ async def test_cache_create_stream() -> None:
     async for completion in cached_client.create_stream(
         [system_prompt, UserMessage(content=prompts[0], source="user")]
     ):
-        original_streamed_results.append(completion)
+        original_streamed_results.append(copy.copy(completion))
     total_usage0 = copy.copy(cached_client.total_usage())
     cached_completion_results: List[Union[str, CreateResult]] = []
     async for completion in cached_client.create_stream(
         [system_prompt, UserMessage(content=prompts[0], source="user")]
     ):
-        cached_completion_results.append(completion)
+        cached_completion_results.append(copy.copy(completion))
     total_usage1 = copy.copy(cached_client.total_usage())
     assert total_usage1.prompt_tokens == total_usage0.prompt_tokens
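
A note on the `copy.copy` change in the hunk above: a stream may yield the
same result object repeatedly and mutate it in place between yields, so
appending the object directly records only its final state. Appending a
shallow copy snapshots each item at the moment it is yielded. A minimal
sketch of the aliasing pitfall, independent of the autogen APIs:

```python
import copy


class Counter:
    """Stand-in for a streamed result object that the producer mutates in place."""

    def __init__(self) -> None:
        self.value = 0


def stream():
    item = Counter()
    for _ in range(3):
        item.value += 1
        yield item  # the same object is yielded every iteration


aliased = []
snapshots = []
for item in stream():
    aliased.append(item)               # every entry aliases one object
    snapshots.append(copy.copy(item))  # freeze the state at yield time

print([c.value for c in aliased])    # [3, 3, 3] -- later mutations leak in
print([c.value for c in snapshots])  # [1, 2, 3] -- per-yield snapshots
```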