feat: introduce ModelClientStreamingChunkEvent for streaming model output and update handling in agents and console (#5208)

Resolves #3983

* introduce a `model_client_stream` parameter on `AssistantAgent` to
enable token-level streaming output.
* introduce `ModelClientStreamingChunkEvent` as a type of `AgentEvent`
that passes streaming chunks to the application via `run_stream` and
`on_messages_stream`. These chunks do not affect the inner messages
list in the final `Response` or `TaskResult`.
* handle the new message type in `Console` (see the usage sketch below).
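
A minimal usage sketch of the new streaming mode (assuming autogen-agentchat
and autogen-ext 0.4+ with the OpenAI extension installed; the model name and
task are illustrative):

```python
import asyncio

from autogen_agentchat.agents import AssistantAgent
from autogen_agentchat.messages import ModelClientStreamingChunkEvent
from autogen_ext.models.openai import OpenAIChatCompletionClient


async def main() -> None:
    # Illustrative model; any streaming-capable chat completion client works.
    model_client = OpenAIChatCompletionClient(model="gpt-4o")
    agent = AssistantAgent(
        name="assistant",
        model_client=model_client,
        model_client_stream=True,  # emit ModelClientStreamingChunkEvent per chunk
    )
    async for message in agent.run_stream(task="Write a haiku about autumn."):
        if isinstance(message, ModelClientStreamingChunkEvent):
            print(message.content, end="", flush=True)  # partial model output


asyncio.run(main())
```

Alternatively, pass the stream straight to the console UI, which now renders
chunks as they arrive: `await Console(agent.run_stream(task=...))`, with
`Console` imported from `autogen_agentchat.ui`.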
Author: Eric Zhu
Date: 2025-01-28 18:49:02 -08:00
Committed by: GitHub
Parent: 8a0daf8285
Commit: 225eb9d0b2
13 changed files with 330 additions and 32 deletions


@@ -107,14 +107,14 @@ async def test_cache_create_stream() -> None:
     async for completion in cached_client.create_stream(
         [system_prompt, UserMessage(content=prompts[0], source="user")]
     ):
-        original_streamed_results.append(completion)
+        original_streamed_results.append(copy.copy(completion))
     total_usage0 = copy.copy(cached_client.total_usage())
     cached_completion_results: List[Union[str, CreateResult]] = []
     async for completion in cached_client.create_stream(
         [system_prompt, UserMessage(content=prompts[0], source="user")]
     ):
-        cached_completion_results.append(completion)
+        cached_completion_results.append(copy.copy(completion))
     total_usage1 = copy.copy(cached_client.total_usage())
     assert total_usage1.prompt_tokens == total_usage0.prompt_tokens
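
A note on the `copy.copy` change in the hunk above: a stream may yield the
same result object repeatedly and mutate it in place between yields, so
appending the object directly records only its final state. Appending a
shallow copy snapshots each item at the moment it is yielded. A minimal
sketch of the aliasing pitfall, independent of the autogen APIs:

```python
import copy


class Counter:
    """Stand-in for a streamed result object that the producer mutates in place."""

    def __init__(self) -> None:
        self.value = 0


def stream():
    item = Counter()
    for _ in range(3):
        item.value += 1
        yield item  # the same object is yielded every iteration


aliased = []
snapshots = []
for item in stream():
    aliased.append(item)               # every entry aliases one object
    snapshots.append(copy.copy(item))  # freeze the state at yield time

print([c.value for c in aliased])    # [3, 3, 3] -- later mutations leak in
print([c.value for c in snapshots])  # [1, 2, 3] -- per-yield snapshots
```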