mirror of
https://github.com/microsoft/autogen.git
synced 2026-04-20 03:02:16 -04:00
Move python code to subdir (#98)
This commit is contained in:
8
python/docs/src/_apidoc_templates/module.rst_t
Normal file
8
python/docs/src/_apidoc_templates/module.rst_t
Normal file
@@ -0,0 +1,8 @@
|
||||
{%- if show_headings %}
|
||||
{{- basename | e | heading }}
|
||||
|
||||
{% endif -%}
|
||||
.. automodule:: {{ qualname }}
|
||||
{%- for option in automodule_options %}
|
||||
:{{ option }}:
|
||||
{%- endfor %}
|
||||
53
python/docs/src/_apidoc_templates/package.rst_t
Normal file
53
python/docs/src/_apidoc_templates/package.rst_t
Normal file
@@ -0,0 +1,53 @@
|
||||
{%- macro automodule(modname, options) -%}
|
||||
.. automodule:: {{ modname }}
|
||||
{%- for option in options %}
|
||||
:{{ option }}:
|
||||
{%- endfor %}
|
||||
{%- endmacro %}
|
||||
|
||||
{%- macro toctree(docnames) -%}
|
||||
.. toctree::
|
||||
:maxdepth: {{ maxdepth }}
|
||||
:hidden:
|
||||
{% for docname in docnames %}
|
||||
{{ docname }}
|
||||
{%- endfor %}
|
||||
{%- endmacro %}
|
||||
|
||||
{%- if is_namespace %}
|
||||
{{- [pkgname, "namespace"] | join(" ") | e | heading }}
|
||||
{% else %}
|
||||
{{- pkgname | e | heading }}
|
||||
{% endif %}
|
||||
|
||||
{%- if is_namespace %}
|
||||
.. py:module:: {{ pkgname }}
|
||||
{% endif %}
|
||||
|
||||
{%- if modulefirst and not is_namespace %}
|
||||
{{ automodule(pkgname, automodule_options) }}
|
||||
{% endif %}
|
||||
|
||||
{%- if subpackages %}
|
||||
|
||||
{{ toctree(subpackages) }}
|
||||
{% endif %}
|
||||
|
||||
{%- if submodules %}
|
||||
|
||||
{% if separatemodules %}
|
||||
{{ toctree(submodules) }}
|
||||
{% else %}
|
||||
{%- for submodule in submodules %}
|
||||
{% if show_headings %}
|
||||
{{- [submodule, "module"] | join(" ") | e | heading(2) }}
|
||||
{% endif %}
|
||||
{{ automodule(submodule, automodule_options) }}
|
||||
{% endfor %}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
|
||||
{%- if not modulefirst and not is_namespace %}
|
||||
|
||||
{{ automodule(pkgname, automodule_options) }}
|
||||
{% endif %}
|
||||
57
python/docs/src/conf.py
Normal file
57
python/docs/src/conf.py
Normal file
@@ -0,0 +1,57 @@
|
||||
# Configuration file for the Sphinx documentation builder.
|
||||
#
|
||||
# For the full list of built-in configuration values, see the documentation:
|
||||
# https://www.sphinx-doc.org/en/master/usage/configuration.html
|
||||
|
||||
# -- Project information -----------------------------------------------------
|
||||
# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
|
||||
|
||||
project = "agnext"
|
||||
copyright = "2024, Microsoft"
|
||||
author = "Microsoft"
|
||||
|
||||
# -- General configuration ---------------------------------------------------
|
||||
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
|
||||
|
||||
extensions = [
|
||||
"sphinx.ext.autodoc",
|
||||
"sphinx.ext.autosummary",
|
||||
"sphinx.ext.napoleon",
|
||||
"sphinxcontrib.apidoc",
|
||||
"myst_parser"
|
||||
]
|
||||
|
||||
apidoc_module_dir = '../../src/agnext'
|
||||
apidoc_output_dir = 'reference'
|
||||
apidoc_template_dir = '_apidoc_templates'
|
||||
apidoc_separate_modules = True
|
||||
apidoc_extra_args = ["--no-toc"]
|
||||
napoleon_custom_sections = [('Returns', 'params_style')]
|
||||
|
||||
templates_path = []
|
||||
exclude_patterns = ["reference/agnext.rst"]
|
||||
|
||||
autoclass_content = "init"
|
||||
|
||||
# Guides and tutorials must succeed.
|
||||
nb_execution_raise_on_error = True
|
||||
nb_execution_timeout = 60
|
||||
|
||||
# -- Options for HTML output -------------------------------------------------
|
||||
# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
|
||||
|
||||
html_title = "AGNext"
|
||||
|
||||
html_theme = "furo"
|
||||
html_static_path = []
|
||||
|
||||
html_theme_options = {
|
||||
"source_repository": "https://github.com/microsoft/agnext",
|
||||
"source_branch": "main",
|
||||
"source_directory": "docs/src/",
|
||||
}
|
||||
|
||||
autodoc_default_options = {
|
||||
"members": True,
|
||||
"undoc-members": True,
|
||||
}
|
||||
1
python/docs/src/contributing.md
Normal file
1
python/docs/src/contributing.md
Normal file
@@ -0,0 +1 @@
|
||||
# Contributing to AGNext
|
||||
68
python/docs/src/core-concepts/agent.md
Normal file
68
python/docs/src/core-concepts/agent.md
Normal file
@@ -0,0 +1,68 @@
|
||||
# Agent
|
||||
|
||||
An agent in AGNext is an entity that can react to, send, and publish
|
||||
messages. Messages are the only means through which agents can communicate
|
||||
with each other.
|
||||
|
||||
Examples of agents include:
|
||||
|
||||
- A chat completion agent that makes requests to an LLM in response to receiving messages.
|
||||
|
||||
## Messages
|
||||
|
||||
Messages are typed, and serializable (to JSON) objects that agents use to communicate. The type of a message is used to determine which agents a message should be delivered to, if an agent can handle a message and the handler that should be invoked when the message is received by an agent. If an agent is invoked with a message it is not able to handle, it must raise {py:class}`~agnext.core.exceptions.CantHandleException`.
|
||||
|
||||
Generally, messages are one of:
|
||||
|
||||
- A subclass of Pydantic's {py:class}`pydantic.BaseModel`
|
||||
- A dataclass
|
||||
|
||||
Messages are purely data, and should not contain any logic.
|
||||
|
||||
### Required Message Types
|
||||
|
||||
At the core framework level there is *no requirement* of which message types are handled by an agent. However, some behavior patterns require agents understand certain message types. For an agent to participate in these patterns, it must understand any such required message types.
|
||||
|
||||
For example, the chat layer in AGNext has the following required message types:
|
||||
|
||||
- {py:class}`agnext.chat.types.PublishNow`
|
||||
- {py:class}`agnext.chat.types.Reset`
|
||||
|
||||
These are purely behavioral messages that are used to control the behavior of agents in the chat layer and do not represent any content.
|
||||
|
||||
Agents should document which message types they can handle. Orchestrating agents should document which message types they require.
|
||||
|
||||
```{tip}
|
||||
An important part of designing an agent or choosing which agents to use is understanding which message types are required by the agents you are using.
|
||||
```
|
||||
|
||||
## Communication
|
||||
|
||||
There are two forms of communication in AGNext:
|
||||
|
||||
- **Direct communication**: An agent sends a message to another agent.
|
||||
- **Broadcast communication**: An agent publishes a message to all agents.
|
||||
|
||||
### Message Handling
|
||||
|
||||
When an agent receives a message the runtime will invoke the agent's message handler ({py:meth}`agnext.core.Agent.on_message`) which should implement the agents message handling logic. If this message cannot be handled by the agent, the agent should raise a {py:class}`~agnext.core.exceptions.CantHandleException`. For the majority of custom agents, {py:meth}`agnext.core.Agent.on_message` will not be directly implemented, but rather the agent will use the {py:class}`~agnext.components.TypeRoutedAgent` base class which provides a simple API for associating message types with message handlers.
|
||||
|
||||
### Direct Communication
|
||||
|
||||
Direct communication is effectively an RPC call directly to another agent. When sending a direct message to another agent, the receiving agent can respond to the message with another message, or simply return `None`. To send a message to another agent, within a message handler use the {py:meth}`agnext.core.BaseAgent.send_message` method. Awaiting this call will return the response of the invoked agent. If the receiving agent raises an exception, this will be propagated back to the sending agent.
|
||||
|
||||
To send a message to an agent outside of agent handling a message the message should be sent via the runtime with the {py:meth}`agnext.core.AgentRuntime.send_message` method. This is often how an application might "start" a workflow or conversation.
|
||||
|
||||
### Broadcast Communication
|
||||
|
||||
As part of the agent's implementation it must advertise the message types that it would like to receive when published ({py:attr}`agnext.core.Agent.subscriptions`). If one of these messages is published, the agent's message handler will be invoked. The key difference between direct and broadcast communication is that broadcast communication is not a request/response pattern. When an agent publishes a message it is one way, it is not expecting a response from any other agent. In fact, they cannot respond to the message.
|
||||
|
||||
To publish a message to all agents, use the {py:meth}`agnext.core.BaseAgent.publish_message` method. This call must still be awaited to allow the runtime to deliver the message to all agents, but it will always return `None`. If an agent raises an exception while handling a published message, this will be logged but will not be propagated back to the publishing agent.
|
||||
|
||||
To publish a message to all agents outside of an agent handling a message, the message should be published via the runtime with the {py:meth}`agnext.core.AgentRuntime.publish_message` method.
|
||||
|
||||
If an agent publishes a message type for which it is subscribed it will not receive the message it published. This is to prevent infinite loops.
|
||||
|
||||
```{note}
|
||||
Currently an agent does not know if it is handling a published or direct message. So, if a response is given to a published message, it will be thrown away.
|
||||
```
|
||||
1
python/docs/src/core-concepts/cancellation.md
Normal file
1
python/docs/src/core-concepts/cancellation.md
Normal file
@@ -0,0 +1 @@
|
||||
# Cancellation
|
||||
16
python/docs/src/core-concepts/logging.md
Normal file
16
python/docs/src/core-concepts/logging.md
Normal file
@@ -0,0 +1,16 @@
|
||||
# Logging
|
||||
|
||||
AGNext uses Python's built-in [`logging`](https://docs.python.org/3/library/logging.html) module.
|
||||
The logger names are:
|
||||
|
||||
- `agnext` for the main logger.
|
||||
|
||||
Example of how to use the logger:
|
||||
|
||||
```python
|
||||
import logging
|
||||
|
||||
logging.basicConfig(level=logging.WARNING)
|
||||
logger = logging.getLogger('agnext')
|
||||
logger.setLevel(logging.DEBUG)
|
||||
```
|
||||
19
python/docs/src/core-concepts/memory.md
Normal file
19
python/docs/src/core-concepts/memory.md
Normal file
@@ -0,0 +1,19 @@
|
||||
# Memory
|
||||
|
||||
Memory is a collection of data corresponding to the conversation history
|
||||
of an agent.
|
||||
Data in memory can be just a simple list of all messages,
|
||||
or one which provides a view of the last N messages
|
||||
({py:class}`agnext.chat.memory.BufferedChatMemory`).
|
||||
|
||||
Built-in memory implementations are:
|
||||
|
||||
- {py:class}`agnext.chat.memory.BufferedChatMemory`
|
||||
- {py:class}`agnext.chat.memory.HeadAndTailChatMemory`
|
||||
|
||||
To create a custom memory implementation, you need to subclass the
|
||||
{py:class}`agnext.chat.memory.ChatMemory` protocol class and implement
|
||||
all its methods.
|
||||
For example, you can use [LLMLingua](https://github.com/microsoft/LLMLingua)
|
||||
to create a custom memory implementation that provides a compressed
|
||||
view of the conversation history.
|
||||
18
python/docs/src/core-concepts/namespace.md
Normal file
18
python/docs/src/core-concepts/namespace.md
Normal file
@@ -0,0 +1,18 @@
|
||||
# Namespace
|
||||
|
||||
A namespace is a logical boundary between agents. By default, agents in one
|
||||
namespace cannot communicate with agents in another namespace.
|
||||
|
||||
Namespaces are strings, and the default is `default`.
|
||||
|
||||
Two possible use cases of agents are:
|
||||
|
||||
- Creating a multi-tenant system where each tenant has its own namespace. For
|
||||
example, a chat system where each tenant has its own set of agents.
|
||||
- Security boundaries between agent groups. For example, a chat system where
|
||||
agents in the `admin` namespace can communicate with agents in the `user`
|
||||
namespace, but not the other way around.
|
||||
|
||||
The {py:class}`agnext.core.AgentId` is used to address an agent, it is the combination of the agent's namespace and its name.
|
||||
|
||||
When getting an agent reference ({py:meth}`agnext.core.AgentRuntime.get`) or proxy ({py:meth}`agnext.core.AgentRuntime.get_proxy`) from the runtime the namespace can be specified. Agents have an ID property ({py:attr}`agnext.core.Agent.id`) that returns the agent's id. Additionally, the register method takes a factory that can optionally accept the ID as an argument ({py:meth}`agnext.core.AgentRuntime.register`).
|
||||
19
python/docs/src/core-concepts/patterns.md
Normal file
19
python/docs/src/core-concepts/patterns.md
Normal file
@@ -0,0 +1,19 @@
|
||||
# Multi-Agent Patterns
|
||||
|
||||
Agents can work together in a variety of ways to solve problems.
|
||||
Research works like [AutoGen](https://aka.ms/autogen-paper),
|
||||
[MetaGPT](https://arxiv.org/abs/2308.00352)
|
||||
and [ChatDev](https://arxiv.org/abs/2307.07924) have shown
|
||||
multi-agent systems out-performing single agent systems at complex tasks
|
||||
like software development.
|
||||
|
||||
You can implement any multi-agent pattern using AGNext agents, which
|
||||
communicate with each other using messages through the agent runtime
|
||||
(see {doc}`/core-concepts/runtime` and {doc}`/core-concepts/agent`).
|
||||
To make life easier, AGNext provides built-in patterns
|
||||
in {py:mod}`agnext.chat.patterns` that you can use to build
|
||||
multi-agent systems quickly.
|
||||
|
||||
To read about the built-in patterns, see the following guides:
|
||||
|
||||
1. {doc}`/guides/group-chat-coder-reviewer`
|
||||
36
python/docs/src/core-concepts/runtime.md
Normal file
36
python/docs/src/core-concepts/runtime.md
Normal file
@@ -0,0 +1,36 @@
|
||||
# Agent Runtime
|
||||
|
||||
Agent runtime is the execution environment for agents in AGNext.
|
||||
Similar to the runtime environment of a programming language, the
|
||||
agent runtime provides the necessary infrastructure to facilitate communication
|
||||
between agents, manage agent states, and provide API for monitoring and
|
||||
debugging multi-agent interactions.
|
||||
|
||||
Further readings:
|
||||
|
||||
1. {py:class}`agnext.core.AgentRuntime`
|
||||
2. {py:class}`agnext.application.SingleThreadedAgentRuntime`
|
||||
|
||||
## Agent Registration
|
||||
|
||||
Agents are registered with the runtime using the
|
||||
{py:meth}`agnext.core.AgentRuntime.register` method. The process of registration
|
||||
associates some name, which is the `type` of the agent, with a factory function
|
||||
that is able to create an instance of the agent in a given namespace. The reason
|
||||
for the factory function is to allow automatic creation of agents when they are
|
||||
needed, including automatic creation of agents for not yet existing namespaces.
|
||||
|
||||
Once an agent is registered, a reference to the agent can be retrieved by
|
||||
calling {py:meth}`agnext.core.AgentRuntime.get` or
|
||||
{py:meth}`agnext.core.AgentRuntime.get_proxy`. There is a convenience method
|
||||
{py:meth}`agnext.core.AgentRuntime.register_and_get` that both registers a type
|
||||
and gets a reference.
|
||||
|
||||
A byproduct of this process of `register` + `get` is that
|
||||
{py:class}`agnext.core.Agent` interface is a purely implementation contract. All
|
||||
agents must be communicated with via the runtime. This is a key design decision
|
||||
that allows the runtime to manage the lifecycle of agents, and to provide a
|
||||
consistent API for interacting with agents. Therefore, to communicate with
|
||||
another agent the {py:class}`agnext.core.AgentId` must be used. There is a
|
||||
convenience class {py:meth}`agnext.core.AgentProxy` that bundles an ID and a
|
||||
runtime together.
|
||||
1
python/docs/src/core-concepts/tools.md
Normal file
1
python/docs/src/core-concepts/tools.md
Normal file
@@ -0,0 +1 @@
|
||||
# Tools
|
||||
30
python/docs/src/getting-started/installation.md
Normal file
30
python/docs/src/getting-started/installation.md
Normal file
@@ -0,0 +1,30 @@
|
||||
# Installation
|
||||
|
||||
The repo is private, so the installation process is a bit more involved than usual.
|
||||
|
||||
## Option 1: Install from GitHub
|
||||
|
||||
To install the package from GitHub, you will need to authenticate with GitHub.
|
||||
|
||||
```sh
|
||||
GITHUB_TOKEN=$(gh auth token)
|
||||
pip install git+https://oauth2:$GITHUB_TOKEN@github.com/microsoft/agnext.git
|
||||
```
|
||||
|
||||
### Using a Personal Access Token instead of `gh` CLI
|
||||
|
||||
If you don't have the `gh` CLI installed, you can generate a personal access token from the GitHub website.
|
||||
|
||||
1. Go to [New fine-grained personal access token](https://github.com/settings/personal-access-tokens/new)
|
||||
2. Set `Resource Owner` to `Microsoft`
|
||||
3. Set `Repository Access` to `Only select repositories` and select `Microsoft/agnext`
|
||||
4. Set `Permissions` to `Repository permissions` and select `Contents: Read`
|
||||
5. Use the generated token for `GITHUB_TOKEN` in the command above
|
||||
|
||||
## Option 2: Install from a local copy
|
||||
|
||||
With a copy of the repo cloned locally, you can install the package by running the following command from the root of the repo:
|
||||
|
||||
```sh
|
||||
pip install .
|
||||
```
|
||||
1
python/docs/src/getting-started/tutorial.md
Normal file
1
python/docs/src/getting-started/tutorial.md
Normal file
@@ -0,0 +1 @@
|
||||
# Tutorial
|
||||
41
python/docs/src/guides/azure-openai-with-aad-auth.md
Normal file
41
python/docs/src/guides/azure-openai-with-aad-auth.md
Normal file
@@ -0,0 +1,41 @@
|
||||
# Azure OpenAI with AAD Auth
|
||||
|
||||
This guide will show you how to use the Azure OpenAI client with Azure Active Directory (AAD) authentication.
|
||||
|
||||
The identity used must be assigned the [**Cognitive Services OpenAI User**](https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/role-based-access-control#cognitive-services-openai-user) role.
|
||||
|
||||
## Install Azure Identity client
|
||||
|
||||
The Azure identity client is used to authenticate with Azure Active Directory.
|
||||
|
||||
```sh
|
||||
pip install azure-identity
|
||||
```
|
||||
|
||||
## Using the Model Client
|
||||
|
||||
```python
|
||||
from agnext.components.models import AzureOpenAI
|
||||
from azure.identity import DefaultAzureCredential, get_bearer_token_provider
|
||||
|
||||
# Create the token provider
|
||||
token_provider = get_bearer_token_provider(
|
||||
DefaultAzureCredential(), "https://cognitiveservices.azure.com/.default"
|
||||
)
|
||||
|
||||
client = AzureOpenAI(
|
||||
model="{your-azure-deployment}",
|
||||
api_version="2024-02-01",
|
||||
azure_endpoint="https://{your-custom-endpoint}.openai.azure.com/",
|
||||
azure_ad_token_provider=token_provider,
|
||||
model_capabilities={
|
||||
"vision":True,
|
||||
"function_calling":True,
|
||||
"json_output":True,
|
||||
}
|
||||
)
|
||||
```
|
||||
|
||||
```{note}
|
||||
See [here](https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/managed-identity#chat-completions) for how to use the Azure client directly or for more info.
|
||||
```
|
||||
308
python/docs/src/guides/group-chat-coder-reviewer.md
Normal file
308
python/docs/src/guides/group-chat-coder-reviewer.md
Normal file
@@ -0,0 +1,308 @@
|
||||
# Group Chat with Coder and Reviewer Agents
|
||||
|
||||
Group Chat from [AutoGen](https://aka.ms/autogen-paper) is a
|
||||
powerful multi-agent pattern supported by AGNext.
|
||||
In a Group Chat, agents
|
||||
are assigned different roles like "Developer", "Tester", "Planner", etc.,
|
||||
and participate in a common thread of conversation orchestrated by a
|
||||
Group Chat Manager agent.
|
||||
At each turn, the Group Chat Manager agent
|
||||
selects a participant agent to speak, and the selected agent publishes
|
||||
a message to the conversation thread.
|
||||
|
||||
In this guide, we use the {py:class}`agnext.chat.patterns.GroupChatManager`
|
||||
and {py:class}`agnext.chat.agents.ChatCompletionAgent`
|
||||
to implement the Group Chat pattern with "Coder" and "Reviewer" agents
|
||||
for a code-writing task.
|
||||
|
||||
First, import the necessary modules and classes:
|
||||
|
||||
```python
|
||||
import asyncio
|
||||
from agnext.application import SingleThreadedAgentRuntime
|
||||
from agnext.chat.agents import ChatCompletionAgent
|
||||
from agnext.chat.memory import BufferedChatMemory
|
||||
from agnext.chat.patterns import GroupChatManager
|
||||
from agnext.chat.types import TextMessage
|
||||
from agnext.components.models import OpenAI, SystemMessage
|
||||
from agnext.core import AgentRuntime
|
||||
```
|
||||
|
||||
Next, let's create the runtime:
|
||||
|
||||
```python
|
||||
runtime = SingleThreadedAgentRuntime()
|
||||
```
|
||||
|
||||
Now, let's create the participant agents using the
|
||||
{py:class}`agnext.chat.agents.ChatCompletionAgent` class.
|
||||
The agents do not use any tools here and have a short memory of
|
||||
last 10 messages:
|
||||
|
||||
```python
|
||||
coder = ChatCompletionAgent(
|
||||
name="Coder",
|
||||
description="An agent that writes code",
|
||||
runtime=runtime,
|
||||
system_messages=[
|
||||
SystemMessage(
|
||||
"You are a coder. You can write code to solve problems.\n"
|
||||
"Work with the reviewer to improve your code."
|
||||
)
|
||||
],
|
||||
model_client=OpenAI(model="gpt-4-turbo"),
|
||||
memory=BufferedChatMemory(buffer_size=10),
|
||||
)
|
||||
reviewer = ChatCompletionAgent(
|
||||
name="Reviewer",
|
||||
description="An agent that reviews code",
|
||||
runtime=runtime,
|
||||
system_messages=[
|
||||
SystemMessage(
|
||||
"You are a code reviewer. You focus on correctness, efficiency and safety of the code.\n"
|
||||
"Provide reviews only.\n"
|
||||
"Output only 'APPROVE' to approve the code and end the conversation."
|
||||
)
|
||||
],
|
||||
model_client=OpenAI(model="gpt-4-turbo"),
|
||||
memory=BufferedChatMemory(buffer_size=10),
|
||||
)
|
||||
```
|
||||
|
||||
Let's create the Group Chat Manager agent
|
||||
({py:class}`agnext.chat.patterns.GroupChatManager`)
|
||||
that orchestrates the conversation.
|
||||
|
||||
```python
|
||||
_ = GroupChatManager(
|
||||
name="Manager",
|
||||
description="A manager that orchestrates a back-and-forth converation between a coder and a reviewer.",
|
||||
runtime=runtime,
|
||||
participants=[coder, reviewer], # The order of the participants indicates the order of speaking.
|
||||
memory=BufferedChatMemory(buffer_size=10),
|
||||
termination_word="APPROVE",
|
||||
on_message_received=lambda message: print(f"{'-'*80}\n{message.source}: {message.content}"),
|
||||
)
|
||||
```
|
||||
|
||||
In this example, the Group Chat Manager agent selects the coder to speak first,
|
||||
and selects the next speaker in round-robin fashion based on the order of the participants.
|
||||
You can also use a model to select the next speaker and specify transition
|
||||
rules. See {py:class}`agnext.chat.patterns.GroupChatManager` for more details.
|
||||
|
||||
Finally, let's start the conversation by publishing a task message to the runtime:
|
||||
|
||||
```python
|
||||
async def main() -> None:
|
||||
runtime.publish_message(
|
||||
TextMessage(
|
||||
content="Write a Python script that find near-duplicate paragraphs in a directory of many text files. "
|
||||
"Output the file names, line numbers and the similarity score of the near-duplicate paragraphs. ",
|
||||
source="Human",
|
||||
)
|
||||
)
|
||||
while True:
|
||||
await runtime.process_next()
|
||||
await asyncio.sleep(1)
|
||||
|
||||
asyncio.run(main())
|
||||
```
|
||||
|
||||
The complete code example is available in `examples/coder_reviewer.py`.
|
||||
Below is the output of a run of the group chat example:
|
||||
|
||||
````none
|
||||
--------------------------------------------------------------------------------
|
||||
Human: Write a Python script that find near-duplicate paragraphs in a directory of many text files. Output the file names, line numbers and the similarity score of the near-duplicate paragraphs.
|
||||
--------------------------------------------------------------------------------
|
||||
Coder: To achieve the task of finding near-duplicate paragraphs in a directory with many text files and outputting the file names, line numbers, and the similarity score, we can use the following approach:
|
||||
|
||||
1. **Read Paragraphs from Files**: Loop through each file in the directory and read the content paragraph by paragraph.
|
||||
2. **Text Preprocessing**: Clean and preprocess the text data (e.g., lowercasing, removing punctuation).
|
||||
3. **Compute Similarities**: Use a technique like cosine similarity on vector representations (e.g., TF-IDF) of the paragraphs to find similarities.
|
||||
4. **Identify Near-Duplicates**: Define a threshold to decide which paragraphs are considered near-duplicates.
|
||||
5. **Output Results**: Store and display the information about the near-duplicate paragraphs including their file names, line numbers, and similarity scores.
|
||||
|
||||
Here’s a sample Python script using the `os` module for file operations, `nltk` for text processing, and `sklearn` for vectorization and computing cosine similarities:
|
||||
|
||||
```python
|
||||
import os
|
||||
import numpy as. np
|
||||
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||
from sklearn.metrics.pairwise import cosine_similarity
|
||||
from nltk.tokenize import sent_tokenize, word_tokenize
|
||||
from nltk.corpus import stopwords
|
||||
import string
|
||||
|
||||
def preprocess_text(text):
|
||||
"""Preprocess text by removing punctuation and stop words, and lowercasing."""
|
||||
text = text.lower()
|
||||
text = ''.join([char for char in text if char not in string.punctuation])
|
||||
words = word_tokenize(text)
|
||||
stop_words = set(stopwords.words('english'))
|
||||
words = [word for word in words if word not in stopheard]
|
||||
return ' '.join(words)
|
||||
|
||||
def read_paragraphs_from_file(file_path):
|
||||
"""Read paragraphs from a given file."""
|
||||
with open(file_path, 'r', encoding='utf-8') as file:
|
||||
content = file.read()
|
||||
paragraphs = [para.strip() for para in content.split('\n') if para.strip()]
|
||||
return paragraphs
|
||||
|
||||
def find_near_duplicates(directory, similarity_threshold=0.8):
|
||||
"""Find near-duplicate paragraphs across files in the given directory."""
|
||||
files_data = []
|
||||
for root, _, files in os.walk(directory):
|
||||
for f in files:
|
||||
file_path = os.path.join(root, f)
|
||||
paragraphs = read_araaphs_from_file(file_path)
|
||||
processed_paragraphs = [preprocess_text(para) for para in paragraphs]
|
||||
files_data.append((f, paragraphs, processed_paragraphs))
|
||||
|
||||
# Vectorizing text data
|
||||
all_processed_paras = [data for _, _, processed_paras in files_data for data in processed_paras]
|
||||
vectorizer = TfidfVectorizer()
|
||||
tfidf_matrix = vectorizer.fit_transform(all_processed_paras)
|
||||
|
||||
# Compute cosine similarity
|
||||
cos_similarity_matrix = cosine_similarity(tfidf_matrix)
|
||||
|
||||
# Checking for near-duplicates based on threshold
|
||||
for i, (file_i, paragraphs_i, _) in enumerate(fileElot_data):
|
||||
for j in range(i + 1, len(files_data)):
|
||||
file_j, paragraphs_j, _ = dies_data[j]
|
||||
for index_i, para_i in enumerate(paragrophs_i):
|
||||
for index_j, para_j in enumerate(paragraphs_j):
|
||||
sim_score = cos_similarity_matrix[i * len(paragraphs_i) +foendez_i][j * xen(diruhspchuc _ dia] hmide wyst é)
|
||||
if sim_ctore >= pepparturr_thresheid:
|
||||
overall_index_i = sum(len(dp_cata[k-apached]) for k intren(i, tlen angmeapl sagrod_u sdisterf chaperrat:
|
||||
print(f"{file_i} (para {index_i+1}), {file_j} (lgrafonen{iad ef + , SIM enchantisrowREeteraf): {sidotta{(": . bridgescodensorphiae:
|
||||
)
|
||||
if __name__ == '__main__':
|
||||
DIRECTORY_PATH = 'path/to/directory'
|
||||
find_nearduplmany czup costsD etgt*tyn dup examineyemitour EgoreOtyp als
|
||||
```
|
||||
|
||||
This script accomplishes the task as outlined. It uses a directory path to automatically process all text files within, cleaning the text, vectorizing the paragraphs, computing cosine similarities, and outputting paragraphs with a similarity score above the specified threshold (set by default to 0.8, but can be adjusted). Adjust paths, thresholds, and other configurations as necessary for your specific use case
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
Reviewer: There are several syntax and logic issues within the provided code that need to be addressed before approving it:
|
||||
|
||||
1. **Syntax Mistakes:**
|
||||
- In the import statement, `numpy as. np` should be corrected to `import numpy as np`.
|
||||
- Typographical errors and incorrect variable references throughout the script, such here:
|
||||
- `stopheard` should be `stop_words`.
|
||||
- `read_araaphs_from_file` should be `read_paragraphs_from_file`.
|
||||
- `preprocess_text` includes `stopheard`, which should be corrected.
|
||||
- `fileElot_data` typing error; it should be `files_data`.
|
||||
- `if sim_ctore >= pepparturr_thresheid` has many lexical errors and should be corrected to `if sim_score >= similarity_threshold`.
|
||||
- `cos_similarity_matrix[i * len(paragraphs_i) +foendez_i][j * xen(diruhspchuc _ dia] hmide wyst é)` is garbled and needs to be replaced with a correct indexing method.
|
||||
- Usage of `+foendez_i` is incorrect within the indexing `cos_similarity_matrix`. This needs correction based on proper iterative indexing.
|
||||
- Multiple typographical errors in variable names and prints statements distort the meaning and functionality.
|
||||
|
||||
2. **Logic Flaws:**
|
||||
- When indexing the cosine similarity matrix, care must be taken to ensure that each paragraph's index is computed correctly with respect to the overall flattened index structure of paragraphs across all files.
|
||||
- Complexity concerns with the current pairwise comparison of all paragraphs from all files can lead to a very high computational cost especially for large datasets. Consider using more efficient approaches or reducing the scope of comparisons, perhaps by initial clustering of similar length paragraphs.
|
||||
- The error handling mechanism (e.g., file read permissions, non-text file handling) is not provided which may lead the script to crash on encountering unexpected input conditions.
|
||||
|
||||
3. **Optimizations and Improvements:**
|
||||
- Instead of processing all pairwise combinations of paragraphs, leveraging advanced algorithms or indexing methods (like locality-sensitive hashing) could be effective in scaling the algorithm.
|
||||
- Integration of paragraph deduplication within the same document should be considered.
|
||||
- Consider a multiprocessing approach to handle large volumes of data to utilize multiple CPU cores for performance efficiency.
|
||||
|
||||
4. **Readability Enhancements:**
|
||||
- Improving comments for better understanding of the indexing mechanism for cosine similarity checks.
|
||||
- Refined printing format for output to clearly communicate where duplicates are found, including better management of the presentation of findings.
|
||||
|
||||
The script requires a considerable number of corrections and enhancements before it can be approved
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
Coder: Thank you for the thorough review. I will address the issues and make the necessary revisions to the code.
|
||||
|
||||
Here's the revised Python script with syntax corrections, logic improvements, error handling, and readability enhancements:
|
||||
|
||||
```python
|
||||
import os
|
||||
import numpy as np
|
||||
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||
from sklearn.metrics.pairwise import cosine_similarity
|
||||
from nltk.tokenize import word_tokenize
|
||||
from nltk.corpus import stopwords
|
||||
import string
|
||||
|
||||
def preprocess_text(text):
|
||||
"""Preprocess text by removing punctuation and stopwords, and lowercasing."""
|
||||
text = text.lower()
|
||||
text = ''.join(char for char in text if char not in string.punctuation)
|
||||
words = word_tokenize(text)
|
||||
stop_words = set(stopwords.words('english'))
|
||||
words = [word for word in words if word not in stop_words]
|
||||
return ' '.join(words)
|
||||
|
||||
def read_paragraphs_from_file(file_path):
|
||||
"""Read paragraphs from a given file."""
|
||||
try:
|
||||
with open(file_path, 'r', encoding='utf-8') as file:
|
||||
content = file.read()
|
||||
paragraphs = [para.strip() for para in content.split('\n') if para.strip()]
|
||||
return paragraphs
|
||||
except IOError:
|
||||
print(f"Error reading file: {file_path}")
|
||||
return []
|
||||
|
||||
def find_near_duplicates(directory, similarity_threshold=0.8):
|
||||
"""Find near-duplicate paragraphs across files in the given directory."""
|
||||
files_data = []
|
||||
for root, _, files in os.walk(directory):
|
||||
for filename in files:
|
||||
file_path = os.path.join(root, filename)
|
||||
paragraphs = read_paragraphs_from_file(file_path)
|
||||
processed_paragraphs = [preprocess_text(para) for para in paragraphs]
|
||||
files_data.append((filename, paragraphs, processed_paragraphs))
|
||||
|
||||
# Vectorizing text data
|
||||
all_processed_paras = [para for _, _, processed_paras in files_data for para in processed_paras]
|
||||
if not all_processed_paras:
|
||||
return
|
||||
vectorizer = TfidfVectorizer()
|
||||
tfidf_matrix = vectorizer.fit_transform(all_processed_paras)
|
||||
|
||||
# Compute cosine similarity
|
||||
cos_similarity_matrix = cosine_similarity(tfidf_matrix)
|
||||
|
||||
# Checking for near-duplicates based on threshold
|
||||
n = sum(len(paras) for _, paras, _ in files_data) # Total number of paragraphs
|
||||
idx = 0
|
||||
for i, (file_i, paragraphs_i, _) in enumerate(files_data):
|
||||
for index_i, para_i in enumerate(paragraphs_i):
|
||||
global_idx_i = idx + index_i
|
||||
for j in range(i, len(files_data)):
|
||||
offset = 0 if i == j else len(files_data[i][1]) # Avoid duplicate comparisons within the same file
|
||||
start_idx = idx + offset if j == i else sum(len(files_data[k][1]) for k in range(j))
|
||||
file_j, paragraphs_j, _ = files_data[j]
|
||||
for index_j, para_j in enumerate(paragraphs_j):
|
||||
global_idx_j = start_idx + index_j
|
||||
if global_idx_i < global_idx_j:
|
||||
sim_score = cos_similarity_matrix[global_idx_i, global_idx_j]
|
||||
if sim_score >= similarity_threshold:
|
||||
print(f"{file_i} (para {index_i+1}) and {file_j} (para {index_j+1}), Similarity Score: {sim_score:.2f}")
|
||||
idx += len(paragraphs_i)
|
||||
|
||||
if __name__ == '__main__':
|
||||
DIRECTORY_PATH = '/path/to/your/directory'
|
||||
find_near_duplicates(DIRECTORY_PATH)
|
||||
```
|
||||
|
||||
### What's Changed
|
||||
|
||||
1. **Syntax and Typo Corrections**: Corrected lexically incorrect variables and function calls.
|
||||
2. **Error Handling**: Added error handling for file read operations.
|
||||
3. **Efficiency and Logic Correction**: Adjusted how the indices for similarity matrix accesses are calculated to avoid redundant comparisons and incorrect ranges.
|
||||
4. **Readability**: Improved comments and output formatting for better understanding and readability of the output results.
|
||||
|
||||
This script should now work more reliably and efficiently, with clearer output and handling of typical file access issues. Note that for very large sets of data, further optimizations may still be necessary, potentially including but not limited to the use of more sophisticated deduplication algorithms or parallel processing techniques.
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
Reviewer: APPROVE
|
||||
````
|
||||
51
python/docs/src/guides/type-routed-agent.md
Normal file
51
python/docs/src/guides/type-routed-agent.md
Normal file
@@ -0,0 +1,51 @@
|
||||
# Using Type Routed Agent
|
||||
|
||||
To make it easier to implement agents that respond to certain message types there is a base class called {py:class}`~agnext.components.TypeRoutedAgent`. This class provides a simple decorator pattern for associating message types with message handlers.
|
||||
|
||||
The decorator {py:func}`agnext.components.message_handler` should be added to functions in the class that are intended to handle messages. These functions have a specific signature that needs to be followed for it to be recognized as a message handler.
|
||||
|
||||
- The function must be an `async` function.
|
||||
- The function must be decorated with the `message_handler` decorator.
|
||||
- The function must have exactly 3 arguments.
|
||||
- `self`
|
||||
- `message`: The message to be handled, this must be type hinted with the message type that it is intended to handle.
|
||||
- `cancellation_token`: A {py:class}`agnext.core.CancellationToken` object
|
||||
- The function must be type hinted with what message types it can return.
|
||||
|
||||
```{tip}
|
||||
Handlers can handle more than one message type by accepting a Union of the message types. It can also return more than one message type by returning a Union of the message types.
|
||||
```
|
||||
|
||||
## Example
|
||||
|
||||
The following is an example of a simple agent that broadcasts the fact it received messages, and resets its internal counter when it receives a reset message.
|
||||
|
||||
One important thing to point out is that when an agent is constructed it must be passed a runtime object. This allows the agent to communicate with other agents via the runtime.
|
||||
|
||||
```python
|
||||
from agnext.chat.types import MultiModalMessage, Reset, TextMessage
|
||||
from agnext.components import TypeRoutedAgent, message_handler
|
||||
from agnext.core import AgentRuntime, CancellationToken
|
||||
|
||||
|
||||
class MyAgent(TypeRoutedAgent):
|
||||
def __init__(self):
|
||||
super().__init__(description="I am a demo agent")
|
||||
self._received_count = 0
|
||||
|
||||
@message_handler()
|
||||
async def on_text_message(
|
||||
self, message: TextMessage | MultiModalMessage, cancellation_token: CancellationToken
|
||||
) -> None:
|
||||
self._received_count += 1
|
||||
await self.publish_message(
|
||||
TextMessage(
|
||||
content=f"I received a message from {message.source}. Message received #{self._received_count}",
|
||||
source=self.metadata["name"],
|
||||
)
|
||||
)
|
||||
|
||||
@message_handler()
|
||||
async def on_reset(self, message: Reset, cancellation_token: CancellationToken) -> None:
|
||||
self._received_count = 0
|
||||
```
|
||||
57
python/docs/src/index.rst
Normal file
57
python/docs/src/index.rst
Normal file
@@ -0,0 +1,57 @@
|
||||
AGNext
|
||||
------
|
||||
|
||||
AGNext is a framework for building multi-agent applications. It is designed to be easy to use, flexible, and scalable.
|
||||
|
||||
At a high level it provides both a framework for inter-agent communication and a set of components for building and managing agents.
|
||||
|
||||
:doc:`Agents <core-concepts/agent>` are hosted and managed by a :doc:`runtime <core-concepts/runtime>`.
|
||||
AGNext supports both RPC- and event-based
|
||||
communication between agents, allowing for a :doc:`diverse set of agent patterns
|
||||
<core-concepts/patterns>`. AGNext provides default agent implementations for
|
||||
common uses, such as chat completion agents, but also allows for fully custom agents.
|
||||
|
||||
.. toctree::
|
||||
:caption: Getting started
|
||||
:hidden:
|
||||
|
||||
getting-started/installation
|
||||
getting-started/tutorial
|
||||
|
||||
.. toctree::
|
||||
:caption: Core Concepts
|
||||
:hidden:
|
||||
|
||||
core-concepts/runtime
|
||||
core-concepts/agent
|
||||
core-concepts/patterns
|
||||
core-concepts/memory
|
||||
core-concepts/tools
|
||||
core-concepts/cancellation
|
||||
core-concepts/logging
|
||||
core-concepts/namespace
|
||||
|
||||
.. toctree::
|
||||
:caption: Guides
|
||||
:hidden:
|
||||
|
||||
guides/type-routed-agent
|
||||
guides/group-chat-coder-reviewer
|
||||
guides/azure-openai-with-aad-auth
|
||||
|
||||
|
||||
.. toctree::
|
||||
:caption: Reference
|
||||
:hidden:
|
||||
|
||||
reference/agnext.components
|
||||
reference/agnext.application
|
||||
reference/agnext.chat
|
||||
reference/agnext.core
|
||||
|
||||
.. toctree::
|
||||
:caption: Other
|
||||
:hidden:
|
||||
|
||||
contributing
|
||||
|
||||
Reference in New Issue
Block a user