Fix docstring of get_or_create (#583)

* Fix docstring of get_or_create * Improve docstring
2026-04-20 03:02:16 -04:00 · 2023-11-12 11:10:58 +08:00
parent 805d8452c4
commit ff414892c4
3 changed files with 8 additions and 10 deletions
--- a/autogen/agentchat/contrib/retrieve_user_proxy_agent.py
+++ b/autogen/agentchat/contrib/retrieve_user_proxy_agent.py
@@ -122,8 +122,8 @@ class RetrieveUserProxyAgent(UserProxyAgent):
                - customized_answer_prefix (Optional, str): the customized answer prefix for the retrieve chat. Default is "".
                    If not "" and the customized_answer_prefix is not in the answer, `Update Context` will be triggered.
                - update_context (Optional, bool): if False, will not apply `Update Context` for interactive retrieval. Default is True.
-                - get_or_create (Optional, bool): if True, will create/recreate a collection for the retrieve chat.
-                    This is the same as that used in chromadb. Default is False. Will be set to False if docs_path is None.
+                - get_or_create (Optional, bool): if True, will create/return a collection for the retrieve chat. This is the same as that used in chromadb.
+                    Default is False. Will raise ValueError if the collection already exists and get_or_create is False. Will be set to True if docs_path is None.
                - custom_token_count_function(Optional, Callable): a custom function to count the number of tokens in a string.
                    The function should take (text:str, model:str) as input and return the token_count(int). the retrieve_config["model"] will be passed in the function.
                    Default is autogen.token_count_utils.count_token that uses tiktoken, which may not be accurate for non-OpenAI models.
@@ -178,9 +178,7 @@ class RetrieveUserProxyAgent(UserProxyAgent):
        self.customized_prompt = self._retrieve_config.get("customized_prompt", None)
        self.customized_answer_prefix = self._retrieve_config.get("customized_answer_prefix", "").upper()
        self.update_context = self._retrieve_config.get("update_context", True)
-        self._get_or_create = (
-            self._retrieve_config.get("get_or_create", False) if self._docs_path is not None else False
-        )
+        self._get_or_create = self._retrieve_config.get("get_or_create", False) if self._docs_path is not None else True
        self.custom_token_count_function = self._retrieve_config.get("custom_token_count_function", count_token)
        self.custom_text_split_function = self._retrieve_config.get("custom_text_split_function", None)
        self._context_max_tokens = self._max_tokens * 0.8
@@ -360,7 +358,7 @@ class RetrieveUserProxyAgent(UserProxyAgent):
            n_results (int): the number of results to be retrieved.
            search_string (str): only docs containing this string will be retrieved.
        """
-        if not self._collection or self._get_or_create:
+        if not self._collection or not self._get_or_create:
            print("Trying to create collection.")
            self._client = create_vector_db_from_dir(
                dir_path=self._docs_path,
@@ -375,7 +373,7 @@ class RetrieveUserProxyAgent(UserProxyAgent):
                custom_text_split_function=self.custom_text_split_function,
            )
            self._collection = True
-            self._get_or_create = False
+            self._get_or_create = True

        results = query_vector_db(
            query_texts=[problem],
--- a/autogen/retrieve_utils.py
+++ b/autogen/retrieve_utils.py
@@ -242,7 +242,7 @@ def create_vector_db_from_dir(
        db_path (Optional, str): the path to the chromadb. Default is "/tmp/chromadb.db".
        collection_name (Optional, str): the name of the collection. Default is "all-my-documents".
        get_or_create (Optional, bool): Whether to get or create the collection. Default is False. If True, the collection
-            will be recreated if it already exists.
+            will be returned if it already exists. Will raise ValueError if the collection already exists and get_or_create is False.
        chunk_mode (Optional, str): the chunk mode. Default is "multi_lines".
        must_break_at_empty_line (Optional, bool): Whether to break at empty line. Default is True.
        embedding_model (Optional, str): the embedding model to use. Default is "all-MiniLM-L6-v2". Will be ignored if
--- a/notebook/agentchat_RetrieveChat.ipynb
+++ b/notebook/agentchat_RetrieveChat.ipynb
@@ -212,7 +212,7 @@
    "        \"model\": config_list[0][\"model\"],\n",
    "        \"client\": chromadb.PersistentClient(path=\"/tmp/chromadb\"),\n",
    "        \"embedding_model\": \"all-mpnet-base-v2\",\n",
-    "        \"get_or_create\": False,  # set to True if you want to recreate the collection\n",
+    "        \"get_or_create\": True,  # set to False if you don't want to reuse an existing collection, but you'll need to remove the collection manually\n",
    "    },\n",
    ")"
   ]
@@ -4172,7 +4172,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.10.12"
+   "version": "3.10.13"
  }
 },
 "nbformat": 4,