From c1326469b206d7fb68eac0f753c925358c30675e Mon Sep 17 00:00:00 2001 From: Dino Hensen Date: Thu, 6 Apr 2023 17:15:40 +0200 Subject: [PATCH 01/11] Resolve call_ai_function's default model at call time: the default was bound to the gpt-4 value at import time, so calls that omitted `model` still used gpt-4 after gpt3only was set --- scripts/call_ai_function.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/call_ai_function.py b/scripts/call_ai_function.py index 0c864b4909..db1c955683 100644 --- a/scripts/call_ai_function.py +++ b/scripts/call_ai_function.py @@ -5,7 +5,9 @@ from llm_utils import create_chat_completion # This is a magic function that can do anything with no-code. See # https://github.com/Torantulino/AI-Functions for more info. -def call_ai_function(function, args, description, model=cfg.smart_llm_model): +def call_ai_function(function, args, description, model=None): + if model is None: + model = cfg.smart_llm_model # For each arg, if any are None, convert to "None": args = [str(arg) if arg is not None else "None" for arg in args] # parse args to comma seperated string From 56edfc64401feb8fbe3cdfd8f1e2e4269f2b1db5 Mon Sep 17 00:00:00 2001 From: onekum <55006697+onekum@users.noreply.github.com> Date: Sun, 9 Apr 2023 04:28:23 -0400 Subject: [PATCH 02/11] Tell AI about the `do_nothing` command --- scripts/data/prompt.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/data/prompt.txt b/scripts/data/prompt.txt index 77a449de52..582cf5d3ac 100644 --- a/scripts/data/prompt.txt +++ b/scripts/data/prompt.txt @@ -24,6 +24,7 @@ COMMANDS: 18. Execute Python File: "execute_python_file", args: "file": "" 19. Task Complete (Shutdown): "task_complete", args: "reason": "" 20. Generate Image: "generate_image", args: "prompt": "" +21. 
Do Nothing; command name: "do_nothing", args: "" RESOURCES: From 93a92d92fc56ebe49758b2e8bb0e9f0eba2f6513 Mon Sep 17 00:00:00 2001 From: onekum <55006697+onekum@users.noreply.github.com> Date: Sun, 9 Apr 2023 04:28:36 -0400 Subject: [PATCH 03/11] make `do_nothing` a valid command --- scripts/commands.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/commands.py b/scripts/commands.py index ba5383957a..02f3baa8df 100644 --- a/scripts/commands.py +++ b/scripts/commands.py @@ -106,6 +106,8 @@ def execute_command(command_name, arguments): return execute_python_file(arguments["file"]) elif command_name == "generate_image": return generate_image(arguments["prompt"]) + elif command_name == "do_nothing": + return "No action performed." elif command_name == "task_complete": shutdown() else: @@ -283,4 +285,4 @@ def delete_agent(key): result = agents.delete_agent(key) if not result: return f"Agent {key} does not exist." - return f"Agent {key} deleted." \ No newline at end of file + return f"Agent {key} deleted." 
From 546d8783e78096d737351fca00d2cd701b9b72e5 Mon Sep 17 00:00:00 2001 From: Alexander Nikulin Date: Sun, 9 Apr 2023 14:33:30 +0400 Subject: [PATCH 04/11] put debug setting in cfg and use it when calling chat.chat_with_ai and fix_json --- scripts/config.py | 4 ++++ scripts/json_parser.py | 2 +- scripts/main.py | 6 +++++- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/scripts/config.py b/scripts/config.py index d5f1a3f066..4d7adec1c0 100644 --- a/scripts/config.py +++ b/scripts/config.py @@ -31,6 +31,7 @@ class Config(metaclass=Singleton): """ def __init__(self): + self.debug = False self.continuous_mode = False self.speak_mode = False # TODO - make these models be self-contained, using langchain, so we can configure them once and call it good @@ -110,3 +111,6 @@ class Config(metaclass=Singleton): def set_pinecone_region(self, value: str): self.pinecone_region = value + + def set_debug_mode(self, value: bool): + self.debug = value diff --git a/scripts/json_parser.py b/scripts/json_parser.py index 8ec9238b4d..c863ccdbb0 100644 --- a/scripts/json_parser.py +++ b/scripts/json_parser.py @@ -40,7 +40,7 @@ def fix_and_parse_json(json_str: str, try_to_fix_with_gpt: bool = True): if try_to_fix_with_gpt: print(f"Warning: Failed to parse AI output, attempting to fix.\n If you see this warning frequently, it's likely that your prompt is confusing the AI. 
Try changing it up slightly.") # Now try to fix this up using the ai_functions - ai_fixed_json = fix_json(json_str, json_schema, False) + ai_fixed_json = fix_json(json_str, json_schema, cfg.debug) if ai_fixed_json != "failed": return json.loads(ai_fixed_json) else: diff --git a/scripts/main.py b/scripts/main.py index a0a1898cc4..f96afeb163 100644 --- a/scripts/main.py +++ b/scripts/main.py @@ -266,6 +266,10 @@ def parse_arguments(): print_to_console("GPT3.5 Only Mode: ", Fore.GREEN, "ENABLED") cfg.set_smart_llm_model(cfg.fast_llm_model) + if args.debug: + print_to_console("Debug Mode: ", Fore.GREEN, "ENABLED") + cfg.set_debug_mode(True) + # TODO: fill in llm values here @@ -295,7 +299,7 @@ while True: user_input, full_message_history, memory, - cfg.fast_token_limit) # TODO: This hardcodes the model to use GPT3.5. Make this an argument + cfg.fast_token_limit, cfg.debug) # TODO: This hardcodes the model to use GPT3.5. Make this an argument # Print Assistant thoughts print_assistant_thoughts(assistant_reply) From 54101c79973ca5ca8ccd7e1ac59856cb282c57d8 Mon Sep 17 00:00:00 2001 From: vandervoortj <64353639+vandervoortj@users.noreply.github.com> Date: Sun, 9 Apr 2023 15:49:19 -0400 Subject: [PATCH 05/11] Update .gitignore Ignore auto-gpt.json --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 7091a87237..ce0c33f935 100644 --- a/.gitignore +++ b/.gitignore @@ -8,4 +8,5 @@ auto_gpt_workspace/* *.mpeg .env outputs/* -ai_settings.yaml \ No newline at end of file +ai_settings.yaml +auto-gpt.json From b6444de25dadd44da46acbcd37f3c8358bc18d03 Mon Sep 17 00:00:00 2001 From: BillSchumacher <34168009+BillSchumacher@users.noreply.github.com> Date: Sun, 9 Apr 2023 15:22:55 -0500 Subject: [PATCH 06/11] Fixes for common json errors, cleanup json_parser file. 
--- scripts/json_parser.py | 82 +++++++++++++++++--------- scripts/json_utils.py | 127 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 181 insertions(+), 28 deletions(-) create mode 100644 scripts/json_utils.py diff --git a/scripts/json_parser.py b/scripts/json_parser.py index c863ccdbb0..6a5f073f3d 100644 --- a/scripts/json_parser.py +++ b/scripts/json_parser.py @@ -1,11 +1,13 @@ import json +from typing import Any, Dict, Union from call_ai_function import call_ai_function from config import Config +from json_utils import correct_json + cfg = Config() -def fix_and_parse_json(json_str: str, try_to_fix_with_gpt: bool = True): - json_schema = """ - { +JSON_SCHEMA = """ +{ "command": { "name": "command name", "args":{ @@ -20,44 +22,68 @@ def fix_and_parse_json(json_str: str, try_to_fix_with_gpt: bool = True): "criticism": "constructive self-criticism", "speak": "thoughts summary to say to user" } - } - """ +} +""" + +def fix_and_parse_json( + json_str: str, + try_to_fix_with_gpt: bool = True +) -> Union[str, Dict[Any, Any]]: try: json_str = json_str.replace('\t', '') return json.loads(json_str) - except Exception as e: - # Let's do something manually - sometimes GPT responds with something BEFORE the braces: - # "I'm sorry, I don't understand. Please try again."{"text": "I'm sorry, I don't understand. Please try again.", "confidence": 0.0} - # So let's try to find the first brace and then parse the rest of the string + except json.JSONDecodeError as _: # noqa: F841 + json_str = correct_json(json_str) try: - brace_index = json_str.index("{") - json_str = json_str[brace_index:] - last_brace_index = json_str.rindex("}") - json_str = json_str[:last_brace_index+1] - return json.loads(json_str) - except Exception as e: - if try_to_fix_with_gpt: - print(f"Warning: Failed to parse AI output, attempting to fix.\n If you see this warning frequently, it's likely that your prompt is confusing the AI. 
Try changing it up slightly.") + return json.loads(json_str) + except json.JSONDecodeError as _: # noqa: F841 + pass + # Let's do something manually: + # sometimes GPT responds with something BEFORE the braces: + # "I'm sorry, I don't understand. Please try again." + # {"text": "I'm sorry, I don't understand. Please try again.", + # "confidence": 0.0} + # So let's try to find the first brace and then parse the rest + # of the string + try: + brace_index = json_str.index("{") + json_str = json_str[brace_index:] + last_brace_index = json_str.rindex("}") + json_str = json_str[:last_brace_index+1] + return json.loads(json_str) + except json.JSONDecodeError as e: # noqa: F841 + if try_to_fix_with_gpt: + print("Warning: Failed to parse AI output, attempting to fix." + "\n If you see this warning frequently, it's likely that" + " your prompt is confusing the AI. Try changing it up" + " slightly.") # Now try to fix this up using the ai_functions - ai_fixed_json = fix_json(json_str, json_schema, cfg.debug) + ai_fixed_json = fix_json(json_str, JSON_SCHEMA, cfg.debug) if ai_fixed_json != "failed": - return json.loads(ai_fixed_json) + return json.loads(ai_fixed_json) else: - print(f"Failed to fix ai output, telling the AI.") # This allows the AI to react to the error message, which usually results in it correcting its ways. - return json_str - else: + # This allows the AI to react to the error message, + # which usually results in it correcting its ways. 
+ print("Failed to fix ai output, telling the AI.") + return json_str + else: raise e - + + def fix_json(json_str: str, schema: str, debug=False) -> str: # Try to fix the JSON using gpt: function_string = "def fix_json(json_str: str, schema:str=None) -> str:" args = [f"'''{json_str}'''", f"'''{schema}'''"] - description_string = """Fixes the provided JSON string to make it parseable and fully complient with the provided schema.\n If an object or field specifed in the schema isn't contained within the correct JSON, it is ommited.\n This function is brilliant at guessing when the format is incorrect.""" + description_string = "Fixes the provided JSON string to make it parseable"\ + " and fully complient with the provided schema.\n If an object or"\ + " field specifed in the schema isn't contained within the correct"\ + " JSON, it is ommited.\n This function is brilliant at guessing"\ + " when the format is incorrect." # If it doesn't already start with a "`", add one: if not json_str.startswith("`"): - json_str = "```json\n" + json_str + "\n```" + json_str = "```json\n" + json_str + "\n```" result_string = call_ai_function( function_string, args, description_string, model=cfg.fast_llm_model ) @@ -68,11 +94,11 @@ def fix_json(json_str: str, schema: str, debug=False) -> str: print(f"Fixed JSON: {result_string}") print("----------- END OF FIX ATTEMPT ----------------") try: - json.loads(result_string) # just check the validity + json.loads(result_string) # just check the validity return result_string - except: + except: # noqa: E722 # Get the call stack: # import traceback # call_stack = traceback.format_exc() # print(f"Failed to fix JSON: '{json_str}' "+call_stack) - return "failed" \ No newline at end of file + return "failed" diff --git a/scripts/json_utils.py b/scripts/json_utils.py new file mode 100644 index 0000000000..b3ffe4b9ab --- /dev/null +++ b/scripts/json_utils.py @@ -0,0 +1,127 @@ +import re +import json +from config import Config + +cfg = Config() + + 
+def extract_char_position(error_message: str) -> int: + """Extract the character position from the JSONDecodeError message. + + Args: + error_message (str): The error message from the JSONDecodeError + exception. + + Returns: + int: The character position. + """ + import re + + char_pattern = re.compile(r'\(char (\d+)\)') + if match := char_pattern.search(error_message): + return int(match[1]) + else: + raise ValueError("Character position not found in the error message.") + + +def add_quotes_to_property_names(json_string: str) -> str: + """ + Add quotes to property names in a JSON string. + + Args: + json_string (str): The JSON string. + + Returns: + str: The JSON string with quotes added to property names. + """ + + def replace_func(match): + return f'"{match.group(1)}":' + + property_name_pattern = re.compile(r'(\w+):') + corrected_json_string = property_name_pattern.sub( + replace_func, + json_string) + + try: + json.loads(corrected_json_string) + return corrected_json_string + except json.JSONDecodeError as e: + raise e + + +def balance_braces(json_string: str) -> str: + """ + Balance the braces in a JSON string. + + Args: + json_string (str): The JSON string. + + Returns: + str: The JSON string with braces balanced. 
+ """ + + open_braces_count = json_string.count('{') + close_braces_count = json_string.count('}') + + while open_braces_count > close_braces_count: + json_string += '}' + close_braces_count += 1 + + while close_braces_count > open_braces_count: + json_string = json_string.rstrip('}') + close_braces_count -= 1 + + try: + json.loads(json_string) + return json_string + except json.JSONDecodeError as e: + raise e + + +def fix_invalid_escape(json_str: str, error_message: str) -> str: + while error_message.startswith('Invalid \\escape'): + bad_escape_location = extract_char_position(error_message) + json_str = json_str[:bad_escape_location] + \ + json_str[bad_escape_location + 1:] + try: + json.loads(json_str) + return json_str + except json.JSONDecodeError as e: + if cfg.debug: + print('json loads error - fix invalid escape', e) + error_message = str(e) + return json_str + + +def correct_json(json_str: str) -> str: + """ + Correct common JSON errors. + + Args: + json_str (str): The JSON string. 
+ """ + + try: + if cfg.debug: + print("json", json_str) + json.loads(json_str) + return json_str + except json.JSONDecodeError as e: + if cfg.debug: + print('json loads error', e) + error_message = str(e) + if error_message.startswith('Invalid \\escape'): + json_str = fix_invalid_escape(json_str, error_message) + if error_message.startswith('Expecting property name enclosed in double quotes'): + json_str = add_quotes_to_property_names(json_str) + try: + json.loads(json_str) + return json_str + except json.JSONDecodeError as e: + if cfg.debug: + print('json loads error - add quotes', e) + error_message = str(e) + if balanced_str := balance_braces(json_str): + return balanced_str + return json_str From b0cb247b83d9b0b6c6f7d153ad0dfe076b6327ac Mon Sep 17 00:00:00 2001 From: Itamar Friedman Date: Mon, 10 Apr 2023 00:18:37 +0300 Subject: [PATCH 07/11] scrape_text: added tests + handle RequestException --- scripts/browse.py | 5 +- tests/test_browse_scrape_text.py | 102 +++++++++++++++++++++++++++++++ 2 files changed, 106 insertions(+), 1 deletion(-) create mode 100644 tests/test_browse_scrape_text.py diff --git a/scripts/browse.py b/scripts/browse.py index 0fda3d7b06..40e6ca1fe0 100644 --- a/scripts/browse.py +++ b/scripts/browse.py @@ -6,7 +6,10 @@ from llm_utils import create_chat_completion cfg = Config() def scrape_text(url): - response = requests.get(url, headers=cfg.user_agent_header) + try: + response = requests.get(url, headers=cfg.user_agent_header) + except requests.exceptions.RequestException as e: + return "Error: " + str(e) # Check if the response contains an HTTP error if response.status_code >= 400: diff --git a/tests/test_browse_scrape_text.py b/tests/test_browse_scrape_text.py new file mode 100644 index 0000000000..1a08367e8d --- /dev/null +++ b/tests/test_browse_scrape_text.py @@ -0,0 +1,102 @@ + +# Generated by CodiumAI + +import requests +from unittest.mock import Mock +import pytest + +from scripts.browse import scrape_text + +""" +Code Analysis + 
+Objective: +The objective of the "scrape_text" function is to scrape the text content from a given URL and return it as a string, after removing any unwanted HTML tags and scripts. + +Inputs: +- url: a string representing the URL of the webpage to be scraped. + +Flow: +1. Send a GET request to the given URL using the requests library and the user agent header from the config file. +2. Check if the response contains an HTTP error. If it does, return an error message. +3. Use BeautifulSoup to parse the HTML content of the response and extract all script and style tags. +4. Get the text content of the remaining HTML using the get_text() method of BeautifulSoup. +5. Split the text into lines and then into chunks, removing any extra whitespace. +6. Join the chunks into a single string with newline characters between them. +7. Return the cleaned text. + +Outputs: +- A string representing the cleaned text content of the webpage. + +Additional aspects: +- The function uses the requests library and BeautifulSoup to handle the HTTP request and HTML parsing, respectively. +- The function removes script and style tags from the HTML to avoid including unwanted content in the text output. +- The function uses a generator expression to split the text into lines and chunks, which can improve performance for large amounts of text. +""" + + + +class TestScrapeText: + + # Tests that scrape_text() returns the expected text when given a valid URL. + def test_scrape_text_with_valid_url(self, mocker): + # Mock the requests.get() method to return a response with expected text + expected_text = "This is some sample text" + mock_response = mocker.Mock() + mock_response.status_code = 200 + mock_response.text = f"

{expected_text}

" + mocker.patch("requests.get", return_value=mock_response) + + # Call the function with a valid URL and assert that it returns the expected text + url = "http://www.example.com" + assert scrape_text(url) == expected_text + + # Tests that the function returns an error message when an invalid or unreachable url is provided. + def test_invalid_url(self, mocker): + # Mock the requests.get() method to raise an exception + mocker.patch("requests.get", side_effect=requests.exceptions.RequestException) + + # Call the function with an invalid URL and assert that it returns an error message + url = "http://www.invalidurl.com" + error_message = scrape_text(url) + assert "Error:" in error_message + + # Tests that the function returns an empty string when the html page contains no text to be scraped. + def test_no_text(self, mocker): + # Mock the requests.get() method to return a response with no text + mock_response = mocker.Mock() + mock_response.status_code = 200 + mock_response.text = "" + mocker.patch("requests.get", return_value=mock_response) + + # Call the function with a valid URL and assert that it returns an empty string + url = "http://www.example.com" + assert scrape_text(url) == "" + + # Tests that the function returns an error message when the response status code is an http error (>=400). + def test_http_error(self, mocker): + # Mock the requests.get() method to return a response with a 404 status code + mocker.patch('requests.get', return_value=Mock(status_code=404)) + + # Call the function with a URL + result = scrape_text("https://www.example.com") + + # Check that the function returns an error message + assert result == "Error: HTTP 404 error" + + # Tests that scrape_text() properly handles HTML tags. + def test_scrape_text_with_html_tags(self): + # Create a mock response object with HTML containing tags + html = "

This is bold text.

" + response = Mock() + response.status_code = 200 + response.text = html + + # Mock the requests.get() method to return the mock response object + requests.get = Mock(return_value=response) + + # Call the function with a URL + result = scrape_text("https://www.example.com") + + # Check that the function properly handles HTML tags + assert result == "This is bold text." \ No newline at end of file From ee1805c13641e576af4af7fb07f39adef5799625 Mon Sep 17 00:00:00 2001 From: Toran Bruce Richards Date: Mon, 10 Apr 2023 04:21:44 +0100 Subject: [PATCH 08/11] Update PULL_REQUEST_TEMPLATE.md --- .github/PULL_REQUEST_TEMPLATE.md | 33 +++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index cb8ce34a1b..1ac8f8642d 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,18 +1,33 @@ -### Background + +Focus on a single, specific change. +Do not include any unrelated or "extra" modifications. +Provide clear documentation and explanations of the changes made. +Ensure diffs are limited to the intended lines — no applying preferred formatting styles or line endings (unless that's what the PR is about). +For guidance on committing only the specific lines you have changed, refer to this helpful video: https://youtu.be/8-hSNHHbiZg + +By following these guidelines, your PRs are more likely to be merged quickly after testing, as long as they align with the project's overall direction. --> + +### Background + ### Changes + - +### Documentation + ### Test Plan + - +### PR Quality Checklist +- [ ] My pull request is atomic and focuses on a single change. +- [ ] I have thouroughly tested my changes with multiple different prompts. +- [ ] I have considered potential risks and mitigations for my changes. +- [ ] I have documented my changes clearly and comprehensively. 
+- [ ] I have not snuck in any "extra" small tweaks changes -### Change Safety + -- [ ] I have added tests to cover my changes -- [ ] I have considered potential risks and mitigations for my changes - - + From 5727b052fe686506971f9d3472c07027116f2b5b Mon Sep 17 00:00:00 2001 From: Jason Drage Date: Mon, 10 Apr 2023 13:38:56 +1000 Subject: [PATCH 09/11] jd: ignore venv --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 7091a87237..f602018bd4 100644 --- a/.gitignore +++ b/.gitignore @@ -7,5 +7,6 @@ package-lock.json auto_gpt_workspace/* *.mpeg .env +venv/* outputs/* ai_settings.yaml \ No newline at end of file From 06f26cb29c6980c2c521780c83972fc09db819e4 Mon Sep 17 00:00:00 2001 From: Itamar Friedman Date: Mon, 10 Apr 2023 08:19:41 +0300 Subject: [PATCH 10/11] remove dependency of unittest, use pytest --- tests/test_browse_scrape_text.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/tests/test_browse_scrape_text.py b/tests/test_browse_scrape_text.py index 1a08367e8d..27ebc0f693 100644 --- a/tests/test_browse_scrape_text.py +++ b/tests/test_browse_scrape_text.py @@ -2,7 +2,6 @@ # Generated by CodiumAI import requests -from unittest.mock import Mock import pytest from scripts.browse import scrape_text @@ -76,7 +75,7 @@ class TestScrapeText: # Tests that the function returns an error message when the response status code is an http error (>=400). def test_http_error(self, mocker): # Mock the requests.get() method to return a response with a 404 status code - mocker.patch('requests.get', return_value=Mock(status_code=404)) + mocker.patch('requests.get', return_value=mocker.Mock(status_code=404)) # Call the function with a URL result = scrape_text("https://www.example.com") @@ -85,15 +84,13 @@ class TestScrapeText: assert result == "Error: HTTP 404 error" # Tests that scrape_text() properly handles HTML tags. 
- def test_scrape_text_with_html_tags(self): + def test_scrape_text_with_html_tags(self, mocker): # Create a mock response object with HTML containing tags html = "

This is bold text.

" - response = Mock() - response.status_code = 200 - response.text = html - - # Mock the requests.get() method to return the mock response object - requests.get = Mock(return_value=response) + mock_response = mocker.Mock() + mock_response.status_code = 200 + mock_response.text = html + mocker.patch("requests.get", return_value=mock_response) # Call the function with a URL result = scrape_text("https://www.example.com") From da4a045bd6aa85805ff30493f7f0b00050a2dc80 Mon Sep 17 00:00:00 2001 From: Itamar Friedman Date: Mon, 10 Apr 2023 08:26:46 +0300 Subject: [PATCH 11/11] Adding most basic URL validation in scrape_text --- scripts/browse.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/scripts/browse.py b/scripts/browse.py index 40e6ca1fe0..7eeaaf4d94 100644 --- a/scripts/browse.py +++ b/scripts/browse.py @@ -6,6 +6,10 @@ from llm_utils import create_chat_completion cfg = Config() def scrape_text(url): + # Most basic check if the URL is valid: + if not url.startswith('http'): + return "Error: Invalid URL" + try: response = requests.get(url, headers=cfg.user_agent_header) except requests.exceptions.RequestException as e: