refactoring/splitting the json fix functions into general module and llm module which need AI's assistance.

This commit is contained in:
BingokoN
2023-04-17 12:14:43 +01:00
parent 7a32e03bd5
commit 0d2e196368
8 changed files with 135 additions and 121 deletions

View File

@@ -3,7 +3,7 @@ from autogpt.app import execute_command, get_command
from autogpt.chat import chat_with_ai, create_chat_message
from autogpt.config import Config
from autogpt.json_utils.auto_fix import fix_json_using_multiple_techniques
from autogpt.json_utils.json_fix_llm import fix_json_using_multiple_techniques
from autogpt.json_utils.utilities import validate_json
from autogpt.logs import logger, print_assistant_thoughts
from autogpt.speech import say_text

View File

@@ -18,7 +18,6 @@ from autogpt.commands.file_operations import (
search_files,
write_to_file,
)
from autogpt.json_utils.auto_fix import fix_and_parse_json
from autogpt.memory import get_memory
from autogpt.processing.text import summarize_text
from autogpt.speech import say_text

View File

@@ -0,0 +1,124 @@
"""This module contains functions to fix JSON strings using general programmatic approaches, suitable for addressing
common JSON formatting issues."""
from __future__ import annotations
import contextlib
import json
import re
from typing import Optional
from autogpt.config import Config
from autogpt.json_utils.utilities import extract_char_position
CFG = Config()
def fix_invalid_escape(json_to_load: str, error_message: str) -> str:
"""Fix invalid escape sequences in JSON strings.
Args:
json_to_load (str): The JSON string.
error_message (str): The error message from the JSONDecodeError
exception.
Returns:
str: The JSON string with invalid escape sequences fixed.
"""
while error_message.startswith("Invalid \\escape"):
bad_escape_location = extract_char_position(error_message)
json_to_load = (
json_to_load[:bad_escape_location] + json_to_load[bad_escape_location + 1:]
)
try:
json.loads(json_to_load)
return json_to_load
except json.JSONDecodeError as e:
if CFG.debug_mode:
print("json loads error - fix invalid escape", e)
error_message = str(e)
return json_to_load
def balance_braces(json_string: str) -> Optional[str]:
"""
Balance the braces in a JSON string.
Args:
json_string (str): The JSON string.
Returns:
str: The JSON string with braces balanced.
"""
open_braces_count = json_string.count("{")
close_braces_count = json_string.count("}")
while open_braces_count > close_braces_count:
json_string += "}"
close_braces_count += 1
while close_braces_count > open_braces_count:
json_string = json_string.rstrip("}")
close_braces_count -= 1
with contextlib.suppress(json.JSONDecodeError):
json.loads(json_string)
return json_string
def add_quotes_to_property_names(json_string: str) -> str:
"""
Add quotes to property names in a JSON string.
Args:
json_string (str): The JSON string.
Returns:
str: The JSON string with quotes added to property names.
"""
def replace_func(match: re.Match) -> str:
return f'"{match[1]}":'
property_name_pattern = re.compile(r"(\w+):")
corrected_json_string = property_name_pattern.sub(replace_func, json_string)
try:
json.loads(corrected_json_string)
return corrected_json_string
except json.JSONDecodeError as e:
raise e
def correct_json(json_to_load: str) -> str:
"""
Correct common JSON errors.
Args:
json_to_load (str): The JSON string.
"""
try:
if CFG.debug_mode:
print("json", json_to_load)
json.loads(json_to_load)
return json_to_load
except json.JSONDecodeError as e:
if CFG.debug_mode:
print("json loads error", e)
error_message = str(e)
if error_message.startswith("Invalid \\escape"):
json_to_load = fix_invalid_escape(json_to_load, error_message)
if error_message.startswith(
"Expecting property name enclosed in double quotes"
):
json_to_load = add_quotes_to_property_names(json_to_load)
try:
json.loads(json_to_load)
return json_to_load
except json.JSONDecodeError as e:
if CFG.debug_mode:
print("json loads error - add quotes", e)
error_message = str(e)
if balanced_str := balance_braces(json_to_load):
return balanced_str
return json_to_load

View File

@@ -1,15 +1,15 @@
"""This module contains the function to fix JSON strings"""
"""This module contains functions to fix JSON strings generated by LLM models, such as ChatGPT, using the assistance
of the ChatGPT API or LLM models."""
from __future__ import annotations
import contextlib
import json
import re
from typing import Optional, Dict, Any
from typing import Dict, Any
from colorama import Fore
from regex import regex
from autogpt.json_utils.utilities import extract_char_position
from autogpt.json_utils.json_fix_general import correct_json
from autogpt.llm_utils import call_ai_function
from autogpt.logs import logger
from autogpt.speech import say_text
@@ -82,83 +82,6 @@ def auto_fix_json(json_string: str, schema: str) -> str:
return "failed"
def fix_invalid_escape(json_to_load: str, error_message: str) -> str:
"""Fix invalid escape sequences in JSON strings.
Args:
json_to_load (str): The JSON string.
error_message (str): The error message from the JSONDecodeError
exception.
Returns:
str: The JSON string with invalid escape sequences fixed.
"""
while error_message.startswith("Invalid \\escape"):
bad_escape_location = extract_char_position(error_message)
json_to_load = (
json_to_load[:bad_escape_location] + json_to_load[bad_escape_location + 1:]
)
try:
json.loads(json_to_load)
return json_to_load
except json.JSONDecodeError as e:
if CFG.debug_mode:
print("json loads error - fix invalid escape", e)
error_message = str(e)
return json_to_load
def balance_braces(json_string: str) -> Optional[str]:
"""
Balance the braces in a JSON string.
Args:
json_string (str): The JSON string.
Returns:
str: The JSON string with braces balanced.
"""
open_braces_count = json_string.count("{")
close_braces_count = json_string.count("}")
while open_braces_count > close_braces_count:
json_string += "}"
close_braces_count += 1
while close_braces_count > open_braces_count:
json_string = json_string.rstrip("}")
close_braces_count -= 1
with contextlib.suppress(json.JSONDecodeError):
json.loads(json_string)
return json_string
def add_quotes_to_property_names(json_string: str) -> str:
"""
Add quotes to property names in a JSON string.
Args:
json_string (str): The JSON string.
Returns:
str: The JSON string with quotes added to property names.
"""
def replace_func(match: re.Match) -> str:
return f'"{match[1]}":'
property_name_pattern = re.compile(r"(\w+):")
corrected_json_string = property_name_pattern.sub(replace_func, json_string)
try:
json.loads(corrected_json_string)
return corrected_json_string
except json.JSONDecodeError as e:
raise e
def fix_json_using_multiple_techniques(assistant_reply: str) -> Dict[Any, Any]:
"""Fix the given JSON string to make it parseable and fully compliant with two techniques.
@@ -186,40 +109,6 @@ def fix_json_using_multiple_techniques(assistant_reply: str) -> Dict[Any, Any]:
return {}
def correct_json(json_to_load: str) -> str:
"""
Correct common JSON errors.
Args:
json_to_load (str): The JSON string.
"""
try:
if CFG.debug_mode:
print("json", json_to_load)
json.loads(json_to_load)
return json_to_load
except json.JSONDecodeError as e:
if CFG.debug_mode:
print("json loads error", e)
error_message = str(e)
if error_message.startswith("Invalid \\escape"):
json_to_load = fix_invalid_escape(json_to_load, error_message)
if error_message.startswith(
"Expecting property name enclosed in double quotes"
):
json_to_load = add_quotes_to_property_names(json_to_load)
try:
json.loads(json_to_load)
return json_to_load
except json.JSONDecodeError as e:
if CFG.debug_mode:
print("json loads error - add quotes", e)
error_message = str(e)
if balanced_str := balance_braces(json_to_load):
return balanced_str
return json_to_load
def fix_and_parse_json(
json_to_load: str, try_to_fix_with_gpt: bool = True
) -> Dict[Any, Any]:

View File

@@ -9,6 +9,7 @@ from autogpt.config import Config
CFG = Config()
def extract_char_position(error_message: str) -> int:
"""Extract the character position from the JSONDecodeError message.
@@ -40,7 +41,8 @@ def validate_json(json_object: object, schema_name: object) -> object:
if errors := sorted(validator.iter_errors(json_object), key=lambda e: e.path):
logger.error("The JSON object is invalid.")
if CFG.debug_mode:
logger.error(json.dumps(json_object, indent=4)) # Replace 'json_object' with the variable containing the JSON data
logger.error(
json.dumps(json_object, indent=4)) # Replace 'json_object' with the variable containing the JSON data
logger.error("The following issues were found:")
for error in errors:

View File

@@ -204,7 +204,7 @@ logger = Logger()
def print_assistant_thoughts(ai_name, assistant_reply):
"""Prints the assistant's thoughts to the console"""
from autogpt.json_utils.auto_fix import fix_and_parse_json, attempt_to_fix_json_by_finding_outermost_brackets
from autogpt.json_utils.json_fix_llm import fix_and_parse_json, attempt_to_fix_json_by_finding_outermost_brackets
try:
try:

View File

@@ -1,7 +1,7 @@
import unittest
import tests.context
from autogpt.json_utils.auto_fix import fix_and_parse_json
from autogpt.json_utils.json_fix_llm import fix_and_parse_json
class TestParseJson(unittest.TestCase):

View File

@@ -1,6 +1,6 @@
import unittest
from autogpt.json_utils.auto_fix import fix_and_parse_json
from autogpt.json_utils.json_fix_llm import fix_and_parse_json
class TestParseJson(unittest.TestCase):