Fix multi-byte character handling in read_file (#3173)

Co-authored-by: Reinier van der Leer <github@pwuts.nl>
2026-04-08 03:00:28 -04:00 · 2023-05-01 12:50:50 -05:00
parent 7fc6f2abfc
commit a5f856328d
2 changed files with 6 additions and 3 deletions
--- a/autogpt/commands/file_operations.py
+++ b/autogpt/commands/file_operations.py
@@ -6,6 +6,7 @@ import os
 import os.path
 from typing import Dict, Generator, Literal, Tuple

+import charset_normalizer
 import requests
 from colorama import Back, Fore
 from requests.adapters import HTTPAdapter, Retry
@@ -153,9 +154,10 @@ def read_file(filename: str) -> str:
        str: The contents of the file
    """
    try:
-        with open(filename, "r", encoding="utf-8") as f:
-            content = f.read()
-        return content
+        charset_match = charset_normalizer.from_path(filename).best()
+        encoding = charset_match.encoding
+        logger.debug(f"Read file '{filename}' with encoding '{encoding}'")
+        return str(charset_match)
    except Exception as err:
        return f"Error: {err}"

--- a/requirements.txt
+++ b/requirements.txt
@@ -21,6 +21,7 @@ webdriver-manager
 jsonschema
 tweepy
 click
+charset-normalizer>=3.1.0
 spacy>=3.0.0,<4.0.0
 en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.5.0/en_core_web_sm-3.5.0-py3-none-any.whl