mirror of
https://github.com/Pythagora-io/gpt-pilot.git
synced 2026-01-09 13:17:55 -05:00
Ignore files based on name(glob), size and whether they're binary files
This commit is contained in:
@@ -85,7 +85,7 @@ After you have Python and (optionally) PostgreSQL installed, follow these steps:
|
||||
- LLM Provider (OpenAI/Azure/Openrouter)
|
||||
- Your API key
|
||||
- database settings: SQLite/PostgreSQL (to change from SQLite to PostgreSQL, just set `DATABASE_TYPE=postgres`)
|
||||
- optionally set IGNORE_FOLDERS for the folders which shouldn't be tracked by GPT Pilot in workspace, useful to ignore folders created by compilers (i.e. `IGNORE_FOLDERS=folder1,folder2,folder3`)
|
||||
- optionally set IGNORE_PATHS for the files and folders which shouldn't be tracked by GPT Pilot in the workspace, useful to ignore folders created by compilers (e.g. `IGNORE_PATHS=folder1,folder2,folder3`)
|
||||
9. `python db_init.py` (initialize the database)
|
||||
10. `python main.py` (start GPT Pilot)
|
||||
|
||||
|
||||
@@ -18,7 +18,7 @@ services:
|
||||
- DB_USER=pilot
|
||||
- DB_PASSWORD=pilot
|
||||
# Folders which shouldn't be tracked in workspace (useful to ignore folders created by compiler)
|
||||
# IGNORE_FOLDERS=folder1,folder2
|
||||
# IGNORE_PATHS=folder1,folder2
|
||||
volumes:
|
||||
- ~/gpt-pilot-workspace:/usr/src/app/workspace
|
||||
build:
|
||||
|
||||
@@ -16,7 +16,7 @@ MODEL_NAME=gpt-4-1106-preview
|
||||
MAX_TOKENS=8192
|
||||
|
||||
# Folders which shouldn't be tracked in workspace (useful to ignore folders created by compiler)
|
||||
# IGNORE_FOLDERS=folder1,folder2
|
||||
# IGNORE_PATHS=folder1,folder2
|
||||
|
||||
# Database
|
||||
# DATABASE_TYPE=postgres
|
||||
|
||||
@@ -21,10 +21,7 @@ STEPS = [
|
||||
'finished'
|
||||
]
|
||||
|
||||
additional_ignore_folders = os.environ.get('IGNORE_FOLDERS', '').split(',')
|
||||
|
||||
# TODO: rename to IGNORE_PATHS as it also contains files
|
||||
IGNORE_FOLDERS = [
|
||||
DEFAULT_IGNORE_PATHS = [
|
||||
'.git',
|
||||
'.gpt-pilot',
|
||||
'.idea',
|
||||
@@ -36,7 +33,16 @@ IGNORE_FOLDERS = [
|
||||
'venv',
|
||||
'dist',
|
||||
'build',
|
||||
'target'
|
||||
] + [folder for folder in additional_ignore_folders if folder]
|
||||
|
||||
'target',
|
||||
"*.min.js",
|
||||
"*.min.css",
|
||||
"*.svg",
|
||||
"*.csv",
|
||||
]
|
||||
IGNORE_PATHS = DEFAULT_IGNORE_PATHS + [
|
||||
folder for folder
|
||||
in os.environ.get('IGNORE_PATHS', '').split(',')
|
||||
if folder
|
||||
]
|
||||
IGNORE_SIZE_THRESHOLD = 102400 # 100K+ files are ignored by default
|
||||
PROMPT_DATA_TO_IGNORE = {'directory_tree', 'name'}
|
||||
|
||||
@@ -8,7 +8,7 @@ import peewee
|
||||
|
||||
from const.messages import CHECK_AND_CONTINUE, AFFIRMATIVE_ANSWERS, NEGATIVE_ANSWERS
|
||||
from utils.style import color_yellow_bold, color_cyan, color_white_bold, color_green
|
||||
from const.common import IGNORE_FOLDERS, STEPS
|
||||
from const.common import STEPS
|
||||
from database.database import delete_unconnected_steps_from, delete_all_app_development_data, update_app_status
|
||||
from const.ipc import MESSAGE_TYPE
|
||||
from prompts.prompts import ask_user
|
||||
@@ -28,6 +28,7 @@ from database.models.files import File
|
||||
from logger.logger import logger
|
||||
from utils.dot_gpt_pilot import DotGptPilot
|
||||
from utils.llm_connection import test_api_access
|
||||
from utils.ignore import IgnoreMatcher
|
||||
|
||||
from utils.telemetry import telemetry
|
||||
|
||||
@@ -176,12 +177,7 @@ class Project:
|
||||
Returns:
|
||||
dict: The directory tree.
|
||||
"""
|
||||
# files = {}
|
||||
# if with_descriptions and False:
|
||||
# files = File.select().where(File.app_id == self.args['app_id'])
|
||||
# files = {snapshot.name: snapshot for snapshot in files}
|
||||
# return build_directory_tree_with_descriptions(self.root_path, ignore=IGNORE_FOLDERS, files=files, add_descriptions=False)
|
||||
return build_directory_tree(self.root_path, ignore=IGNORE_FOLDERS)
|
||||
return build_directory_tree(self.root_path)
|
||||
|
||||
def get_test_directory_tree(self):
|
||||
"""
|
||||
@@ -191,7 +187,7 @@ class Project:
|
||||
dict: The directory tree of tests.
|
||||
"""
|
||||
# TODO remove hardcoded path
|
||||
return build_directory_tree(self.root_path + '/tests', ignore=IGNORE_FOLDERS)
|
||||
return build_directory_tree(self.root_path + '/tests')
|
||||
|
||||
def get_all_coded_files(self):
|
||||
"""
|
||||
@@ -209,18 +205,7 @@ class Project:
|
||||
)
|
||||
)
|
||||
|
||||
files = self.get_files([file.path + '/' + file.name for file in files])
|
||||
|
||||
# Don't send contents of binary files
|
||||
for file in files:
|
||||
if not isinstance(file["content"], str):
|
||||
file["content"] = f"<<binary file, {len(file['content'])} bytes>>"
|
||||
|
||||
# TODO temoprary fix to eliminate files that are not in the project
|
||||
files = [file for file in files if file['content'] != '']
|
||||
# TODO END
|
||||
|
||||
return files
|
||||
return self.get_files([file.path + '/' + file.name for file in files])
|
||||
|
||||
def get_files(self, files):
|
||||
"""
|
||||
@@ -232,6 +217,7 @@ class Project:
|
||||
Returns:
|
||||
list: A list of files with content.
|
||||
"""
|
||||
matcher = IgnoreMatcher(root_path=self.root_path)
|
||||
files_with_content = []
|
||||
for file_path in files:
|
||||
try:
|
||||
@@ -239,9 +225,12 @@ class Project:
|
||||
_, full_path = self.get_full_file_path(file_path, file_path)
|
||||
file_data = get_file_contents(full_path, self.root_path)
|
||||
except ValueError:
|
||||
full_path = None
|
||||
file_data = {"path": file_path, "name": os.path.basename(file_path), "content": ''}
|
||||
|
||||
files_with_content.append(file_data)
|
||||
if full_path and file_data["content"] != "" and not matcher.ignore(full_path):
|
||||
files_with_content.append(file_data)
|
||||
|
||||
return files_with_content
|
||||
|
||||
def find_input_required_lines(self, file_content):
|
||||
@@ -395,7 +384,7 @@ class Project:
|
||||
|
||||
|
||||
def save_files_snapshot(self, development_step_id):
|
||||
files = get_directory_contents(self.root_path, ignore=IGNORE_FOLDERS)
|
||||
files = get_directory_contents(self.root_path)
|
||||
development_step, created = DevelopmentSteps.get_or_create(id=development_step_id)
|
||||
|
||||
total_files = 0
|
||||
@@ -431,7 +420,7 @@ class Project:
|
||||
development_step = DevelopmentSteps.get(DevelopmentSteps.id == development_step_id)
|
||||
file_snapshots = FileSnapshot.select().where(FileSnapshot.development_step == development_step)
|
||||
|
||||
clear_directory(self.root_path, IGNORE_FOLDERS + self.files)
|
||||
clear_directory(self.root_path, ignore=self.files)
|
||||
for file_snapshot in file_snapshots:
|
||||
update_file(file_snapshot.file.full_path, file_snapshot.content, project=self)
|
||||
if file_snapshot.file.full_path not in self.files:
|
||||
|
||||
@@ -10,6 +10,7 @@ from typing import Dict, Union
|
||||
|
||||
from logger.logger import logger
|
||||
from utils.style import color_yellow, color_green, color_red, color_yellow_bold
|
||||
from utils.ignore import IgnoreMatcher
|
||||
from database.database import get_saved_command_run, save_command_run
|
||||
from helpers.exceptions.TooDeepRecursionError import TooDeepRecursionError
|
||||
from helpers.exceptions.TokenLimitError import TokenLimitError
|
||||
@@ -340,23 +341,24 @@ def check_if_command_successful(convo, command, cli_response, response, exit_cod
|
||||
|
||||
return response
|
||||
|
||||
def build_directory_tree(path, prefix='', is_root=True, ignore=None):
|
||||
def build_directory_tree(path, prefix='', root_path=None) -> str:
|
||||
"""Build the directory tree structure in a simplified format.
|
||||
|
||||
Args:
|
||||
- path: The starting directory path.
|
||||
- prefix: Prefix for the current item, used for recursion.
|
||||
- is_root: Flag to indicate if the current item is the root directory.
|
||||
- ignore: a list of directories to ignore
|
||||
|
||||
Returns:
|
||||
- A string representation of the directory tree.
|
||||
:param path: The starting directory path.
|
||||
:param prefix: Prefix for the current item, used for recursion.
|
||||
:param root_path: The root directory path.
|
||||
:return: A string representation of the directory tree.
|
||||
"""
|
||||
output = ""
|
||||
indent = ' '
|
||||
|
||||
if root_path is None:
|
||||
root_path = path
|
||||
|
||||
matcher = IgnoreMatcher(root_path=root_path)
|
||||
|
||||
if os.path.isdir(path):
|
||||
if is_root:
|
||||
if root_path == path:
|
||||
output += '/'
|
||||
else:
|
||||
dir_name = os.path.basename(path)
|
||||
@@ -364,8 +366,16 @@ def build_directory_tree(path, prefix='', is_root=True, ignore=None):
|
||||
|
||||
# List items in the directory
|
||||
items = os.listdir(path)
|
||||
dirs = [item for item in items if os.path.isdir(os.path.join(path, item)) and item not in ignore]
|
||||
files = [item for item in items if os.path.isfile(os.path.join(path, item))]
|
||||
dirs = []
|
||||
files = []
|
||||
for item in items:
|
||||
item_path = os.path.join(path, item)
|
||||
if matcher.ignore(item_path):
|
||||
continue
|
||||
if os.path.isdir(item_path):
|
||||
dirs.append(item)
|
||||
elif os.path.isfile(item_path):
|
||||
files.append(item)
|
||||
dirs.sort()
|
||||
files.sort()
|
||||
|
||||
@@ -374,7 +384,7 @@ def build_directory_tree(path, prefix='', is_root=True, ignore=None):
|
||||
for index, dir_item in enumerate(dirs):
|
||||
item_path = os.path.join(path, dir_item)
|
||||
new_prefix = prefix + indent # Updated prefix for recursion
|
||||
output += build_directory_tree(item_path, new_prefix, is_root=False, ignore=ignore)
|
||||
output += build_directory_tree(item_path, new_prefix, root_path)
|
||||
|
||||
if files:
|
||||
output += f"{prefix} {', '.join(files)}\n"
|
||||
@@ -387,36 +397,6 @@ def build_directory_tree(path, prefix='', is_root=True, ignore=None):
|
||||
return output
|
||||
|
||||
|
||||
def res_for_build_directory_tree(path, files=None):
|
||||
return ' - ' + files[os.path.basename(path)].description + ' ' if files and os.path.basename(path) in files else ''
|
||||
|
||||
|
||||
def build_directory_tree_with_descriptions(path, prefix="", ignore=None, is_last=False, files=None):
|
||||
"""Build the directory tree structure in tree-like format.
|
||||
Args:
|
||||
- path: The starting directory path.
|
||||
- prefix: Prefix for the current item, used for recursion.
|
||||
- ignore: List of directory names to ignore.
|
||||
- is_last: Flag to indicate if the current item is the last in its parent directory.
|
||||
Returns:
|
||||
- A string representation of the directory tree.
|
||||
"""
|
||||
ignore |= []
|
||||
if os.path.basename(path) in ignore:
|
||||
return ""
|
||||
output = ""
|
||||
indent = '| ' if not is_last else ' '
|
||||
# It's a directory, add its name to the output and then recurse into it
|
||||
output += prefix + f"|-- {os.path.basename(path)}{res_for_build_directory_tree(path, files)}/\n"
|
||||
if os.path.isdir(path):
|
||||
# List items in the directory
|
||||
items = os.listdir(path)
|
||||
for index, item in enumerate(items):
|
||||
item_path = os.path.join(path, item)
|
||||
output += build_directory_tree(item_path, prefix + indent, ignore, index == len(items) - 1, files)
|
||||
return output
|
||||
|
||||
|
||||
def execute_command_and_check_cli_response(convo, command: dict):
|
||||
"""
|
||||
Execute a command and check its CLI response.
|
||||
|
||||
@@ -3,7 +3,7 @@ import os
|
||||
from typing import Optional, Union
|
||||
|
||||
from utils.style import color_green
|
||||
|
||||
from utils.ignore import IgnoreMatcher
|
||||
|
||||
def update_file(path: str, new_content: Union[str, bytes], project=None):
|
||||
"""
|
||||
@@ -87,7 +87,8 @@ def get_file_contents(
|
||||
|
||||
|
||||
def get_directory_contents(
|
||||
directory: str, ignore: Optional[list[str]] = None
|
||||
directory: str,
|
||||
ignore: Optional[list[str]] = None,
|
||||
) -> list[dict[str, Union[str, bytes]]]:
|
||||
"""
|
||||
Get the content of all files in the given directory.
|
||||
@@ -101,19 +102,22 @@ def get_directory_contents(
|
||||
"""
|
||||
return_array = []
|
||||
|
||||
if ignore is None:
|
||||
ignore = []
|
||||
matcher = IgnoreMatcher(ignore, root_path=directory)
|
||||
|
||||
# TODO: Convert to use pathlib.Path.walk()
|
||||
for dpath, dirs, files in os.walk(directory):
|
||||
# In-place update of dirs so that os.walk() doesn't traverse them
|
||||
dirs[:] = [d for d in dirs if d not in ignore]
|
||||
dirs[:] = [
|
||||
d for d in dirs
|
||||
if not matcher.ignore(os.path.join(dpath, d))
|
||||
]
|
||||
|
||||
for file in files:
|
||||
if file in ignore:
|
||||
full_path = os.path.join(dpath, file)
|
||||
if matcher.ignore(full_path):
|
||||
continue
|
||||
|
||||
return_array.append(get_file_contents(os.path.join(dpath, file), directory))
|
||||
return_array.append(get_file_contents(full_path, directory))
|
||||
|
||||
return return_array
|
||||
|
||||
@@ -125,20 +129,22 @@ def clear_directory(directory: str, ignore: Optional[list[str]] = None):
|
||||
:param dir_path: Full path to the directory to clear
|
||||
:param ignore: List of files or folders to ignore (optional)
|
||||
"""
|
||||
if ignore is None:
|
||||
ignore = []
|
||||
matcher = IgnoreMatcher(ignore, root_path=directory)
|
||||
|
||||
# TODO: Convert to use pathlib.Path.walk()
|
||||
for dpath, dirs, files in os.walk(directory, topdown=True):
|
||||
# In-place update of dirs so that os.walk() doesn't traverse them
|
||||
dirs[:] = [d for d in dirs if d not in ignore]
|
||||
dirs[:] = [
|
||||
d for d in dirs
|
||||
if not matcher.ignore(os.path.join(dpath, d))
|
||||
]
|
||||
|
||||
for file in files:
|
||||
if file in ignore or os.path.join(directory, file) in ignore:
|
||||
full_path = os.path.join(dpath, file)
|
||||
if matcher.ignore(full_path):
|
||||
continue
|
||||
|
||||
path = os.path.join(dpath, file)
|
||||
os.remove(path)
|
||||
os.remove(full_path)
|
||||
|
||||
# Delete empty subdirectories not in ignore list
|
||||
for d in dirs:
|
||||
|
||||
@@ -363,7 +363,7 @@ class TestProjectFileLists:
|
||||
'user_review_goal': 'Test User Review Goal',
|
||||
}]
|
||||
|
||||
# with directories including common.IGNORE_FOLDERS
|
||||
# with directories including common.IGNORE_PATHS
|
||||
src = os.path.join(project.root_path, 'src')
|
||||
foo = os.path.join(project.root_path, 'src/foo')
|
||||
files_no_folders = os.path.join(foo, 'files_no_folders')
|
||||
|
||||
@@ -147,14 +147,13 @@ def test_get_directory_contents_live():
|
||||
assert isinstance(this_file["content"], str)
|
||||
assert "test_get_directory_contents_live()" in this_file["content"]
|
||||
|
||||
# Check that the Python cache was loaded as a binary file
|
||||
print("FILES", [(f["path"], f["name"]) for f in files])
|
||||
pycache_file = [
|
||||
# Check that the binary file was ignored
|
||||
image_files = [
|
||||
f
|
||||
for f in files
|
||||
if f["path"] == "helpers" and f["name"] == "testlogo.png"
|
||||
][0]
|
||||
assert isinstance(pycache_file["content"], bytes)
|
||||
]
|
||||
assert image_files == []
|
||||
|
||||
# Check that the ignore list works
|
||||
assert all(file["name"] != "__init__.py" for file in files)
|
||||
|
||||
102
pilot/test/utils/test_ignore.py
Normal file
102
pilot/test/utils/test_ignore.py
Normal file
@@ -0,0 +1,102 @@
|
||||
from unittest.mock import patch
|
||||
import pytest
|
||||
from tempfile import TemporaryDirectory
|
||||
|
||||
from utils.ignore import IgnoreMatcher
|
||||
from os.path import sep, join, dirname
|
||||
|
||||
@pytest.mark.parametrize(
    ("path", "expected"),
    [
        (".git", True),
        (".gpt-pilot", True),
        (".idea", True),
        (".vscode", True),
        (".DS_Store", True),
        (join("subdirectory", ".DS_Store"), True),
        ("__pycache__", True),
        (join("subdirectory", "__pycache__"), True),
        ("node_modules", True),
        (join("subdirectory", "node_modules"), True),
        ("package-lock.json", True),
        ("venv", True),
        ("dist", True),
        ("build", True),
        ("target", True),
        (".gitignore", False),
        ("server.js", False),
        (join(dirname(__file__), "node_modules"), True),
        (join(dirname(__file__), "subdirectory", "node_modules"), True),
    ]
)
def test_default_ignore(path, expected):
    """A matcher built without extra patterns applies the default ignore list."""
    # Root the matcher at this test's directory; both relative and
    # absolute paths are exercised by the parameters above.
    default_matcher = IgnoreMatcher(root_path=dirname(__file__))
    is_ignored = default_matcher.ignore(path)
    assert is_ignored == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    ("ignore", "path", "expected"),
    [
        ("*.py[co]", "test.pyc", True),
        ("*.py[co]", "subdir/test.pyo", True),
        ("*.py[co]", "test.py", False),
        ("*.min.js", f"public{sep}js{sep}script.min.js", True),
        ("*.min.js", f"public{sep}js{sep}min.js", False),
    ]
)
def test_additional_ignore(ignore, path, expected):
    """A glob pattern passed to the constructor is matched against the path."""
    extra_patterns = [ignore]
    glob_matcher = IgnoreMatcher(extra_patterns)
    is_ignored = glob_matcher.ignore(path)
    assert is_ignored == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    ("ignore", "path", "expected"),
    [
        ("jquery.js", "jquery.js", True),
        ("jquery.js", f"otherdir{sep}jquery.js", True),
        ("jquery.js", f"{sep}test{sep}jquery.js", True),
    ]
)
def test_full_path(ignore, path, expected):
    """A file-name pattern matches bare, nested and root-prefixed paths."""
    root = f"{sep}test"
    rooted_matcher = IgnoreMatcher([ignore], root_path=root)
    is_ignored = rooted_matcher.ignore(path)
    assert is_ignored == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    ("size", "expected"),
    [
        (1024*1024, True),  # 1MB
        (102400, False),  # 100KB
    ]
)
@patch("utils.ignore.os.path.isfile")
@patch("utils.ignore.os.path.getsize")
def test_ignore_large_files(mock_getsize, mock_isfile, size, expected):
    """The parametrized file sizes are ignored or kept as expected."""
    # Pretend the path is a regular file of the parametrized size.
    mock_getsize.return_value = size
    mock_isfile.return_value = True

    root = f"{sep}test"
    size_matcher = IgnoreMatcher(root_path=root)

    # Neutralize the binary check so only the size check decides the result.
    with patch.object(size_matcher, "is_binary", return_value=False):
        is_ignored = size_matcher.ignore("fakefile.txt")
        assert is_ignored is expected

    mock_isfile.assert_called_once()
    mock_getsize.assert_called_once_with(f"{sep}test{sep}fakefile.txt")
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    ("content", "expected"),
    [
        (("hello world ŠĐŽČĆ").encode("utf-8"), False),  # text
        (b"\x89\x50\x4e\x47\x0d\x0a\x1a\x0a\x00\x00\x00\x0d\x49\x48\x44\x52", True),  # image
    ]
)
def test_ignore_binary_files(content, expected):
    """UTF-8 text content is kept; non-decodable (binary) content is ignored."""
    with TemporaryDirectory() as workdir:
        absolute_path = join(workdir, "testfile.txt")
        with open(absolute_path, "wb") as handle:
            handle.write(content)

        binary_matcher = IgnoreMatcher(root_path=workdir)
        # The verdict must be the same for the relative and the absolute path.
        relative_verdict = binary_matcher.ignore("testfile.txt")
        assert relative_verdict is expected
        absolute_verdict = binary_matcher.ignore(absolute_path)
        assert absolute_verdict is expected
|
||||
97
pilot/utils/ignore.py
Normal file
97
pilot/utils/ignore.py
Normal file
@@ -0,0 +1,97 @@
|
||||
from fnmatch import fnmatch
|
||||
import os.path
|
||||
from typing import Optional
|
||||
|
||||
from const.common import IGNORE_PATHS, IGNORE_SIZE_THRESHOLD
|
||||
|
||||
|
||||
class IgnoreMatcher:
    """
    Decide whether a file or directory should be ignored.

    A path is ignored when its base name matches one of the ignore
    patterns or, if the respective option is enabled, when it is a file
    larger than `const.common.IGNORE_SIZE_THRESHOLD` or a file that
    cannot be decoded as UTF-8 text.
    """

    def __init__(self,
        ignore_paths: Optional[list[str]] = None,
        *,
        root_path: Optional[str] = None,
        ignore_binaries: bool = True,
        ignore_large_files: bool = True,
    ):
        """
        Initialize the IgnoreMatcher object.

        The passed paths (optional) are *added* to the list of
        ignore paths from `const.common.IGNORE_PATHS`.

        :param ignore_paths: List of paths to ignore (optional)
        :param root_path: Directory that relative paths passed to
            `ignore()` are joined onto (optional)
        :param ignore_binaries: If True, files that fail to decode as
            UTF-8 are ignored
        :param ignore_large_files: If True, files larger than
            `const.common.IGNORE_SIZE_THRESHOLD` are ignored
        """
        if ignore_paths is None:
            ignore_paths = []

        # Concatenation builds a new list, so neither the caller's list
        # nor the shared IGNORE_PATHS constant is mutated.
        self.ignore_paths = ignore_paths + IGNORE_PATHS
        self.ignore_binaries = ignore_binaries
        self.ignore_large_files = ignore_large_files
        self.root_path = root_path

    def ignore(self, path: str) -> bool:
        """
        Check if the given path should be ignored.

        Specified path can be either the full path, or a relative path
        (if root_path was set in the constructor).

        :param path: Path to the file or directory to check
        :return: True if the path matches any of the ignore patterns, or
            is a large/binary file and those checks are enabled;
            False otherwise
        """

        # Turn into absolute (full) path
        if self.root_path and not path.startswith(self.root_path):
            path = os.path.join(self.root_path, path)

        # Name check first: it needs no filesystem access.
        if self.is_in_ignore_list(path):
            return True

        if self.ignore_large_files and self.is_large_file(path):
            return True

        if self.ignore_binaries and self.is_binary(path):
            return True

        return False

    def is_in_ignore_list(self, path: str) -> bool:
        """
        Check if the given path matches any of the ignore patterns.

        Only the last path component (base name) is compared against
        the glob patterns.

        :param path: The path to the file or directory to check
        :return: True if the path matches any of the ignore patterns, False otherwise.
        """
        name = os.path.basename(path)
        return any(fnmatch(name, pattern) for pattern in self.ignore_paths)

    def is_large_file(self, path: str) -> bool:
        """
        Check if the given file is larger than the threshold.

        :param path: FULL path to the file to check.
        :return: True if the file is larger than the threshold, False otherwise
            (including when the path is not a regular file).
        """
        if not os.path.isfile(path):
            return False

        return os.path.getsize(path) > IGNORE_SIZE_THRESHOLD

    def is_binary(self, path: str) -> bool:
        """
        Check if the given file is binary.

        A file counts as binary when decoding its beginning (up to
        128K characters) as UTF-8 fails.

        :param path: FULL path to the file to check.
        :return: True if the file is binary, False otherwise
            (including when the path is not a regular file).
        """
        if not os.path.isfile(path):
            return False

        try:
            # Context manager closes the handle even when decoding fails,
            # so walking a large tree doesn't leak file descriptors.
            with open(path, "r", encoding="utf-8") as fp:
                fp.read(128 * 1024)
        except UnicodeDecodeError:
            return True

        return False
|
||||
Reference in New Issue
Block a user