feat(backend): Add ClamAV scanning for local file paths (#11988)

## Context

From PR #11796 review discussion. Files processed by the video blocks
(downloads, uploads, generated videos) should be scanned through ClamAV
for malware detection.

## Problem

`store_media_file()` in `backend/util/file.py` already scans:
- `workspace://` references
- Cloud storage paths  
- Data URIs (`data:...`)
- HTTP/HTTPS URLs

**But local file paths were NOT scanned.** The `else` branch only
verified the file exists.

This gap affected video processing blocks (e.g., `LoopVideoBlock`,
`AddAudioToVideoBlock`) that:
1. Download/receive input media
2. Process it locally (loop, add audio, etc.)
3. Write output to temp directory
4. Call `store_media_file(output_filename, ...)` with a local path →
**skipped virus scanning**

## Solution

Added virus scanning to the local file path branch:
```python
# Virus scan the local file before any further processing
local_content = target_path.read_bytes()
if len(local_content) > MAX_FILE_SIZE_BYTES:
    raise ValueError(...)
await scan_content_safe(local_content, filename=sanitized_file)
```

## Changes

- `backend/util/file.py` - Added ~7 lines to scan local files
(consistent with other input types)
- `backend/util/file_test.py` - Added 2 test cases for local file
scanning

## Risk Assessment

- **Low risk:** Single point of change, follows existing pattern
- **Backwards compatible:** No API changes
- **Fail-safe:** If scanning fails, file is rejected (existing behavior)

Closes SECRT-1904

Co-authored-by: Nicholas Tindle <nicholas.tindle@agpt.co>
This commit is contained in:
Otto
2026-02-09 00:24:18 +00:00
committed by GitHub
parent 98dd1a9480
commit a329831b0b
2 changed files with 105 additions and 0 deletions

View File

@@ -342,6 +342,14 @@ async def store_media_file(
if not target_path.is_file():
raise ValueError(f"Local file does not exist: {target_path}")
# Virus scan the local file before any further processing
local_content = target_path.read_bytes()
if len(local_content) > MAX_FILE_SIZE_BYTES:
raise ValueError(
f"File too large: {len(local_content)} bytes > {MAX_FILE_SIZE_BYTES} bytes"
)
await scan_content_safe(local_content, filename=sanitized_file)
# Return based on requested format
if return_format == "for_local_processing":
# Use when processing files locally with tools like ffmpeg, MoviePy, PIL

View File

@@ -247,3 +247,100 @@ class TestFileCloudIntegration:
execution_context=make_test_context(graph_exec_id=graph_exec_id),
return_format="for_local_processing",
)
@pytest.mark.asyncio
async def test_store_media_file_local_path_scanned(self):
"""Test that local file paths are scanned for viruses."""
graph_exec_id = "test-exec-123"
local_file = "test_video.mp4"
file_content = b"fake video content"
with patch(
"backend.util.file.get_cloud_storage_handler"
) as mock_handler_getter, patch(
"backend.util.file.scan_content_safe"
) as mock_scan, patch(
"backend.util.file.Path"
) as mock_path_class:
# Mock cloud storage handler - not a cloud path
mock_handler = MagicMock()
mock_handler.is_cloud_path.return_value = False
mock_handler_getter.return_value = mock_handler
# Mock virus scanner
mock_scan.return_value = None
# Mock file system operations
mock_base_path = MagicMock()
mock_target_path = MagicMock()
mock_resolved_path = MagicMock()
mock_path_class.return_value = mock_base_path
mock_base_path.mkdir = MagicMock()
mock_base_path.__truediv__ = MagicMock(return_value=mock_target_path)
mock_target_path.resolve.return_value = mock_resolved_path
mock_resolved_path.is_relative_to.return_value = True
mock_resolved_path.is_file.return_value = True
mock_resolved_path.read_bytes.return_value = file_content
mock_resolved_path.relative_to.return_value = Path(local_file)
mock_resolved_path.name = local_file
result = await store_media_file(
file=MediaFileType(local_file),
execution_context=make_test_context(graph_exec_id=graph_exec_id),
return_format="for_local_processing",
)
# Verify virus scan was called for local file
mock_scan.assert_called_once_with(file_content, filename=local_file)
# Result should be the relative path
assert str(result) == local_file
@pytest.mark.asyncio
async def test_store_media_file_local_path_virus_detected(self):
"""Test that infected local files raise VirusDetectedError."""
from backend.api.features.store.exceptions import VirusDetectedError
graph_exec_id = "test-exec-123"
local_file = "infected.exe"
file_content = b"malicious content"
with patch(
"backend.util.file.get_cloud_storage_handler"
) as mock_handler_getter, patch(
"backend.util.file.scan_content_safe"
) as mock_scan, patch(
"backend.util.file.Path"
) as mock_path_class:
# Mock cloud storage handler - not a cloud path
mock_handler = MagicMock()
mock_handler.is_cloud_path.return_value = False
mock_handler_getter.return_value = mock_handler
# Mock virus scanner to detect virus
mock_scan.side_effect = VirusDetectedError(
"EICAR-Test-File", "File rejected due to virus detection"
)
# Mock file system operations
mock_base_path = MagicMock()
mock_target_path = MagicMock()
mock_resolved_path = MagicMock()
mock_path_class.return_value = mock_base_path
mock_base_path.mkdir = MagicMock()
mock_base_path.__truediv__ = MagicMock(return_value=mock_target_path)
mock_target_path.resolve.return_value = mock_resolved_path
mock_resolved_path.is_relative_to.return_value = True
mock_resolved_path.is_file.return_value = True
mock_resolved_path.read_bytes.return_value = file_content
with pytest.raises(VirusDetectedError):
await store_media_file(
file=MediaFileType(local_file),
execution_context=make_test_context(graph_exec_id=graph_exec_id),
return_format="for_local_processing",
)