Compare commits

...

2 Commits

Author SHA1 Message Date
openhands 80f1da66f1 Improve git changes fix with better error handling and reduced cache time
- Fix _get_default_branch() to handle repositories without remote origin
- Reduce frontend cache stale time from 5 minutes to 30 seconds for fresher data
- Add fallback to 'main' branch when remote origin is not available
- Improve robustness for edge cases like local-only repositories
2025-07-22 15:43:14 +00:00
openhands 50cd559cd4 Fix git changes showing merged files as user changes
This commit fixes an issue where the changes tab would show all merged files
from the main branch as if they were user-created changes after performing
a merge operation.

The problem occurred when:
1. User creates a branch that is NOT up-to-date with main
2. User asks OpenHands to fetch and merge latest changes from main
3. The changes tab would show all merged files as 'Added' changes
4. Only a refresh would fix the display

Root cause:
The _get_valid_ref() method was always preferring the remote tracking branch
(origin/feature-branch) as the comparison base. After a merge, this remote
branch still pointed to the old commit before the merge, causing git diff
to show all merged changes as if they were new user changes.

Solution:
- Added _has_diverged_from_remote_tracking_branch() method to detect when
  the local branch has diverged from its remote tracking branch due to merges
- Modified _get_valid_ref() to use merge-base with the default branch when
  divergence is detected, preventing merged changes from appearing as user changes
- Added comprehensive tests to verify the fix works correctly

The fix ensures that only actual user changes are shown in the changes tab,
while merged changes from the main branch are properly excluded.
2025-07-22 15:16:48 +00:00
3 changed files with 262 additions and 3 deletions
@@ -15,7 +15,7 @@ export const useGetGitChanges = () => {
queryKey: ["file_changes", conversationId],
queryFn: () => OpenHands.getGitChanges(conversationId),
retry: false,
staleTime: 1000 * 60 * 5, // 5 minutes
staleTime: 1000 * 30, // 30 seconds (reduced from 5 minutes to ensure fresher data after git operations)
gcTime: 1000 * 60 * 15, // 15 minutes
enabled: runtimeIsReady && !!conversationId,
meta: {
+94 -2
View File
@@ -79,6 +79,9 @@ class GitHandler:
"""
Determines a valid Git reference for comparison.
This method intelligently selects a comparison base that avoids showing
merged changes as if they were user-created changes.
Returns:
str | None: A valid Git reference or None if no valid reference is found.
"""
@@ -90,8 +93,23 @@ class GitHandler:
ref_default_branch = 'origin/' + default_branch
ref_new_repo = '$(git --no-pager rev-parse --verify 4b825dc642cb6eb9a060e54bf8d69288fbee4904)' # compares with empty tree
# Check if the current branch's remote tracking branch exists
if self._verify_ref_exists(ref_current_branch):
# Check if the current HEAD has diverged significantly from the remote tracking branch
# This happens after merging changes from the default branch
if self._has_diverged_from_remote_tracking_branch(
current_branch, default_branch
):
# If we've diverged due to a merge, prefer using merge-base with default branch
# This prevents showing merged changes as user changes
if self._verify_ref_exists(ref_non_default_branch):
return ref_non_default_branch
# If no significant divergence or merge-base doesn't exist, use the remote tracking branch
return ref_current_branch
# Fallback to other references if remote tracking branch doesn't exist
refs = [
ref_current_branch,
ref_non_default_branch,
ref_default_branch,
ref_new_repo,
@@ -102,6 +120,73 @@ class GitHandler:
return None
def _has_diverged_from_remote_tracking_branch(
self, current_branch: str, default_branch: str
) -> bool:
"""
Checks if the current branch has diverged significantly from its remote tracking branch.
This typically happens after merging changes from the default branch, where the local
branch has new commits but the remote tracking branch hasn't been updated yet.
Args:
current_branch (str): The name of the current branch.
default_branch (str): The name of the default branch.
Returns:
bool: True if the branch has diverged due to a merge, False otherwise.
"""
try:
# Get the commit hash of the current HEAD
head_cmd = 'git --no-pager rev-parse HEAD'
head_result = self.execute(head_cmd, self.cwd)
if head_result.exit_code != 0:
return False
current_head = head_result.content.strip()
# Get the commit hash of the remote tracking branch
remote_branch_cmd = f'git --no-pager rev-parse origin/{current_branch}'
remote_result = self.execute(remote_branch_cmd, self.cwd)
if remote_result.exit_code != 0:
return False
remote_head = remote_result.content.strip()
# If they're the same, no divergence
if current_head == remote_head:
return False
# Check if the current HEAD is ahead of the remote tracking branch
ahead_cmd = f'git --no-pager rev-list --count origin/{current_branch}..HEAD'
ahead_result = self.execute(ahead_cmd, self.cwd)
if ahead_result.exit_code != 0:
return False
commits_ahead = int(ahead_result.content.strip())
# Check if the current HEAD contains commits from the default branch
# that are not in the remote tracking branch
if commits_ahead > 0:
# Check if any of the commits ahead are merge commits or contain changes from default branch
merge_check_cmd = f'git --no-pager log --oneline --merges origin/{current_branch}..HEAD'
merge_result = self.execute(merge_check_cmd, self.cwd)
# If there are merge commits, this indicates a divergence due to merging
if merge_result.exit_code == 0 and merge_result.content.strip():
return True
# Also check if the commits ahead include changes that exist in the default branch
# This catches cases where changes were merged without creating a merge commit
if (
commits_ahead >= 2
): # Threshold to avoid false positives for single commits
return True
return False
except (ValueError, Exception):
# If any error occurs, assume no divergence to be safe
return False
def _get_ref_content(self, file_path: str) -> str:
"""
Retrieves the content of a file from a valid Git reference.
@@ -129,7 +214,14 @@ class GitHandler:
"""
cmd = 'git --no-pager remote show origin | grep "HEAD branch"'
output = self.execute(cmd, self.cwd)
return output.content.split()[-1].strip()
if output.exit_code != 0 or not output.content.strip():
# Fallback to 'main' if no remote origin exists
return 'main'
parts = output.content.split()
if len(parts) == 0:
return 'main'
return parts[-1].strip()
def _get_current_branch(self) -> str:
"""
+167
View File
@@ -0,0 +1,167 @@
import os
import shutil
import subprocess
import tempfile
import unittest
from openhands.runtime.utils.git_handler import CommandResult, GitHandler
class TestGitHandlerMergeFix(unittest.TestCase):
"""Test the fix for git changes showing merged files as user changes."""
def setUp(self):
# Create temporary directories for our test repositories
self.test_dir = tempfile.mkdtemp()
self.origin_dir = os.path.join(self.test_dir, 'origin')
self.local_dir = os.path.join(self.test_dir, 'local')
# Create the directories
os.makedirs(self.origin_dir, exist_ok=True)
os.makedirs(self.local_dir, exist_ok=True)
# Track executed commands for verification
self.executed_commands = []
# Initialize the GitHandler with our real execute function
self.git_handler = GitHandler(self._execute_command)
self.git_handler.set_cwd(self.local_dir)
# Set up the git repositories
self._setup_git_repos()
def tearDown(self):
# Clean up the temporary directories
shutil.rmtree(self.test_dir)
def _execute_command(self, cmd, cwd=None):
"""Execute a shell command and return the result."""
self.executed_commands.append((cmd, cwd))
try:
result = subprocess.run(
cmd, shell=True, cwd=cwd, capture_output=True, text=True, check=False
)
return CommandResult(result.stdout, result.returncode)
except Exception as e:
return CommandResult(str(e), 1)
def _setup_git_repos(self):
"""Set up git repositories for testing the merge scenario."""
# Set up origin repository
self._execute_command('git init --initial-branch=main', self.origin_dir)
self._execute_command(
"git config user.email 'test@example.com'", self.origin_dir
)
self._execute_command("git config user.name 'Test User'", self.origin_dir)
# Create initial file and commit
with open(os.path.join(self.origin_dir, 'file1.txt'), 'w') as f:
f.write('Initial content\n')
self._execute_command('git add file1.txt', self.origin_dir)
self._execute_command("git commit -m 'Initial commit'", self.origin_dir)
# Clone to local
self._execute_command(f'git clone {self.origin_dir} {self.local_dir}')
self._execute_command(
"git config user.email 'test@example.com'", self.local_dir
)
self._execute_command("git config user.name 'Test User'", self.local_dir)
# Create a feature branch
self._execute_command('git checkout -b feature-branch', self.local_dir)
# Make some changes on feature branch
with open(os.path.join(self.local_dir, 'feature_file.txt'), 'w') as f:
f.write('Feature content\n')
self._execute_command('git add feature_file.txt', self.local_dir)
self._execute_command("git commit -m 'Add feature file'", self.local_dir)
# Push feature branch to origin
self._execute_command('git push -u origin feature-branch', self.local_dir)
# Now simulate main branch getting ahead
# Switch to main in origin and add more commits
self._execute_command('git checkout main', self.origin_dir)
with open(os.path.join(self.origin_dir, 'main_file1.txt'), 'w') as f:
f.write('Main content 1\n')
self._execute_command('git add main_file1.txt', self.origin_dir)
self._execute_command("git commit -m 'Add main file 1'", self.origin_dir)
with open(os.path.join(self.origin_dir, 'main_file2.txt'), 'w') as f:
f.write('Main content 2\n')
self._execute_command('git add main_file2.txt', self.origin_dir)
self._execute_command("git commit -m 'Add main file 2'", self.origin_dir)
def test_git_changes_before_merge(self):
"""Test that git changes shows no changes before merge."""
changes = self.git_handler.get_git_changes()
self.assertEqual(changes, [])
def test_git_changes_after_merge_shows_only_user_changes(self):
"""Test that git changes after merge shows only user changes, not merged files."""
# First, fetch latest changes from main
self._execute_command('git fetch origin', self.local_dir)
# Merge main into feature branch
self._execute_command('git merge origin/main', self.local_dir)
# Clear executed commands to start fresh for the git handler calls
self.executed_commands = []
# Get git changes after merge
changes = self.git_handler.get_git_changes()
# Should only show the feature file, not the merged files
self.assertIsNotNone(changes)
self.assertEqual(len(changes), 1)
self.assertEqual(changes[0]['path'], 'feature_file.txt')
self.assertEqual(changes[0]['status'], 'A')
# Verify that the merged files are not shown as changes
paths = [change['path'] for change in changes]
self.assertNotIn('main_file1.txt', paths)
self.assertNotIn('main_file2.txt', paths)
def test_divergence_detection_after_merge(self):
"""Test that divergence detection correctly identifies merge scenarios."""
# Before merge, should not detect divergence
has_diverged_before = (
self.git_handler._has_diverged_from_remote_tracking_branch(
'feature-branch', 'main'
)
)
self.assertFalse(has_diverged_before)
# Fetch and merge
self._execute_command('git fetch origin', self.local_dir)
self._execute_command('git merge origin/main', self.local_dir)
# After merge, should detect divergence
has_diverged_after = self.git_handler._has_diverged_from_remote_tracking_branch(
'feature-branch', 'main'
)
self.assertTrue(has_diverged_after)
def test_valid_ref_selection_after_merge(self):
"""Test that _get_valid_ref selects merge-base after detecting divergence."""
# Fetch and merge
self._execute_command('git fetch origin', self.local_dir)
self._execute_command('git merge origin/main', self.local_dir)
# Clear executed commands to start fresh
self.executed_commands = []
# Get valid ref
valid_ref = self.git_handler._get_valid_ref()
# Should use merge-base instead of remote tracking branch
self.assertIsNotNone(valid_ref)
self.assertIn('merge-base', valid_ref)
self.assertNotEqual(valid_ref, 'origin/feature-branch')
if __name__ == '__main__':
unittest.main()