From 75b3270cf08fff4f72d99f29f6322598dda4fede Mon Sep 17 00:00:00 2001 From: Rohit Malhotra Date: Wed, 2 Apr 2025 15:21:19 -0400 Subject: [PATCH] [Bug]: Ensure repository search only returns public repositories (#7665) Co-authored-by: openhands --- .../integrations/github/github_service.py | 19 ++++++++++++----- .../integrations/gitlab/gitlab_service.py | 21 ++++++++++--------- 2 files changed, 25 insertions(+), 15 deletions(-) diff --git a/openhands/integrations/github/github_service.py b/openhands/integrations/github/github_service.py index dbfcc71d59..238a40a87e 100644 --- a/openhands/integrations/github/github_service.py +++ b/openhands/integrations/github/github_service.py @@ -104,13 +104,15 @@ class GitHubService(GitService): ) -> list[Repository]: MAX_REPOS = 1000 PER_PAGE = 100 # Maximum allowed by GitHub API - all_repos: list[dict]= [] + all_repos: list[dict] = [] page = 1 while len(all_repos) < MAX_REPOS: params = {'page': str(page), 'per_page': str(PER_PAGE)} if installation_id: - url = f'{self.BASE_URL}/user/installations/{installation_id}/repositories' + url = ( + f'{self.BASE_URL}/user/installations/{installation_id}/repositories' + ) response, headers = await self._fetch_data(url, params) response = response.get('repositories', []) else: @@ -136,7 +138,7 @@ class GitHubService(GitService): id=repo.get('id'), full_name=repo.get('full_name'), stargazers_count=repo.get('stargazers_count'), - git_provider=ProviderType.GITHUB + git_provider=ProviderType.GITHUB, ) for repo in all_repos ] @@ -151,7 +153,14 @@ class GitHubService(GitService): self, query: str, per_page: int, sort: str, order: str ) -> list[Repository]: url = f'{self.BASE_URL}/search/repositories' - params = {'q': query, 'per_page': per_page, 'sort': sort, 'order': order} + # Add is:public to the query to ensure we only search for public repositories + query_with_visibility = f'{query} is:public' + params = { + 'q': query_with_visibility, + 'per_page': per_page, + 'sort': sort, + 'order': order, + } response, _ = await self._fetch_data(url, params) repos = response.get('items', []) @@ -161,7 +170,7 @@ class GitHubService(GitService): id=repo.get('id'), full_name=repo.get('full_name'), stargazers_count=repo.get('stargazers_count'), - git_provider=ProviderType.GITHUB + git_provider=ProviderType.GITHUB, ) for repo in repos ] diff --git a/openhands/integrations/gitlab/gitlab_service.py b/openhands/integrations/gitlab/gitlab_service.py index 8446c3fc4c..1d646b900f 100644 --- a/openhands/integrations/gitlab/gitlab_service.py +++ b/openhands/integrations/gitlab/gitlab_service.py @@ -1,8 +1,8 @@ import os from typing import Any +from urllib.parse import quote_plus import httpx -from urllib.parse import quote_plus from pydantic import SecretStr from openhands.integrations.service_types import ( @@ -98,25 +98,26 @@ class GitLabService(GitService): async def search_repositories( self, query: str, per_page: int = 30, sort: str = 'updated', order: str = 'desc' ) -> list[Repository]: - url = f'{self.BASE_URL}/search' + url = f'{self.BASE_URL}/projects' params = { - 'scope': 'projects', 'search': query, 'per_page': per_page, - 'order_by': sort, + 'order_by': 'last_activity_at', 'sort': order, + 'visibility': 'public', } + response, _ = await self._fetch_data(url, params) repos = [ Repository( id=repo.get('id'), full_name=repo.get('path_with_namespace'), stargazers_count=repo.get('star_count'), - git_provider=ProviderType.GITLAB + git_provider=ProviderType.GITLAB, ) for repo in response ] - + return repos async def get_repositories( @@ -124,7 +125,7 @@ class GitLabService(GitService): ) -> list[Repository]: if installation_id: return [] # Not implementing installation_token case yet - + MAX_REPOS = 1000 PER_PAGE = 100 # Maximum allowed by GitLab API all_repos: list[dict] = [] @@ -136,7 +137,7 @@ class GitLabService(GitService): 'pushed': 'last_activity_at', 'updated': 'last_activity_at', 'created': 'created_at', - 'full_name': 'name' + 'full_name': 'name', }.get(sort, 'last_activity_at') while len(all_repos) < MAX_REPOS: @@ -146,7 +147,7 @@ class GitLabService(GitService): 'order_by': order_by, 'sort': 'desc', # GitLab uses sort for direction (asc/desc) 'owned': 1, # Use 1 instead of True - 'membership': 1 # Use 1 instead of True + 'membership': 1, # Use 1 instead of True } response, headers = await self._fetch_data(url, params) @@ -168,7 +169,7 @@ class GitLabService(GitService): id=repo.get('id'), full_name=repo.get('path_with_namespace'), stargazers_count=repo.get('star_count'), - git_provider=ProviderType.GITLAB + git_provider=ProviderType.GITLAB, ) for repo in all_repos ]