Files
openclaw/scripts/sync-credits.py

429 lines
13 KiB
Python

#!/usr/bin/env python3
"""
Sync maintainers and contributors in docs/reference/credits.md from git/GitHub.
- Maintainers: people who have merged at least MIN_MERGES PRs (via GitHub API), listed with their direct pushes to main
- Contributors: all unique commit authors on main with commit counts
Usage: python scripts/sync-credits.py
"""
import re
import subprocess
from datetime import datetime, timezone
from pathlib import Path
# Repository root: two levels up from this script file.
REPO_ROOT = Path(__file__).parent.parent
# Markdown file whose Maintainers/Contributors sections get regenerated.
CREDITS_FILE = REPO_ROOT.joinpath("docs", "reference", "credits.md")
# GitHub "owner/name" slug passed to the gh CLI.
REPO = "openclaw/openclaw"

# Logins (bots/automation) that must never show up in the maintainer list.
EXCLUDED_MAINTAINERS = {
    "app/clawdinator",
    "clawdinator",
    "github-actions",
    "dependabot",
}

# Commit author/committer names (bots, CI, placeholder identities) that are
# ignored when counting contributors and direct pushes.
EXCLUDED_CONTRIBUTORS = {
    "GitHub",
    "github-actions[bot]",
    "dependabot[bot]",
    "clawdinator[bot]",
    "blacksmith-sh[bot]",
    "google-labs-jules[bot]",
    "Maude Bot",
    "Pocket Clawd",
    "Ghost",
    "Gregor's Bot",
    "Jarvis",
    "Jarvis Deploy",
    "CI",
    "Ubuntu",
    "user",
    "Developer",
    # Bot names that appear in git history
    "CLAWDINATOR Bot",
    "Clawd",
    "Clawdbot",
    "Clawdbot Maintainers",
    "Claude Code",
    "L36 Server",
    "seans-openclawbot",
    "therealZpoint-bot",
    "Vultr-Clawd Admin",
    "hyf0-agent",
}

# A login needs at least this many merged PRs to count as a maintainer.
MIN_MERGES = 2

# Pulls the username out of a GitHub noreply address. Both forms are accepted:
#   ID+username@users.noreply.github.com
#   username@users.noreply.github.com
# Group 1 is the username; matching is case-insensitive.
GITHUB_NOREPLY_RE = re.compile(
    r"^(?:\d+\+)?([^@]+)@users\.noreply\.github\.com$",
    re.IGNORECASE,
)
def extract_github_username(email: str) -> str | None:
    """Return the lowercased GitHub username embedded in a noreply email.

    Returns None when *email* does not match GITHUB_NOREPLY_RE.
    """
    hit = GITHUB_NOREPLY_RE.match(email)
    if hit is None:
        return None
    username = hit.group(1)
    return username.lower()
def sanitize_name(name: str) -> str:
    """Drop every curly brace from *name* and trim surrounding whitespace.

    MDX treats `{...}` as a JavaScript expression, so braces must not reach
    the generated document.
    """
    without_braces = "".join(ch for ch in name if ch not in "{}")
    return without_braces.strip()
def run_git(*args: str) -> str:
    """Invoke `git` with *args* from REPO_ROOT and return its stripped stdout.

    Output is decoded as UTF-8 with undecodable bytes replaced. A non-zero
    exit status raises subprocess.CalledProcessError (check=True).
    """
    command = ["git"]
    command.extend(args)
    completed = subprocess.run(
        command,
        cwd=REPO_ROOT,
        check=True,
        capture_output=True,
        text=True,
        encoding="utf-8",
        errors="replace",
    )
    stdout = completed.stdout
    return stdout.strip()
def run_gh(*args: str) -> str:
    """Invoke the `gh` CLI with *args* from REPO_ROOT and return stripped stdout.

    Output is decoded as UTF-8 with undecodable bytes replaced. A non-zero
    exit status raises subprocess.CalledProcessError (check=True).
    """
    command = ["gh"]
    command.extend(args)
    completed = subprocess.run(
        command,
        cwd=REPO_ROOT,
        check=True,
        capture_output=True,
        text=True,
        encoding="utf-8",
        errors="replace",
    )
    stdout = completed.stdout
    return stdout.strip()
def categorize_commit_files(files: list[str]) -> str:
    """Classify a commit by the paths it touched.

    Paths are compared case-insensitively. The result is one of:
    - 'ci': at least one path under .github/ or starting with scripts/ci
    - 'docs': documentation paths mixed with other files
    - 'docs only': nothing but documentation (under docs/ or ending in .md)
    - 'other': no CI or documentation paths (including an empty list)
    """
    saw_docs = False
    saw_code = False
    for path in files:
        lowered = path.lower()
        # A single CI path decides the category, so stop scanning right away.
        if lowered.startswith((".github/", "scripts/ci")):
            return "ci"
        if lowered.startswith("docs/") or lowered.endswith(".md"):
            saw_docs = True
        else:
            saw_code = True

    if saw_docs and saw_code:
        return "docs"
    if saw_docs:
        return "docs only"
    return "other"
def _empty_push_counts() -> dict[str, int]:
    """Return a fresh per-category direct-push counter, all zeros."""
    return {"ci": 0, "docs only": 0, "docs": 0, "other": 0}


def _record_push(
    push_counts: dict[str, dict[str, int]],
    committer: str | None,
    files: list[str],
) -> None:
    """Add one categorized commit to *committer*'s tally.

    Commits without an attributed committer or without changed files are
    ignored.
    """
    if not committer or not files:
        return
    bucket = push_counts.setdefault(committer, _empty_push_counts())
    bucket[categorize_commit_files(files)] += 1


def _count_direct_pushes(log_output: str) -> dict[str, dict[str, int]]:
    """Tally categorized commits per committer from `git log` output.

    *log_output* is expected to come from
    `git log --format=COMMIT|%cN|%cE --name-only`: each commit starts with a
    `COMMIT|name|email` header line followed by the paths it changed.
    Keys are the GitHub username taken from a noreply email when available,
    otherwise the lowercased committer name.
    """
    push_counts: dict[str, dict[str, int]] = {}
    current_key: str | None = None
    current_files: list[str] = []

    for raw_line in log_output.splitlines():
        line = raw_line.strip()
        if not line:
            continue

        if not line.startswith("COMMIT|"):
            # A changed path; only kept while the commit has a counted committer.
            if current_key:
                current_files.append(line)
            continue

        # New header: close out the previous commit before parsing this one.
        _record_push(push_counts, current_key, current_files)
        current_key = None
        current_files = []

        # Split on the LAST pipe so a committer name that itself contains "|"
        # stays intact (same approach get_contributors uses for authors).
        name, sep, email = line.removeprefix("COMMIT|").rpartition("|")
        if not sep:
            continue
        name = name.strip()
        email = email.strip().lower()
        if not name or name in EXCLUDED_CONTRIBUTORS:
            continue
        current_key = extract_github_username(email) or name.lower()

    # The final commit has no following header to trigger its bookkeeping.
    _record_push(push_counts, current_key, current_files)
    return push_counts


def get_maintainers() -> list[tuple[str, int, dict[str, int]]]:
    """Get maintainers as (login, merge_count, push_counts_by_category).

    - Merges: `mergedBy` logins of merged PRs reported by `gh pr list`
      (at most 10000 PRs are requested), minus EXCLUDED_MAINTAINERS.
    - Direct pushes: non-merge commits on main, attributed by committer
      (GitHub noreply username, else lowercased committer name) and
      categorized into 'ci', 'docs only', 'docs', 'other'.

    Only logins with at least MIN_MERGES merges are returned. The list is
    sorted by total activity (merges + direct pushes) descending, then by
    login case-insensitively.

    Raises:
        subprocess.CalledProcessError: if the gh or git command fails.
    """
    # 1. Count merged PRs per login.
    print(" Fetching merged PRs from GitHub API...")
    output = run_gh(
        "pr",
        "list",
        "--repo",
        REPO,
        "--state",
        "merged",
        "--limit",
        "10000",
        "--json",
        "mergedBy",
        "--jq",
        ".[].mergedBy.login",
    )
    merge_counts: dict[str, int] = {}
    for raw_login in output.splitlines():
        login = raw_login.strip()
        if login and login not in EXCLUDED_MAINTAINERS:
            merge_counts[login] = merge_counts.get(login, 0) + 1
    print(
        f" Found {sum(merge_counts.values())} merged PRs by {len(merge_counts)} users"
    )

    # 2. Count direct pushes (non-merge commits by committer) with categories.
    print(" Counting direct pushes from git history...")
    push_counts = _count_direct_pushes(
        run_git(
            "log", "main", "--no-merges", "--format=COMMIT|%cN|%cE", "--name-only"
        )
    )

    # 3. Keep logins with enough merges; push keys are lowercase, so look the
    #    login up case-insensitively.
    maintainers: list[tuple[str, int, dict[str, int]]] = [
        (login, merges, push_counts.get(login.lower(), _empty_push_counts()))
        for login, merges in merge_counts.items()
        if merges >= MIN_MERGES
    ]
    maintainers.sort(key=lambda x: (-(x[1] + sum(x[2].values())), x[0].lower()))
    return maintainers
def get_contributors() -> list[tuple[str, int]]:
    """Return (display_name, commit_count) for every commit author on main.

    Identities are merged using, in priority order:
    1. the GitHub username extracted from a noreply email,
    2. an author name equal to a GitHub username seen in step 1,
    3. the sanitized display name, compared case-insensitively.

    The result is sorted by commit count descending, then by name.
    """
    log_output = run_git("log", "main", "--format=%aN|%aE")
    if not log_output:
        return []

    # Parse each "name|email" record once; the email is whatever follows the
    # last pipe, so names containing "|" survive.
    records: list[tuple[str, str]] = []
    for raw_line in log_output.splitlines():
        entry = raw_line.strip()
        if not entry or "|" not in entry:
            continue
        author, _, address = entry.rpartition("|")
        records.append((author.strip(), address.strip().lower()))

    # Every GitHub username that appears in any noreply address.
    known_github_users: set[str] = {
        username
        for _, address in records
        if (username := extract_github_username(address))
    }

    counts: dict[str, int] = {}
    canonical: dict[str, str] = {}  # merge key -> preferred display name
    for author, address in records:
        if not author or not address or author in EXCLUDED_CONTRIBUTORS:
            continue
        # Sanitize before keying so MDX-unsafe variants of a name dedupe together.
        display = sanitize_name(author)
        if not display:
            continue

        username = extract_github_username(address)
        lowered = display.lower()
        if username:
            key = f"gh:{username}"
        elif lowered in known_github_users:
            key = f"gh:{lowered}"
        else:
            key = f"name:{lowered}"
        counts[key] = counts.get(key, 0) + 1

        # Keep the best display name: a capitalized one beats a lowercase one,
        # and among equals the longer (more specific) one wins.
        current = canonical.get(key)
        if current is None:
            canonical[key] = display
            continue
        new_is_upper = display[0].isupper()
        old_is_upper = current[0].isupper()
        if new_is_upper and not old_is_upper:
            canonical[key] = display
        elif new_is_upper == old_is_upper and len(display) > len(current):
            canonical[key] = display

    return sorted(
        ((canonical[key], total) for key, total in counts.items()),
        key=lambda item: (-item[1], item[0].lower()),
    )
def _format_maintainer_line(login: str, merges: int, push_cats: dict[str, int]) -> str:
    """Render one maintainer bullet: profile link, merge count, push breakdown."""
    total_pushes = sum(push_cats.values())
    if total_pushes <= 0:
        return f"- [@{login}](https://github.com/{login}) ({merges} merges)"
    # Only categories with a positive count are listed, in a fixed order.
    push_str = ", ".join(
        f"{push_cats[category]} {category}"
        for category in ("ci", "docs only", "docs", "other")
        if push_cats.get(category, 0) > 0
    )
    return f"- [@{login}](https://github.com/{login}) ({merges} merges, {total_pushes} direct changes: {push_str})"


def _replace_sections(
    content: str, maintainer_section: str, contributor_section: str
) -> str:
    """Return *content* with the Maintainers/Contributors section bodies replaced.

    A section body runs from its `## ` header up to the next line starting
    with `## ` or `> `. Everything outside those bodies is kept verbatim.
    """
    result: list[str] = []
    skipping = False  # True while discarding the old body of a replaced section
    for line in content.split("\n"):
        if line == "## Maintainers":
            result.extend([line, "", maintainer_section])
            skipping = True
            continue
        if line == "## Contributors":
            # Separate from preceding text with exactly one blank line. Adding
            # it unconditionally would stack another blank line on every run
            # when the header does not directly follow a replaced section.
            if result and result[-1] != "":
                result.append("")
            result.extend([line, "", contributor_section])
            skipping = True
            continue
        # The next header or blockquote ends the body being replaced.
        if skipping and line.startswith(("## ", "> ")):
            skipping = False
            result.append("")  # blank line before next section
        if not skipping:
            result.append(line)

    updated = "\n".join(result)
    # If a replaced section was the last thing in the file, the final empty
    # split element was skipped too; restore the original trailing newline.
    if content.endswith("\n") and not updated.endswith("\n"):
        updated += "\n"
    return updated


def update_credits(
    maintainers: list[tuple[str, int, dict[str, int]]],
    contributors: list[tuple[str, int]],
) -> None:
    """Rewrite the Maintainers and Contributors sections of CREDITS_FILE.

    Args:
        maintainers: (login, merge_count, push_counts_by_category) tuples.
        contributors: (display_name, commit_count) tuples.

    The file is read and written as UTF-8; a short summary is printed.
    """
    content = CREDITS_FILE.read_text(encoding="utf-8")

    # Maintainers: one bullet per login, linked to the GitHub profile.
    maintainer_lines = [
        _format_maintainer_line(login, merges, push_cats)
        for login, merges, push_cats in maintainers
    ]
    maintainer_section = (
        "\n".join(maintainer_lines)
        if maintainer_lines
        else "_No maintainers detected._"
    )

    # Contributors: comma-separated "name (count)" entries. Names are
    # sanitized so MDX does not interpret curly braces as JS.
    contributor_lines = [
        f"{sanitize_name(name)} ({count})" for name, count in contributors
    ]
    contributor_section = (
        ", ".join(contributor_lines)
        if contributor_lines
        else "_No contributors detected._"
    )
    timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")
    contributor_section = f"{len(contributors)} contributors: {contributor_section}\n\n_Last updated: {timestamp}_"

    CREDITS_FILE.write_text(
        _replace_sections(content, maintainer_section, contributor_section),
        encoding="utf-8",
    )
    print(f"Updated {CREDITS_FILE}")
    print(f" Maintainers: {len(maintainers)}")
    print(f" Contributors: {len(contributors)}")
def main() -> None:
    """Collect maintainers, then contributors, and write them to the credits file."""
    print("Syncing credits from git/GitHub...")
    # Arguments evaluate left to right: maintainers are gathered first.
    update_credits(get_maintainers(), get_contributors())


if __name__ == "__main__":
    main()