build: speed up apply_all_patches by ~60% (#50417)

git am rewrites the index 2-3x per patch. In Chromium (~500K files,
70MB index) this dominated wall time: ~67 of 73 seconds were spent
rehashing and rewriting the index ~300 times for 150 patches.

- Add index.skipHash=true to skip recomputing the trailing SHA over
  the full index on every write
- Force index v4 before am so path-prefix compression roughly halves
  the on-disk index size (70MB -> 40MB)
- Disable core.fsync and gc.auto during am since a crashed apply is
  just re-run from a clean reset
- Apply patch targets in parallel (capped at ncpu-2); Chromium still
  dominates but this hides node/nan/etc behind it. Falls back to
  sequential on roller/ branches where conflict output needs to be
  readable.
- Prefix each output line with the target name so parallel output is
  attributable

Measured on a 13-target config with 238 total patches: 73s -> 28s.
This commit is contained in:
Samuel Attard
2026-03-23 04:49:48 -04:00
committed by GitHub
parent b8fa540fd3
commit 882a6b2cf9
2 changed files with 78 additions and 5 deletions

View File

@@ -1,13 +1,16 @@
#!/usr/bin/env python3
import argparse
import concurrent.futures
import json
import os
import subprocess
import warnings
from lib import git
from lib.patches import patch_from_dir
ELECTRON_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
THREEWAY = "ELECTRON_USE_THREE_WAY_MERGE_FOR_PATCHES" in os.environ
def apply_patches(target):
@@ -19,14 +22,43 @@ def apply_patches(target):
git.import_patches(
committer_email="scripts@electron",
committer_name="Electron Scripts",
output_prefix=f'[{os.path.basename(patch_dir)}] ',
patch_data=patch_from_dir(patch_dir),
repo=repo,
threeway=THREEWAY,
)
def is_roller_branch():
    """Return True if the Electron checkout is currently on a roller/ branch.

    Roller branches are automated dependency-update branches where patch
    conflicts are expected; callers use this to force sequential patch
    application so conflict output stays readable.

    Best-effort: any failure to query git is treated as "not a roller
    branch" rather than raised.
    """
    try:
        branch = subprocess.check_output(
            ['git', '-C', ELECTRON_DIR, 'rev-parse', '--abbrev-ref', 'HEAD'],
            stderr=subprocess.DEVNULL,
        ).decode('utf-8').strip()
    except (subprocess.CalledProcessError, OSError):
        # OSError covers a missing git binary (FileNotFoundError); the
        # original caught only CalledProcessError and would crash there.
        return False
    return branch.startswith('roller/')
def apply_config(config):
    """Apply patches to every target repo described by *config*.

    Targets are independent git repos, so apply in parallel. The work is
    subprocess-bound (git am), so threads are sufficient. On roller/
    branches, patch conflicts are expected and interleaved failure output
    from multiple repos is hard to read, so force sequential there.

    Raises the first failure after reporting all of them, so the process
    exits non-zero if any target fails.
    """
    # NOTE: the rendered diff residue left the old sequential
    # `for target in config: apply_patches(target)` loop above the parallel
    # implementation, which would apply every target twice; only the
    # parallel path is kept here.
    if is_roller_branch():
        max_workers = 1
    else:
        # Leave a couple of cores free for git's own subprocesses.
        max_workers = max(1, (os.cpu_count() or 4) - 2)
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as pool:
        futures = {pool.submit(apply_patches, t): t for t in config}
        failed = []
        for future in concurrent.futures.as_completed(futures):
            try:
                future.result()
            except Exception as e:  # pylint: disable=broad-except
                # Assumes each target is a dict with a 'repo' key — matches
                # the .get('repo') in the original; confirm against the
                # config schema.
                failed.append((futures[future].get('repo'), e))
    if failed:
        for repo, e in failed:
            print(f'ERROR applying patches to {repo}: {e}')
        # Re-raise the first failure so callers see a non-zero exit.
        raise failed[0][1]
def parse_args():
parser = argparse.ArgumentParser(description='Apply Electron patches')

View File

@@ -12,6 +12,7 @@ import posixpath
import re
import subprocess
import sys
import threading
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.append(SCRIPT_DIR)
@@ -51,7 +52,8 @@ def get_repo_root(path):
def am(repo, patch_data, threeway=False, directory=None, exclude=None,
committer_name=None, committer_email=None, keep_cr=True):
committer_name=None, committer_email=None, keep_cr=True,
output_prefix=None):
# --keep-non-patch prevents stripping leading bracketed strings on the subject line
args = ['--keep-non-patch']
if threeway:
@@ -72,9 +74,42 @@ def am(repo, patch_data, threeway=False, directory=None, exclude=None,
if committer_email is not None:
root_args += ['-c', 'user.email=' + committer_email]
root_args += ['-c', 'commit.gpgsign=false']
# git am rewrites the index 2-3x per patch. In large repos (Chromium's
# index is ~70MB / ~500K files) this dominates wall time. skipHash
# avoids recomputing the trailing SHA over the full index on every
# write, and index v4 roughly halves the on-disk size via path prefix
# compression. Also skip per-object fsync and auto-gc since a crashed
# apply is simply re-run from a clean reset.
root_args += [
'-c', 'index.skipHash=true',
'-c', 'index.version=4',
'-c', 'core.fsync=none',
'-c', 'gc.auto=0',
]
command = ['git'] + root_args + ['am'] + args
with subprocess.Popen(command, stdin=subprocess.PIPE) as proc:
proc.communicate(patch_data.encode('utf-8'))
popen_kwargs = {'stdin': subprocess.PIPE}
if output_prefix is not None:
popen_kwargs['stdout'] = subprocess.PIPE
popen_kwargs['stderr'] = subprocess.STDOUT
with subprocess.Popen(command, **popen_kwargs) as proc:
def feed_stdin():
proc.stdin.write(patch_data.encode('utf-8'))
proc.stdin.close()
if output_prefix is not None:
writer = threading.Thread(target=feed_stdin)
writer.start()
for line in proc.stdout:
try:
sys.stdout.write(
f'{output_prefix}{line.decode("utf-8", "replace")}')
sys.stdout.flush()
except BrokenPipeError:
pass
writer.join()
proc.wait()
else:
feed_stdin()
proc.wait()
if proc.returncode != 0:
raise RuntimeError(f"Command {command} returned {proc.returncode}")
def import_patches(repo, ref=UPSTREAM_HEAD, **kwargs):
    """same as am(), but we save the upstream HEAD so we can refer to it when we
    later export patches"""
    update_ref(repo=repo, ref=ref, newvalue='HEAD')
    # Upgrade to index v4 before applying so every intermediate index write
    # during am benefits from path-prefix compression (roughly halves index
    # size in large repos).
    # Deliberately best-effort: the return code is ignored and stderr is
    # suppressed, so an older git that rejects --index-version just leaves
    # the index at its current version.
    subprocess.call(
        ['git', '-C', repo, 'update-index', '--index-version', '4'],
        stderr=subprocess.DEVNULL)
    am(repo=repo, **kwargs)