mirror of
https://github.com/electron/electron.git
synced 2026-04-10 03:01:51 -04:00
build: speed up apply_all_patches by ~60% (#50417)
git am rewrites the index 2-3x per patch. In Chromium (~500K files, 70MB index) this dominated wall time: ~67 of 73 seconds were spent rehashing and rewriting the index ~300 times for 150 patches.

- Add index.skipHash=true to skip recomputing the trailing SHA over the full index on every write
- Force index v4 before am so path-prefix compression roughly halves the on-disk index size (70MB -> 40MB)
- Disable core.fsync and gc.auto during am since a crashed apply is just re-run from a clean reset
- Apply patch targets in parallel (capped at ncpu-2); Chromium still dominates but this hides node/nan/etc behind it. Falls back to sequential on roller/ branches where conflict output needs to be readable.
- Prefix each output line with the target name so parallel output is attributable

Measured on a 13-target config with 238 total patches: 73s -> 28s.
This commit is contained in:
@@ -1,13 +1,16 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import argparse
|
||||
import concurrent.futures
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
import warnings
|
||||
|
||||
from lib import git
|
||||
from lib.patches import patch_from_dir
|
||||
|
||||
ELECTRON_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||
THREEWAY = "ELECTRON_USE_THREE_WAY_MERGE_FOR_PATCHES" in os.environ
|
||||
|
||||
def apply_patches(target):
|
||||
@@ -19,14 +22,43 @@ def apply_patches(target):
|
||||
git.import_patches(
|
||||
committer_email="scripts@electron",
|
||||
committer_name="Electron Scripts",
|
||||
output_prefix=f'[{os.path.basename(patch_dir)}] ',
|
||||
patch_data=patch_from_dir(patch_dir),
|
||||
repo=repo,
|
||||
threeway=THREEWAY,
|
||||
)
|
||||
|
||||
def is_roller_branch():
  """Return True when HEAD of the checkout at ELECTRON_DIR is a roller/ branch.

  Runs `git rev-parse --abbrev-ref HEAD` in ELECTRON_DIR with stderr
  discarded and checks whether the reported name starts with 'roller/'.
  If git exits with a non-zero status the answer is False.
  """
  command = ['git', '-C', ELECTRON_DIR, 'rev-parse', '--abbrev-ref', 'HEAD']
  try:
    raw_name = subprocess.check_output(command, stderr=subprocess.DEVNULL)
  except subprocess.CalledProcessError:
    # git could not resolve HEAD here; treat it as "not a roller branch".
    return False
  branch_name = raw_name.decode('utf-8').strip()
  return branch_name.startswith('roller/')
|
||||
|
||||
def apply_config(config):
  """Apply the patches for every target in ``config``, each exactly once.

  Every target is submitted to a thread pool and handed to
  apply_patches(). All targets are allowed to finish even when some fail;
  the failures are then printed together and the first one collected is
  re-raised.

  Args:
    config: iterable of target mappings. Each one is passed unchanged to
      apply_patches(), and its 'repo' entry is used to label a failure.

  Raises:
    Exception: the first failure collected, in completion order (not in
      config order), after every failure has been printed.
  """
  # NOTE: there must be no separate sequential apply_patches() loop here.
  # The thread pool below is the only place targets are applied; applying
  # them a second time would re-run git am over already-patched repos.

  # Targets are independent git repos, so apply in parallel. The work is
  # subprocess-bound (git am), so threads are sufficient. On roller/
  # branches, patch conflicts are expected and interleaved failure output
  # from multiple repos is hard to read, so force sequential there.
  if is_roller_branch():
    max_workers = 1
  else:
    # Leave two cores free; assume 4 cores when the count is unknown.
    max_workers = max(1, (os.cpu_count() or 4) - 2)

  failed = []
  with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as pool:
    # Map each future back to its target so failures can be attributed.
    futures = {pool.submit(apply_patches, t): t for t in config}
    for f in concurrent.futures.as_completed(futures):
      try:
        f.result()
      except Exception as e:  # pylint: disable=broad-except
        failed.append((futures[f].get('repo'), e))

  if failed:
    for repo, e in failed:
      print(f'ERROR applying patches to {repo}: {e}')
    raise failed[0][1]
|
||||
|
||||
def parse_args():
|
||||
parser = argparse.ArgumentParser(description='Apply Electron patches')
|
||||
|
||||
@@ -12,6 +12,7 @@ import posixpath
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
import threading
|
||||
|
||||
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
sys.path.append(SCRIPT_DIR)
|
||||
@@ -51,7 +52,8 @@ def get_repo_root(path):
|
||||
|
||||
|
||||
def am(repo, patch_data, threeway=False, directory=None, exclude=None,
|
||||
committer_name=None, committer_email=None, keep_cr=True):
|
||||
committer_name=None, committer_email=None, keep_cr=True,
|
||||
output_prefix=None):
|
||||
# --keep-non-patch prevents stripping leading bracketed strings on the subject line
|
||||
args = ['--keep-non-patch']
|
||||
if threeway:
|
||||
@@ -72,9 +74,42 @@ def am(repo, patch_data, threeway=False, directory=None, exclude=None,
|
||||
if committer_email is not None:
|
||||
root_args += ['-c', 'user.email=' + committer_email]
|
||||
root_args += ['-c', 'commit.gpgsign=false']
|
||||
# git am rewrites the index 2-3x per patch. In large repos (Chromium's
|
||||
# index is ~70MB / ~500K files) this dominates wall time. skipHash
|
||||
# avoids recomputing the trailing SHA over the full index on every
|
||||
# write, and index v4 roughly halves the on-disk size via path prefix
|
||||
# compression. Also skip per-object fsync and auto-gc since a crashed
|
||||
# apply is simply re-run from a clean reset.
|
||||
root_args += [
|
||||
'-c', 'index.skipHash=true',
|
||||
'-c', 'index.version=4',
|
||||
'-c', 'core.fsync=none',
|
||||
'-c', 'gc.auto=0',
|
||||
]
|
||||
command = ['git'] + root_args + ['am'] + args
|
||||
with subprocess.Popen(command, stdin=subprocess.PIPE) as proc:
|
||||
proc.communicate(patch_data.encode('utf-8'))
|
||||
popen_kwargs = {'stdin': subprocess.PIPE}
|
||||
if output_prefix is not None:
|
||||
popen_kwargs['stdout'] = subprocess.PIPE
|
||||
popen_kwargs['stderr'] = subprocess.STDOUT
|
||||
with subprocess.Popen(command, **popen_kwargs) as proc:
|
||||
def feed_stdin():
|
||||
proc.stdin.write(patch_data.encode('utf-8'))
|
||||
proc.stdin.close()
|
||||
if output_prefix is not None:
|
||||
writer = threading.Thread(target=feed_stdin)
|
||||
writer.start()
|
||||
for line in proc.stdout:
|
||||
try:
|
||||
sys.stdout.write(
|
||||
f'{output_prefix}{line.decode("utf-8", "replace")}')
|
||||
sys.stdout.flush()
|
||||
except BrokenPipeError:
|
||||
pass
|
||||
writer.join()
|
||||
proc.wait()
|
||||
else:
|
||||
feed_stdin()
|
||||
proc.wait()
|
||||
if proc.returncode != 0:
|
||||
raise RuntimeError(f"Command {command} returned {proc.returncode}")
|
||||
|
||||
@@ -83,6 +118,12 @@ def import_patches(repo, ref=UPSTREAM_HEAD, **kwargs):
|
||||
"""same as am(), but we save the upstream HEAD so we can refer to it when we
|
||||
later export patches"""
|
||||
update_ref(repo=repo, ref=ref, newvalue='HEAD')
|
||||
# Upgrade to index v4 before applying so every intermediate index write
|
||||
# during am benefits from path-prefix compression (roughly halves index
|
||||
# size in large repos).
|
||||
subprocess.call(
|
||||
['git', '-C', repo, 'update-index', '--index-version', '4'],
|
||||
stderr=subprocess.DEVNULL)
|
||||
am(repo=repo, **kwargs)
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user