ci: split src cache into 4 parallel-extractable shards

Windows tar extraction takes ~1ms/entry for ~1.2M entries (~20 min)
regardless of tool, which is well under the 75k IOPS / 1000 MBps the
D16lds_v5 ephemeral disk can do. A tar archive is a sequential stream,
so the only way to parallelize extraction is to split it at creation time.

Shards (balanced by entry count, ~220-360k each):
  a: src/third_party/blink
  b: src/third_party/{dawn,electron_node,tflite,devtools-frontend}
  c: src/third_party (rest)
  d: src (excluding third_party)

DEPSHASH is now the raw hash; shard files are named
v2-src-cache-shard-{a..d}-${DEPSHASH}.tar (all pass the sas-sidecar
filename regex). sas-token is now a JSON object keyed by shard letter.
All restore paths extract the four shards in parallel, with a per-PID
wait so that a failed shard aborts the step.
This commit is contained in:
Samuel Attard
2026-04-05 15:00:19 -07:00
parent 5bea309980
commit 970574998b
8 changed files with 99 additions and 97 deletions

View File

@@ -28,9 +28,8 @@ runs:
shell: bash
run: |
node src/electron/script/generate-deps-hash.js
DEPSHASH="v2-src-cache-$(cat src/electron/.depshash)"
echo "DEPSHASH=$DEPSHASH" >> $GITHUB_ENV
echo "CACHE_FILE=$DEPSHASH.tar" >> $GITHUB_ENV
echo "DEPSHASH=$(cat src/electron/.depshash)" >> $GITHUB_ENV
echo "CACHE_SHARDS=a b c d" >> $GITHUB_ENV
if [ "${{ inputs.target-platform }}" = "win" ]; then
echo "CACHE_DRIVE=/mnt/win-cache" >> $GITHUB_ENV
else
@@ -40,7 +39,13 @@ runs:
if: ${{ inputs.generate-sas-token == 'true' }}
shell: bash
run: |
curl --unix-socket /var/run/sas/sas.sock --fail "http://foo/$CACHE_FILE?platform=${{ inputs.target-platform }}&getAccountName=true" > sas-token
args='{}'
for s in $CACHE_SHARDS; do
shard_file="v2-src-cache-shard-${s}-${DEPSHASH}.tar"
resp=$(curl --unix-socket /var/run/sas/sas.sock --fail "http://foo/${shard_file}?platform=${{ inputs.target-platform }}&getAccountName=true")
args=$(jq --arg s "$s" --argjson r "$resp" '. + {($s): $r}' <<< "$args")
done
echo "$args" > sas-token
- name: Save SAS Key
if: ${{ inputs.generate-sas-token == 'true' }}
uses: actions/cache/save@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4
@@ -56,16 +61,16 @@ runs:
echo "Not using cache this time..."
echo "cache_exists=false" >> $GITHUB_OUTPUT
else
cache_path=$CACHE_DRIVE/$CACHE_FILE
echo "Using cache key: $DEPSHASH"
echo "Checking for cache in: $cache_path"
if [ ! -f "$cache_path" ] || [ `du $cache_path | cut -f1` = "0" ]; then
echo "cache_exists=false" >> $GITHUB_OUTPUT
echo "Cache Does Not Exist for $DEPSHASH"
else
echo "cache_exists=true" >> $GITHUB_OUTPUT
echo "Cache Already Exists for $DEPSHASH, Skipping.."
fi
cache_exists=true
for s in $CACHE_SHARDS; do
cache_path="$CACHE_DRIVE/v2-src-cache-shard-${s}-${DEPSHASH}.tar"
if [ ! -f "$cache_path" ] || [ `du $cache_path | cut -f1` = "0" ]; then
cache_exists=false
echo "Cache shard ${s} missing at $cache_path"
fi
done
echo "cache_exists=$cache_exists" >> $GITHUB_OUTPUT
fi
- name: Check cross instance cache disk space
if: steps.check-cache.outputs.cache_exists == 'false' && inputs.use-cache == 'true'
@@ -189,22 +194,29 @@ runs:
echo "Uncompressed src size: $(du -sh src | cut -f1 -d' ')"
# Named .tar but zstd-compressed; the sas-sidecar's filename allowlist
# only permits .tar/.tgz so we keep the extension and decode on restore.
# NOTE(review): this listing is a diff with its +/- markers stripped. The
# single-archive lines ($CACHE_FILE) and the four-shard lines below presumably
# belong to the pre- and post-change versions respectively - confirm against
# the real action file.
# Single-archive form: stream all of src through zstd into $CACHE_FILE, report
# its size and copy it to $CACHE_DRIVE.
tar -cf - src | zstd -T0 --long=30 -f -o $CACHE_FILE
echo "Compressed src to $(du -sh $CACHE_FILE | cut -f1 -d' ')"
cp ./$CACHE_FILE $CACHE_DRIVE/
# Sharded form: four archives, each a zstd-compressed tar stream.
# Directories that make up shard b; the same list is turned into --exclude
# flags for shard c so the two shards do not overlap.
SHARD_B_DIRS="src/third_party/dawn src/third_party/electron_node src/third_party/tflite src/third_party/devtools-frontend"
# Shard a: src/third_party/blink only.
tar -cf - src/third_party/blink | zstd -T0 --long=30 -f -o v2-src-cache-shard-a-${DEPSHASH}.tar
# Shard b: the directories listed in SHARD_B_DIRS (left unquoted on purpose so
# the list word-splits into separate tar arguments).
tar -cf - $SHARD_B_DIRS | zstd -T0 --long=30 -f -o v2-src-cache-shard-b-${DEPSHASH}.tar
# Shard c: src/third_party minus blink and minus every SHARD_B_DIRS entry.
tar -cf - --exclude=src/third_party/blink $(printf -- '--exclude=%s ' $SHARD_B_DIRS) src/third_party | zstd -T0 --long=30 -f -o v2-src-cache-shard-c-${DEPSHASH}.tar
# Shard d: src minus src/third_party.
# NOTE(review): GNU tar treats --exclude patterns as unanchored by default, so
# "src/third_party" may also match nested paths such as src/v8/src/third_party,
# which would then be in none of the four shards. Verify, and consider
# --anchored if that is the case.
tar -cf - --exclude=src/third_party src | zstd -T0 --long=30 -f -o v2-src-cache-shard-d-${DEPSHASH}.tar
# Report the compressed size of every shard and copy it to $CACHE_DRIVE.
for s in $CACHE_SHARDS; do
f="v2-src-cache-shard-${s}-${DEPSHASH}.tar"
echo "Compressed shard ${s} to $(du -sh $f | cut -f1 -d' ')"
cp ./$f $CACHE_DRIVE/
done
# Step: confirm the src cache archive(s) are present on $CACHE_DRIVE. Runs only
# when check-cache reported cache_exists == 'false' and use-cache is 'true'.
# NOTE(review): this listing is a diff with its +/- markers stripped; the
# single-file check and the per-shard loop are presumably the pre- and
# post-change versions of the same check - confirm against the real file.
- name: Persist Src Cache
if: ${{ steps.check-cache.outputs.cache_exists == 'false' && inputs.use-cache == 'true' }}
shell: bash
run: |
# Single-archive form: fail the step unless $CACHE_DRIVE/$CACHE_FILE is a
# regular file.
final_cache_path=$CACHE_DRIVE/$CACHE_FILE
echo "Using cache key: $DEPSHASH"
echo "Checking path: $final_cache_path"
if [ ! -f "$final_cache_path" ]; then
echo "Cache key not found"
exit 1
else
echo "Cache key persisted in $final_cache_path"
fi
# Sharded form: fail on the first shard listed in $CACHE_SHARDS whose archive
# is missing from $CACHE_DRIVE. Only existence is tested here, not size.
for s in $CACHE_SHARDS; do
final_cache_path="$CACHE_DRIVE/v2-src-cache-shard-${s}-${DEPSHASH}.tar"
if [ ! -f "$final_cache_path" ]; then
echo "Cache shard ${s} not found at $final_cache_path"
exit 1
fi
done
echo "All cache shards persisted"
- name: Wait for active SSH sessions
shell: bash
if: always() && !cancelled()

View File

@@ -10,28 +10,28 @@ runs:
shell: bash
run: |
if [ "${{ inputs.target-platform }}" = "win" ]; then
cache_path=/mnt/win-cache/$DEPSHASH.tar
cache_drive=/mnt/win-cache
else
cache_path=/mnt/cross-instance-cache/$DEPSHASH.tar
cache_drive=/mnt/cross-instance-cache
fi
echo "Using cache key: $DEPSHASH"
echo "Checking for cache in: $cache_path"
if [ ! -f "$cache_path" ]; then
echo "Cache Does Not Exist for $DEPSHASH - exiting"
exit 1
else
echo "Found Cache for $DEPSHASH at $cache_path"
fi
for s in $CACHE_SHARDS; do
cache_path="${cache_drive}/v2-src-cache-shard-${s}-${DEPSHASH}.tar"
if [ ! -f "$cache_path" ] || [ `du $cache_path | cut -f1` = "0" ]; then
echo "Cache shard ${s} missing or empty at $cache_path - exiting"
exit 1
fi
echo "Found shard ${s}: $(du -sh $cache_path | cut -f1)"
done
echo "Persisted cache is $(du -sh $cache_path | cut -f1)"
if [ `du $cache_path | cut -f1` = "0" ]; then
echo "Cache is empty - exiting"
exit 1
fi
mkdir temp-cache
zstd -d --long=30 -c $cache_path | tar -xf - -C temp-cache
mkdir -p temp-cache/src/third_party
pids=()
for s in $CACHE_SHARDS; do
zstd -d --long=30 -c "${cache_drive}/v2-src-cache-shard-${s}-${DEPSHASH}.tar" | tar -xf - -C temp-cache &
pids+=($!)
done
for pid in "${pids[@]}"; do wait $pid; done
echo "Unzipped cache is $(du -sh temp-cache/src | cut -f1)"
if [ -d "temp-cache/src" ]; then

View File

@@ -31,21 +31,22 @@ runs:
retry_on: error
shell: bash
command: |
sas_token=$(cat sas-token)
if [ -z "$sas_token" ]; then
if [ ! -s sas-token ]; then
echo "SAS Token not found; exiting src cache download early..."
exit 1
else
sas_token=$(jq -r '.sasToken' sas-token)
account_name=$(jq -r '.accountName' sas-token)
if [ "${{ inputs.target-platform }}" = "win" ]; then
azcopy copy --log-level=ERROR \
"https://$account_name.file.core.windows.net/${{ env.AZURE_AKS_WIN_CACHE_SHARE_NAME }}/${{ env.CACHE_PATH }}?$sas_token" $DEPSHASH.tar
else
azcopy copy --log-level=ERROR \
"https://$account_name.file.core.windows.net/${{ env.AZURE_AKS_CACHE_SHARE_NAME }}/${{ env.CACHE_PATH }}?$sas_token" $DEPSHASH.tar
fi
fi
if [ "${{ inputs.target-platform }}" = "win" ]; then
share_name="${{ env.AZURE_AKS_WIN_CACHE_SHARE_NAME }}"
else
share_name="${{ env.AZURE_AKS_CACHE_SHARE_NAME }}"
fi
for s in $CACHE_SHARDS; do
shard_file="v2-src-cache-shard-${s}-${DEPSHASH}.tar"
sas_token=$(jq -r ".${s}.sasToken" sas-token)
account_name=$(jq -r ".${s}.accountName" sas-token)
azcopy copy --log-level=ERROR \
"https://${account_name}.file.core.windows.net/${share_name}/${shard_file}?${sas_token}" "${shard_file}"
done
env:
AZURE_AKS_CACHE_SHARE_NAME: linux-cache
AZURE_AKS_WIN_CACHE_SHARE_NAME: windows-cache
@@ -53,50 +54,44 @@ runs:
shell: bash
run: rm -f sas-token
# Step (macOS and Windows targets): validate the downloaded shard archives and
# extract all of them into temp-cache in parallel.
- name: Unzip and Ensure Src Cache
if: ${{ inputs.target-platform == 'macos' || inputs.target-platform == 'win' }}
shell: bash
run: |
# Pass 1: every shard named in $CACHE_SHARDS must exist in the working
# directory and have a non-zero `du` size; otherwise the step exits 1 before
# anything is extracted.
for s in $CACHE_SHARDS; do
shard_file="v2-src-cache-shard-${s}-${DEPSHASH}.tar"
if [ ! -f "$shard_file" ] || [ `du $shard_file | cut -f1` = "0" ]; then
echo "Cache shard ${s} is missing or empty - exiting"
exit 1
fi
echo "Downloaded shard ${s}: $(du -sh $shard_file | cut -f1)"
done
# Create the shared parent directories up front - presumably so the parallel
# tar processes do not race to create them (TODO confirm).
mkdir -p temp-cache/src/third_party
# Pass 2: start one background `zstd -d | tar -x` pipeline per shard and
# remember its PID.
pids=()
for s in $CACHE_SHARDS; do
zstd -d --long=30 -c "v2-src-cache-shard-${s}-${DEPSHASH}.tar" | tar -xf - -C temp-cache &
pids+=($!)
done
# Wait on each PID individually so each pipeline's exit status is observed.
# NOTE(review): a failed shard only aborts the step if this script runs with
# errexit (and pipefail for a zstd-side failure) - confirm the shell options.
for pid in "${pids[@]}"; do wait $pid; done
echo "Unzipped cache is $(du -sh temp-cache/src | cut -f1)"
# The shard archives are no longer needed once extracted.
rm -f v2-src-cache-shard-*-${DEPSHASH}.tar
# Step (macOS only): move the extracted cache into place as ./src and verify it.
# NOTE(review): this listing is a diff with its +/- markers stripped; the
# size-check / mkdir / extraction lines that operate on $DEPSHASH.tar may belong
# to the pre-change version of this part of the file - confirm against the
# real action file.
- name: Move Src Cache (macOS)
if: ${{ inputs.target-platform == 'macos' }}
shell: bash
run: |
# Single-archive form: exit 1 when `du` reports a size of 0 for $DEPSHASH.tar,
# then extract it into temp-cache.
echo "Downloaded cache is $(du -sh $DEPSHASH.tar | cut -f1)"
if [ `du $DEPSHASH.tar | cut -f1` = "0" ]; then
echo "Cache is empty - exiting"
exit 1
fi
mkdir temp-cache
zstd -d --long=30 -c $DEPSHASH.tar | tar -xf - -C temp-cache
echo "Unzipped cache is $(du -sh temp-cache/src | cut -f1)"
# Replace any existing ./src with the extracted tree and delete the archive.
if [ -d "temp-cache/src" ]; then
echo "Relocating Cache"
rm -rf src
mv temp-cache/src src
echo "Deleting zip file"
rm -rf $DEPSHASH.tar
fi
# Sanity check: src/third_party/blink must exist after the move.
if [ ! -d "src/third_party/blink" ]; then
echo "Cache was not correctly restored - exiting"
exit 1
fi
# Remove src/electron from the restored tree.
echo "Wiping Electron Directory"
rm -rf src/electron
# Step (Windows only): validate the single downloaded archive $DEPSHASH.tar and
# extract it into temp-cache.
# NOTE(review): this listing is a diff with its +/- markers stripped; this step
# works on the single-archive layout and is presumably the pre-change code -
# confirm against the real action file.
- name: Unzip and Ensure Src Cache (Windows)
if: ${{ inputs.target-platform == 'win' }}
shell: bash
run: |
echo "Downloaded cache is $(du -sh $DEPSHASH.tar | cut -f1)"
# Exit 1 when `du` reports a size of 0 for the archive.
if [ `du $DEPSHASH.tar | cut -f1` = "0" ]; then
echo "Cache is empty - exiting"
exit 1
fi
mkdir temp-cache
# The archive is zstd-compressed despite the .tar name, so decode then untar.
zstd -d --long=30 -c $DEPSHASH.tar | tar -xf - -C temp-cache
rm -f $DEPSHASH.tar
- name: Move Src Cache (Windows)
if: ${{ inputs.target-platform == 'win' }}
uses: nick-fields/retry@ad984534de44a9489a53aefd81eb77f87c70dc60 # v4.0.0

View File

@@ -35,9 +35,8 @@ jobs:
# Step: compute the DEPS hash and append the cache variables to $GITHUB_ENV.
# NOTE(review): this listing is a diff with its +/- markers stripped. The three
# lines that build a "v2-src-cache-"-prefixed DEPSHASH and CACHE_PATH look like
# the pre-change form, and the final two lines like their replacement (raw hash
# plus the shard list) - confirm against the real workflow file.
- name: Generate DEPS Hash
run: |
# Presumably writes src/electron/.depshash, which the lines below read.
node src/electron/script/generate-deps-hash.js
DEPSHASH=v2-src-cache-$(cat src/electron/.depshash)
echo "DEPSHASH=$DEPSHASH" >> $GITHUB_ENV
echo "CACHE_PATH=$DEPSHASH.tar" >> $GITHUB_ENV
echo "DEPSHASH=$(cat src/electron/.depshash)" >> $GITHUB_ENV
# Space-separated shard letters that shard-aware steps iterate over.
echo "CACHE_SHARDS=a b c d" >> $GITHUB_ENV
- name: Restore src cache via AKS
uses: ./src/electron/.github/actions/restore-cache-aks
with:

View File

@@ -156,9 +156,8 @@ jobs:
# Step: compute the DEPS hash and append the cache variables to $GITHUB_ENV.
# NOTE(review): this listing is a diff with its +/- markers stripped. The three
# lines that build a "v2-src-cache-"-prefixed DEPSHASH and CACHE_PATH look like
# the pre-change form, and the final two lines like their replacement (raw hash
# plus the shard list) - confirm against the real workflow file.
- name: Generate DEPS Hash
run: |
# Presumably writes src/electron/.depshash, which the lines below read.
node src/electron/script/generate-deps-hash.js
DEPSHASH=v2-src-cache-$(cat src/electron/.depshash)
echo "DEPSHASH=$DEPSHASH" >> $GITHUB_ENV
echo "CACHE_PATH=$DEPSHASH.tar" >> $GITHUB_ENV
echo "DEPSHASH=$(cat src/electron/.depshash)" >> $GITHUB_ENV
# Space-separated shard letters that shard-aware steps iterate over.
echo "CACHE_SHARDS=a b c d" >> $GITHUB_ENV
- name: Restore src cache via AZCopy
if: ${{ inputs.target-platform != 'linux' }}
uses: ./src/electron/.github/actions/restore-cache-azcopy

View File

@@ -80,9 +80,8 @@ jobs:
# Step: compute the DEPS hash and append the cache variables to $GITHUB_ENV.
# NOTE(review): this listing is a diff with its +/- markers stripped. The three
# lines that build a "v2-src-cache-"-prefixed DEPSHASH and CACHE_PATH look like
# the pre-change form, and the final two lines like their replacement (raw hash
# plus the shard list) - confirm against the real workflow file.
- name: Generate DEPS Hash
run: |
# Presumably writes src/electron/.depshash, which the lines below read.
node src/electron/script/generate-deps-hash.js
DEPSHASH=v2-src-cache-$(cat src/electron/.depshash)
echo "DEPSHASH=$DEPSHASH" >> $GITHUB_ENV
echo "CACHE_PATH=$DEPSHASH.tar" >> $GITHUB_ENV
echo "DEPSHASH=$(cat src/electron/.depshash)" >> $GITHUB_ENV
# Space-separated shard letters that shard-aware steps iterate over.
echo "CACHE_SHARDS=a b c d" >> $GITHUB_ENV
- name: Restore src cache via AZCopy
if: ${{ inputs.target-platform == 'macos' }}
uses: ./src/electron/.github/actions/restore-cache-azcopy

View File

@@ -81,9 +81,8 @@ jobs:
# Step: compute the DEPS hash and append the cache variables to $GITHUB_ENV.
# NOTE(review): this listing is a diff with its +/- markers stripped. The three
# lines that build a "v2-src-cache-"-prefixed DEPSHASH and CACHE_PATH look like
# the pre-change form, and the final two lines like their replacement (raw hash
# plus the shard list) - confirm against the real workflow file.
- name: Generate DEPS Hash
run: |
# Presumably writes src/electron/.depshash, which the lines below read.
node src/electron/script/generate-deps-hash.js
DEPSHASH=v2-src-cache-$(cat src/electron/.depshash)
echo "DEPSHASH=$DEPSHASH" >> $GITHUB_ENV
echo "CACHE_PATH=$DEPSHASH.tar" >> $GITHUB_ENV
echo "DEPSHASH=$(cat src/electron/.depshash)" >> $GITHUB_ENV
# Space-separated shard letters that shard-aware steps iterate over.
echo "CACHE_SHARDS=a b c d" >> $GITHUB_ENV
- name: Restore src cache via AZCopy
if: ${{ inputs.target-platform == 'macos' }}
uses: ./src/electron/.github/actions/restore-cache-azcopy

View File

@@ -165,9 +165,8 @@ jobs:
# Step: compute the DEPS hash and append the cache variables to $GITHUB_ENV.
# NOTE(review): this listing is a diff with its +/- markers stripped. The three
# lines that build a "v2-src-cache-"-prefixed DEPSHASH and CACHE_PATH look like
# the pre-change form, and the final two lines like their replacement (raw hash
# plus the shard list) - confirm against the real workflow file.
- name: Generate DEPS Hash
run: |
# Presumably writes src/electron/.depshash, which the lines below read.
node src/electron/script/generate-deps-hash.js
DEPSHASH=v2-src-cache-$(cat src/electron/.depshash)
echo "DEPSHASH=$DEPSHASH" >> $GITHUB_ENV
echo "CACHE_PATH=$DEPSHASH.tar" >> $GITHUB_ENV
echo "DEPSHASH=$(cat src/electron/.depshash)" >> $GITHUB_ENV
# Space-separated shard letters that shard-aware steps iterate over.
echo "CACHE_SHARDS=a b c d" >> $GITHUB_ENV
- name: Restore src cache via AZCopy
if: ${{ inputs.target-platform != 'linux' }}
uses: ./src/electron/.github/actions/restore-cache-azcopy