Compare commits

..

2 Commits

Author SHA1 Message Date
johnsonjie
629664d18b add workflow to release e2e test image weekly 2026-01-13 16:54:28 +08:00
Ho
7de388ef1a [Fix] Accept proof submission even it has been timeout (#1764) 2025-12-12 12:18:34 +09:00
2 changed files with 134 additions and 13 deletions

102
.github/workflows/docker-e2e.yml vendored Normal file
View File

@@ -0,0 +1,102 @@
# Builds the e2e-test Docker images and pushes them to Docker Hub, either on the
# weekly schedule below or on demand.
name: Docker E2E Weekly
on:
# Trigger every Friday at 00:00 UTC (cron fields: minute hour day-of-month month day-of-week; 5 = Friday).
# Adjust the cron expression as needed.
schedule:
- cron: '0 0 * * 5'
# Allow manual triggering from the Actions tab (useful for testing).
workflow_dispatch:
# Workflow-level environment: IMAGE_TAG is the tag every job in this file applies to the image it pushes.
env:
IMAGE_TAG: e2e-test
# One job per image; the jobs declare no dependencies on each other.
jobs:
# Job: builds ./build/dockerfiles/rollup_relayer.Dockerfile for linux/amd64 and linux/arm64
# and pushes the result as scrolltech/rollup-relayer:<IMAGE_TAG>.
rollup_relayer:
runs-on: ubuntu-latest
steps:
# Each action is pinned to a full commit SHA; the trailing comment records the release it corresponds to.
- name: Checkout code
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
# Presumably needed so Buildx can emulate the non-native platform listed under `platforms` — confirm.
- name: Set up QEMU
uses: docker/setup-qemu-action@29109295f81e9208d7d86ff1c6c12d2833863392 # v3.6.0
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@b5ca514318bd6ebac0fb2aedd5d36ec1b5c232a2 # v3.10.0
# Logs in with the DOCKERHUB_USERNAME / DOCKERHUB_TOKEN secrets before the push step.
- name: Login to Docker Hub
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
- name: Build and push
uses: docker/build-push-action@471d1dc4e07e5cdedd4c2171150001c434f0b7a4 # v6.15.0
# REPOSITORY is the image name used in `tags` below, together with the workflow-level IMAGE_TAG.
env:
REPOSITORY: rollup-relayer
with:
# Build context is the current directory of the checkout.
context: .
file: ./build/dockerfiles/rollup_relayer.Dockerfile
platforms: linux/amd64,linux/arm64
push: true
tags: |
scrolltech/${{ env.REPOSITORY }}:${{ env.IMAGE_TAG }}
# Job: builds ./build/dockerfiles/coordinator-api.Dockerfile and pushes the result as
# scrolltech/coordinator-api:<IMAGE_TAG>. Unlike the other jobs in this file, no `platforms`
# list is passed (see the commented-out line below).
coordinator-api:
runs-on: ubuntu-latest
steps:
# Each action is pinned to a full commit SHA; the trailing comment records the release it corresponds to.
- name: Checkout code
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
# NOTE(review): QEMU is still set up although `platforms` is commented out below —
# possibly unnecessary for this job; confirm before removing.
- name: Set up QEMU
uses: docker/setup-qemu-action@29109295f81e9208d7d86ff1c6c12d2833863392 # v3.6.0
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@b5ca514318bd6ebac0fb2aedd5d36ec1b5c232a2 # v3.10.0
# Logs in with the DOCKERHUB_USERNAME / DOCKERHUB_TOKEN secrets before the push step.
- name: Login to Docker Hub
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
- name: Build and push
uses: docker/build-push-action@471d1dc4e07e5cdedd4c2171150001c434f0b7a4 # v6.15.0
# REPOSITORY is the image name used in `tags` below, together with the workflow-level IMAGE_TAG.
env:
REPOSITORY: coordinator-api
with:
# Build context is the current directory of the checkout.
context: .
file: ./build/dockerfiles/coordinator-api.Dockerfile
# Multi-platform build disabled because of bugs on arm64; re-enable the line below once they are fixed.
# platforms: linux/amd64,linux/arm64
push: true
tags: |
scrolltech/${{ env.REPOSITORY }}:${{ env.IMAGE_TAG }}
# Job: builds ./build/dockerfiles/coordinator-cron.Dockerfile for linux/amd64 and linux/arm64
# and pushes the result as scrolltech/coordinator-cron:<IMAGE_TAG>.
coordinator-cron:
runs-on: ubuntu-latest
steps:
# Each action is pinned to a full commit SHA; the trailing comment records the release it corresponds to.
- name: Checkout code
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
# Presumably needed so Buildx can emulate the non-native platform listed under `platforms` — confirm.
- name: Set up QEMU
uses: docker/setup-qemu-action@29109295f81e9208d7d86ff1c6c12d2833863392 # v3.6.0
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@b5ca514318bd6ebac0fb2aedd5d36ec1b5c232a2 # v3.10.0
# Logs in with the DOCKERHUB_USERNAME / DOCKERHUB_TOKEN secrets before the push step.
- name: Login to Docker Hub
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
- name: Build and push
uses: docker/build-push-action@471d1dc4e07e5cdedd4c2171150001c434f0b7a4 # v6.15.0
# REPOSITORY is the image name used in `tags` below, together with the workflow-level IMAGE_TAG.
env:
REPOSITORY: coordinator-cron
with:
# Build context is the current directory of the checkout.
context: .
file: ./build/dockerfiles/coordinator-cron.Dockerfile
platforms: linux/amd64,linux/arm64
push: true
tags: |
scrolltech/${{ env.REPOSITORY }}:${{ env.IMAGE_TAG }}

View File

@@ -155,7 +155,7 @@ func NewSubmitProofReceiverLogic(cfg *config.ProverManager, chainCfg *params.Cha
// HandleZkProof handles a ZkProof submitted by a prover.
// For now only proving/verifying error will lead to setting status as skipped.
// db/unmarshal errors will not because they are errors on the business logic side.
func (m *ProofReceiverLogic) HandleZkProof(ctx *gin.Context, proofParameter coordinatorType.SubmitProofParameter) error {
func (m *ProofReceiverLogic) HandleZkProof(ctx *gin.Context, proofParameter coordinatorType.SubmitProofParameter) (rerr error) {
m.proofReceivedTotal.Inc()
pk := ctx.GetString(coordinatorType.PublicKey)
if len(pk) == 0 {
@@ -172,6 +172,18 @@ func (m *ProofReceiverLogic) HandleZkProof(ctx *gin.Context, proofParameter coor
return ErrValidatorFailureProverTaskEmpty
}
defer func() {
if rerr != nil && types.ProverProveStatus(proverTask.ProvingStatus) == types.ProverAssigned {
// trigger a last-chance closing of current task if some routine had missed it
log.Warn("last chance proof recover triggerred",
"proofID", proofParameter.TaskID,
"err", rerr,
)
m.proofRecover(ctx.Copy(), proverTask, types.ProverTaskFailureTypeUndefined, proofParameter)
}
}()
proofTime := time.Since(proverTask.CreatedAt)
proofTimeSec := uint64(proofTime.Seconds())
@@ -311,6 +323,20 @@ func (m *ProofReceiverLogic) validator(ctx context.Context, proverTask *orm.Prov
}
}()
// Internally we override the timeout failure:
// if the prover task was marked invalid with FailureType ProverTaskFailureTypeTimeout, the submitted proof has timed out, but we still accept it
if types.ProverProveStatus(proverTask.ProvingStatus) == types.ProverProofInvalid &&
types.ProverTaskFailureType(proverTask.FailureType) == types.ProverTaskFailureTypeTimeout {
m.validateFailureProverTaskTimeout.Inc()
proverTask.ProvingStatus = int16(types.ProverAssigned)
proofTime := time.Since(proverTask.CreatedAt)
proofTimeSec := uint64(proofTime.Seconds())
log.Warn("proof submit proof have timeout", "hash", proofParameter.TaskID, "taskType", proverTask.TaskType,
"proverName", proverTask.ProverName, "proverPublicKey", pk, "proofTime", proofTimeSec)
}
// Ensure this prover is eligible to participate in the prover task.
if types.ProverProveStatus(proverTask.ProvingStatus) == types.ProverProofValid ||
types.ProverProveStatus(proverTask.ProvingStatus) == types.ProverProofInvalid {
@@ -328,9 +354,6 @@ func (m *ProofReceiverLogic) validator(ctx context.Context, proverTask *orm.Prov
return ErrValidatorFailureProverTaskCannotSubmitTwice
}
proofTime := time.Since(proverTask.CreatedAt)
proofTimeSec := uint64(proofTime.Seconds())
if proofParameter.Status != int(coordinatorType.StatusOk) {
// Temporarily replace "panic" with "pa-nic" to prevent triggering the alert based on logs.
failureMsg := strings.Replace(proofParameter.FailureMsg, "panic", "pa-nic", -1)
@@ -346,14 +369,6 @@ func (m *ProofReceiverLogic) validator(ctx context.Context, proverTask *orm.Prov
return ErrValidatorFailureProofMsgStatusNotOk
}
// if prover task FailureType is SessionInfoFailureTimeout, the submit proof is timeout, need skip it
if types.ProverTaskFailureType(proverTask.FailureType) == types.ProverTaskFailureTypeTimeout {
m.validateFailureProverTaskTimeout.Inc()
log.Info("proof submit proof have timeout, skip this submit proof", "hash", proofParameter.TaskID, "taskType", proverTask.TaskType,
"proverName", proverTask.ProverName, "proverPublicKey", pk, "proofTime", proofTimeSec)
return ErrValidatorFailureProofTimeout
}
// store the proof to prover task
if updateTaskProofErr := m.updateProverTaskProof(ctx, proverTask, proofParameter); updateTaskProofErr != nil {
log.Warn("update prover task proof failure", "hash", proofParameter.TaskID, "proverPublicKey", pk,
@@ -368,6 +383,7 @@ func (m *ProofReceiverLogic) validator(ctx context.Context, proverTask *orm.Prov
"taskType", proverTask.TaskType, "proverName", proverTask.ProverName, "proverPublicKey", pk)
return ErrValidatorFailureTaskHaveVerifiedSuccess
}
return nil
}
@@ -384,7 +400,7 @@ func (m *ProofReceiverLogic) closeProofTask(ctx context.Context, proverTask *orm
log.Info("proof close task update proof status", "hash", proverTask.TaskID, "proverPublicKey", proverTask.ProverPublicKey,
"taskType", message.ProofType(proverTask.TaskType).String(), "status", types.ProvingTaskVerified.String())
if err := m.updateProofStatus(ctx, proverTask, proofParameter, types.ProverProofValid, types.ProverTaskFailureTypeUndefined, proofTimeSec); err != nil {
if err := m.updateProofStatus(ctx, proverTask, proofParameter, types.ProverProofValid, types.ProverTaskFailureType(proverTask.FailureType), proofTimeSec); err != nil {
log.Error("failed to updated proof status ProvingTaskVerified", "hash", proverTask.TaskID, "proverPublicKey", proverTask.ProverPublicKey, "error", err)
return err
}
@@ -445,6 +461,9 @@ func (m *ProofReceiverLogic) updateProofStatus(ctx context.Context, proverTask *
if err != nil {
return err
}
// sync status and failure type into proverTask
proverTask.ProvingStatus = int16(status)
proverTask.FailureType = int16(failureType)
if status == types.ProverProofValid && message.ProofType(proofParameter.TaskType) == message.ProofTypeChunk {
if checkReadyErr := m.checkAreAllChunkProofsReady(ctx, proverTask.TaskID); checkReadyErr != nil {