Compare commits

...

1 Commits

Author SHA1 Message Date
Ho
7de388ef1a [Fix] Accept proof submission even it has been timeout (#1764) 2025-12-12 12:18:34 +09:00

View File

@@ -155,7 +155,7 @@ func NewSubmitProofReceiverLogic(cfg *config.ProverManager, chainCfg *params.Cha
// HandleZkProof handle a ZkProof submitted from a prover. // HandleZkProof handle a ZkProof submitted from a prover.
// For now only proving/verifying error will lead to setting status as skipped. // For now only proving/verifying error will lead to setting status as skipped.
// db/unmarshal errors will not because they are errors on the business logic side. // db/unmarshal errors will not because they are errors on the business logic side.
func (m *ProofReceiverLogic) HandleZkProof(ctx *gin.Context, proofParameter coordinatorType.SubmitProofParameter) error { func (m *ProofReceiverLogic) HandleZkProof(ctx *gin.Context, proofParameter coordinatorType.SubmitProofParameter) (rerr error) {
m.proofReceivedTotal.Inc() m.proofReceivedTotal.Inc()
pk := ctx.GetString(coordinatorType.PublicKey) pk := ctx.GetString(coordinatorType.PublicKey)
if len(pk) == 0 { if len(pk) == 0 {
@@ -172,6 +172,18 @@ func (m *ProofReceiverLogic) HandleZkProof(ctx *gin.Context, proofParameter coor
return ErrValidatorFailureProverTaskEmpty return ErrValidatorFailureProverTaskEmpty
} }
defer func() {
if rerr != nil && types.ProverProveStatus(proverTask.ProvingStatus) == types.ProverAssigned {
// trigger a last-chance closing of current task if some routine had missed it
log.Warn("last chance proof recover triggerred",
"proofID", proofParameter.TaskID,
"err", rerr,
)
m.proofRecover(ctx.Copy(), proverTask, types.ProverTaskFailureTypeUndefined, proofParameter)
}
}()
proofTime := time.Since(proverTask.CreatedAt) proofTime := time.Since(proverTask.CreatedAt)
proofTimeSec := uint64(proofTime.Seconds()) proofTimeSec := uint64(proofTime.Seconds())
@@ -311,6 +323,20 @@ func (m *ProofReceiverLogic) validator(ctx context.Context, proverTask *orm.Prov
} }
}() }()
// Internally we overide the timeout failure:
// if prover task FailureType is SessionInfoFailureTimeout, the submit proof is timeout, but we still accept it
if types.ProverProveStatus(proverTask.ProvingStatus) == types.ProverProofInvalid &&
types.ProverTaskFailureType(proverTask.FailureType) == types.ProverTaskFailureTypeTimeout {
m.validateFailureProverTaskTimeout.Inc()
proverTask.ProvingStatus = int16(types.ProverAssigned)
proofTime := time.Since(proverTask.CreatedAt)
proofTimeSec := uint64(proofTime.Seconds())
log.Warn("proof submit proof have timeout", "hash", proofParameter.TaskID, "taskType", proverTask.TaskType,
"proverName", proverTask.ProverName, "proverPublicKey", pk, "proofTime", proofTimeSec)
}
// Ensure this prover is eligible to participate in the prover task. // Ensure this prover is eligible to participate in the prover task.
if types.ProverProveStatus(proverTask.ProvingStatus) == types.ProverProofValid || if types.ProverProveStatus(proverTask.ProvingStatus) == types.ProverProofValid ||
types.ProverProveStatus(proverTask.ProvingStatus) == types.ProverProofInvalid { types.ProverProveStatus(proverTask.ProvingStatus) == types.ProverProofInvalid {
@@ -328,9 +354,6 @@ func (m *ProofReceiverLogic) validator(ctx context.Context, proverTask *orm.Prov
return ErrValidatorFailureProverTaskCannotSubmitTwice return ErrValidatorFailureProverTaskCannotSubmitTwice
} }
proofTime := time.Since(proverTask.CreatedAt)
proofTimeSec := uint64(proofTime.Seconds())
if proofParameter.Status != int(coordinatorType.StatusOk) { if proofParameter.Status != int(coordinatorType.StatusOk) {
// Temporarily replace "panic" with "pa-nic" to prevent triggering the alert based on logs. // Temporarily replace "panic" with "pa-nic" to prevent triggering the alert based on logs.
failureMsg := strings.Replace(proofParameter.FailureMsg, "panic", "pa-nic", -1) failureMsg := strings.Replace(proofParameter.FailureMsg, "panic", "pa-nic", -1)
@@ -346,14 +369,6 @@ func (m *ProofReceiverLogic) validator(ctx context.Context, proverTask *orm.Prov
return ErrValidatorFailureProofMsgStatusNotOk return ErrValidatorFailureProofMsgStatusNotOk
} }
// if prover task FailureType is SessionInfoFailureTimeout, the submit proof is timeout, need skip it
if types.ProverTaskFailureType(proverTask.FailureType) == types.ProverTaskFailureTypeTimeout {
m.validateFailureProverTaskTimeout.Inc()
log.Info("proof submit proof have timeout, skip this submit proof", "hash", proofParameter.TaskID, "taskType", proverTask.TaskType,
"proverName", proverTask.ProverName, "proverPublicKey", pk, "proofTime", proofTimeSec)
return ErrValidatorFailureProofTimeout
}
// store the proof to prover task // store the proof to prover task
if updateTaskProofErr := m.updateProverTaskProof(ctx, proverTask, proofParameter); updateTaskProofErr != nil { if updateTaskProofErr := m.updateProverTaskProof(ctx, proverTask, proofParameter); updateTaskProofErr != nil {
log.Warn("update prover task proof failure", "hash", proofParameter.TaskID, "proverPublicKey", pk, log.Warn("update prover task proof failure", "hash", proofParameter.TaskID, "proverPublicKey", pk,
@@ -368,6 +383,7 @@ func (m *ProofReceiverLogic) validator(ctx context.Context, proverTask *orm.Prov
"taskType", proverTask.TaskType, "proverName", proverTask.ProverName, "proverPublicKey", pk) "taskType", proverTask.TaskType, "proverName", proverTask.ProverName, "proverPublicKey", pk)
return ErrValidatorFailureTaskHaveVerifiedSuccess return ErrValidatorFailureTaskHaveVerifiedSuccess
} }
return nil return nil
} }
@@ -384,7 +400,7 @@ func (m *ProofReceiverLogic) closeProofTask(ctx context.Context, proverTask *orm
log.Info("proof close task update proof status", "hash", proverTask.TaskID, "proverPublicKey", proverTask.ProverPublicKey, log.Info("proof close task update proof status", "hash", proverTask.TaskID, "proverPublicKey", proverTask.ProverPublicKey,
"taskType", message.ProofType(proverTask.TaskType).String(), "status", types.ProvingTaskVerified.String()) "taskType", message.ProofType(proverTask.TaskType).String(), "status", types.ProvingTaskVerified.String())
if err := m.updateProofStatus(ctx, proverTask, proofParameter, types.ProverProofValid, types.ProverTaskFailureTypeUndefined, proofTimeSec); err != nil { if err := m.updateProofStatus(ctx, proverTask, proofParameter, types.ProverProofValid, types.ProverTaskFailureType(proverTask.FailureType), proofTimeSec); err != nil {
log.Error("failed to updated proof status ProvingTaskVerified", "hash", proverTask.TaskID, "proverPublicKey", proverTask.ProverPublicKey, "error", err) log.Error("failed to updated proof status ProvingTaskVerified", "hash", proverTask.TaskID, "proverPublicKey", proverTask.ProverPublicKey, "error", err)
return err return err
} }
@@ -445,6 +461,9 @@ func (m *ProofReceiverLogic) updateProofStatus(ctx context.Context, proverTask *
if err != nil { if err != nil {
return err return err
} }
// sync status and failture type into proverTask
proverTask.ProvingStatus = int16(status)
proverTask.FailureType = int16(failureType)
if status == types.ProverProofValid && message.ProofType(proofParameter.TaskType) == message.ProofTypeChunk { if status == types.ProverProofValid && message.ProofType(proofParameter.TaskType) == message.ProofTypeChunk {
if checkReadyErr := m.checkAreAllChunkProofsReady(ctx, proverTask.TaskID); checkReadyErr != nil { if checkReadyErr := m.checkAreAllChunkProofsReady(ctx, proverTask.TaskID); checkReadyErr != nil {