feat(prover): prover report err (#815 )

Co-authored-by: HAOYUatHZ <HAOYUatHZ@users.noreply.github.com> Co-authored-by: georgehao <haohongfan@gmail.com>
feat(coordinator): when prover failure, recover status (#814 )
2026-01-11 23:18:07 -05:00 · 2023-08-17 21:19:08 +08:00 · 2023-08-17 21:15:27 +08:00
8 changed files with 72 additions and 39 deletions
--- a/common/types/message/message.go
+++ b/common/types/message/message.go
@@ -13,6 +13,18 @@ import (
 	"github.com/scroll-tech/go-ethereum/rlp"
 )

+// ProofFailureType classifies why a prover failed to produce a proof.
+type ProofFailureType int
+
+const (
+	// ProofFailureUndefined is the zero value; the failure type is unspecified.
+	ProofFailureUndefined ProofFailureType = iota
+	// ProofFailurePanic marks a failure caused by the prover panicking.
+	ProofFailurePanic
+	// ProofFailureNoPanic marks a failure where proving returned an error without a panic.
+	ProofFailureNoPanic
+)
+
 // RespStatus represents status code from prover to scroll
 type RespStatus uint32

--- a/common/version/version.go
+++ b/common/version/version.go
@@ -6,7 +6,7 @@ import (
 	"strings"
 )

-var tag = "v4.1.61"
+var tag = "v4.1.63"

 var commit = func() string {
 	if info, ok := debug.ReadBuildInfo(); ok {
--- a/coordinator/internal/controller/api/submit_proof.go
+++ b/coordinator/internal/controller/api/submit_proof.go
@@ -64,7 +64,7 @@ func (spc *SubmitProofController) SubmitProof(ctx *gin.Context) {
 		proofMsg.BatchProof = &tmpBatchProof
 	}

-	if err := spc.submitProofReceiverLogic.HandleZkProof(ctx, &proofMsg); err != nil {
+	if err := spc.submitProofReceiverLogic.HandleZkProof(ctx, &proofMsg, spp); err != nil {
 		nerr := fmt.Errorf("handle zk proof failure, err:%w", err)
 		coodinatorType.RenderJSON(ctx, types.ErrCoordinatorHandleZkProofFailure, nerr, nil)
 		return
--- a/coordinator/internal/logic/submitproof/proof_receiver.go
+++ b/coordinator/internal/logic/submitproof/proof_receiver.go
@@ -121,7 +121,7 @@ func NewSubmitProofReceiverLogic(cfg *config.ProverManager, db *gorm.DB, reg pro
 // HandleZkProof handle a ZkProof submitted from a prover.
 // For now only proving/verifying error will lead to setting status as skipped.
 // db/unmarshal errors will not because they are errors on the business logic side.
-func (m *ProofReceiverLogic) HandleZkProof(ctx *gin.Context, proofMsg *message.ProofMsg) error {
+func (m *ProofReceiverLogic) HandleZkProof(ctx *gin.Context, proofMsg *message.ProofMsg, proofParameter coordinatorType.SubmitProofParameter) error {
 	m.proofReceivedTotal.Inc()
 	pk := ctx.GetString(coordinatorType.PublicKey)
 	if len(pk) == 0 {
@@ -144,7 +144,7 @@ func (m *ProofReceiverLogic) HandleZkProof(ctx *gin.Context, proofMsg *message.P
 	log.Info("handling zk proof", "proof id", proofMsg.ID, "prover name", proverTask.ProverName,
 		"prover pk", pk, "prove type", proverTask.TaskType, "proof time", proofTimeSec)

-	if err = m.validator(ctx, proverTask, pk, proofMsg); err != nil {
+	if err = m.validator(ctx, proverTask, pk, proofMsg, proofParameter); err != nil {
 		return err
 	}

@@ -161,7 +161,7 @@ func (m *ProofReceiverLogic) HandleZkProof(ctx *gin.Context, proofMsg *message.P

 	if verifyErr != nil || !success {
 		m.verifierFailureTotal.WithLabelValues(proverVersion).Inc()
-		m.proofFailure(ctx, proofMsg.ID, pk, proofMsg)
+		m.proofRecover(ctx, proofMsg.ID, pk, proofMsg)

 		log.Info("proof verified by coordinator failed", "proof id", proofMsg.ID, "prover name", proverTask.ProverName,
 			"prover pk", pk, "prove type", proofMsg.Type, "proof time", proofTimeSec, "error", verifyErr)
@@ -205,7 +205,7 @@ func (m *ProofReceiverLogic) checkAreAllChunkProofsReady(ctx context.Context, ch
 	return nil
 }

-func (m *ProofReceiverLogic) validator(ctx context.Context, proverTask *orm.ProverTask, pk string, proofMsg *message.ProofMsg) (err error) {
+func (m *ProofReceiverLogic) validator(ctx context.Context, proverTask *orm.ProverTask, pk string, proofMsg *message.ProofMsg, proofParameter coordinatorType.SubmitProofParameter) (err error) {
 	defer func() {
 		if err != nil {
 			m.validateFailureTotal.Inc()
@@ -232,14 +232,12 @@ func (m *ProofReceiverLogic) validator(ctx context.Context, proverTask *orm.Prov
 	proofTimeSec := uint64(proofTime.Seconds())

 	if proofMsg.Status != message.StatusOk {
+		m.proofRecover(ctx, proofMsg.ID, pk, proofMsg)
 		m.validateFailureProverTaskStatusNotOk.Inc()
-		log.Info("proof generated by prover failed", "proof id", proofMsg.ID, "prover name", proverTask.ProverName,
-			"prover pk", pk, "prove type", proofMsg.Type, "error", proofMsg.Error)
-
-		if updateErr := m.proverTaskOrm.UpdateProverTaskProvingStatus(ctx, proofMsg.Type, proofMsg.ID, pk, types.ProverProofInvalid); updateErr != nil {
-			log.Error("proof generated by prover failed update prover task proving status failure", "proof id", proofMsg.ID,
-				"prover name", proverTask.ProverName, "prover pk", pk, "prove type", proofMsg.Type, "error", proofMsg.Error)
-		}
+		log.Info("proof generated by prover failed",
+			"prove type", proofMsg.Type, "proof id", proofMsg.ID,
+			"prover name", proverTask.ProverName, "prover version", proverTask.ProverVersion,
+			"prover pk", pk, "failure type", proofParameter.FailureType, "failure message", proofParameter.FailureMsg)
 		return ErrValidatorFailureProofMsgStatusNotOk
 	}

@@ -267,14 +265,14 @@ func (m *ProofReceiverLogic) validator(ctx context.Context, proverTask *orm.Prov
 	return nil
 }

-func (m *ProofReceiverLogic) proofFailure(ctx context.Context, hash string, pubKey string, proofMsg *message.ProofMsg) {
-	log.Info("proof failure update proof status", "hash", hash, "public key", pubKey,
-		"proof type", proofMsg.Type.String(), "status", types.ProvingTaskFailed.String())
-
-	if err := m.updateProofStatus(ctx, hash, pubKey, proofMsg, types.ProvingTaskFailed, 0); err != nil {
-		log.Error("failed to updated proof status ProvingTaskFailed", "hash", hash, "pubKey", pubKey, "error", err)
-	}
-}
+//func (m *ProofReceiverLogic) proofFailure(ctx context.Context, hash string, pubKey string, proofMsg *message.ProofMsg) {
+//	log.Info("proof failure update proof status", "hash", hash, "public key", pubKey,
+//		"proof type", proofMsg.Type.String(), "status", types.ProvingTaskFailed.String())
+//
+//	if err := m.updateProofStatus(ctx, hash, pubKey, proofMsg, types.ProvingTaskFailed, 0); err != nil {
+//		log.Error("failed to updated proof status ProvingTaskFailed", "hash", hash, "pubKey", pubKey, "error", err)
+//	}
+//}

 func (m *ProofReceiverLogic) proofRecover(ctx context.Context, hash string, pubKey string, proofMsg *message.ProofMsg) {
 	log.Info("proof recover update proof status", "hash", hash, "public key", pubKey,
--- a/coordinator/internal/types/submit_proof.go
+++ b/coordinator/internal/types/submit_proof.go
@@ -2,8 +2,10 @@ package types

 // SubmitProofParameter the SubmitProof api request parameter
 type SubmitProofParameter struct {
-	TaskID   string `form:"task_id" json:"task_id" binding:"required"`
-	TaskType int    `form:"task_type" json:"task_type" binding:"required"`
-	Status   int    `form:"status" json:"status"`
-	Proof    string `form:"proof" json:"proof"`
+	TaskID      string `form:"task_id" json:"task_id" binding:"required"`
+	TaskType    int    `form:"task_type" json:"task_type" binding:"required"`
+	Status      int    `form:"status" json:"status"`
+	Proof       string `form:"proof" json:"proof"`
+	FailureType int    `form:"failure_type" json:"failure_type"` // failure category reported by the prover
+	FailureMsg  string `form:"failure_msg" json:"failure_msg"`   // failure message reported by the prover
 }
--- a/coordinator/test/api_test.go
+++ b/coordinator/test/api_test.go
@@ -317,7 +317,7 @@ func testInvalidProof(t *testing.T) {
 			assert.NoError(t, err)
 			batchProofStatus, err = batchOrm.GetProvingStatusByHash(context.Background(), batch.Hash)
 			assert.NoError(t, err)
-			if chunkProofStatus == types.ProvingTaskFailed && batchProofStatus == types.ProvingTaskFailed {
+			if chunkProofStatus == types.ProvingTaskUnassigned && batchProofStatus == types.ProvingTaskUnassigned {
 				return
 			}
 		case <-tickStop:
--- a/prover/client/types.go
+++ b/prover/client/types.go
@@ -53,10 +53,12 @@ type GetTaskResponse struct {

 // SubmitProofRequest defines the request structure for the SubmitProof API.
 type SubmitProofRequest struct {
-	TaskID   string `json:"task_id"`
-	TaskType int    `json:"task_type"`
-	Status   int    `json:"status"`
-	Proof    string `json:"proof"`
+	TaskID      string `json:"task_id"`
+	TaskType    int    `json:"task_type"`
+	Status      int    `json:"status"`
+	Proof       string `json:"proof"`
+	FailureType int    `json:"failure_type,omitempty"` // message.ProofFailureType as an int; omitted from JSON when zero
+	FailureMsg  string `json:"failure_msg,omitempty"`  // error text of the failure; omitted from JSON when empty
 }

 // SubmitProofResponse defines the response structure for the SubmitProof API.
--- a/prover/prover.go
+++ b/prover/prover.go
@@ -151,13 +151,6 @@ func (r *Prover) proveAndSubmit() error {
 		}
 	}

-	defer func() {
-		err = r.stack.Delete(task.Task.ID)
-		if err != nil {
-			log.Error("prover stack pop failed!", "err", err)
-		}
-	}()
-
 	var proofMsg *message.ProofDetail
 	if task.Times <= 2 {
 		// If panic times <= 2, try to proof the task.
@@ -168,7 +161,8 @@ func (r *Prover) proveAndSubmit() error {
 		log.Info("start to prove task", "task-type", task.Task.Type, "task-id", task.Task.ID)
 		proofMsg, err = r.prove(task)
 		if err != nil { // handling error from prove
-			return fmt.Errorf("failed to prove task, task-type: %v, err: %v", task.Task.Type, err)
+			log.Error("failed to prove task", "task_type", task.Task.Type, "task-id", task.Task.ID, "err", err)
+			return r.submitErr(task, message.ProofFailureNoPanic, err)
 		}

 		return r.submitProof(proofMsg)
@@ -176,7 +170,11 @@ func (r *Prover) proveAndSubmit() error {

 	// when the prover has more than 3 times panic,
 	// it will omit to prove the task, submit StatusProofError and then Delete the task.
-	return fmt.Errorf("zk proving panic for task, task-type: %v, task-id: %v", task.Task.Type, task.Task.ID)
+	if err = r.stack.Delete(task.Task.ID); err != nil {
+		log.Error("prover stack pop failed", "task_type", task.Task.Type, "task_id", task.Task.ID, "err", err)
+	}
+	log.Error("zk proving panic for task", "task-type", task.Task.Type, "task-id", task.Task.ID)
+	return r.submitErr(task, message.ProofFailurePanic, errors.New("zk proving panic for task"))
 }

 // fetchTaskFromCoordinator fetches a new task from the server
@@ -336,6 +334,27 @@ func (r *Prover) submitProof(msg *message.ProofDetail) error {
 	return nil
 }

+// submitErr reports a failed proving attempt for task to the coordinator, sending
+// StatusProofError with proofFailureType and err's message. err must be non-nil.
+func (r *Prover) submitErr(task *store.ProvingTask, proofFailureType message.ProofFailureType, err error) error {
+	req := &client.SubmitProofRequest{
+		TaskID:      task.Task.ID,
+		TaskType:    int(task.Task.Type),
+		Status:      int(message.StatusProofError),
+		Proof:       "",
+		FailureType: int(proofFailureType),
+		FailureMsg:  err.Error(),
+	}
+
+	if submitErr := r.coordinatorClient.SubmitProof(r.ctx, req); submitErr != nil {
+		return fmt.Errorf("error submitting proof: %w", submitErr)
+	}
+
+	log.Info("proof submitted report failure successfully", "task-id", task.Task.ID, "task-type",
+		task.Task.Type, "task-status", message.StatusProofError, "err", err)
+	return nil
+}
+
 func (r *Prover) getSortedTracesByHashes(blockHashes []common.Hash) ([]*types.BlockTrace, error) {
 	if len(blockHashes) == 0 {
 		return nil, fmt.Errorf("blockHashes is empty")
Author	SHA1	Message	Date
HAOYUatHZ	95121093c8	feat(prover): prover report err (#815 ) Co-authored-by: HAOYUatHZ <HAOYUatHZ@users.noreply.github.com> Co-authored-by: georgehao <haohongfan@gmail.com>	2023-08-17 21:19:08 +08:00
georgehao	b85a109fd3	feat(coordinator): when prover failure, recover status (#814 ) Co-authored-by: georgehao <georgehao@users.noreply.github.com>	2023-08-17 21:15:27 +08:00