Compare commits

..

2 Commits

Author SHA1 Message Date
HAOYUatHZ
95121093c8 feat(prover): prover report err (#815)
Co-authored-by: HAOYUatHZ <HAOYUatHZ@users.noreply.github.com>
Co-authored-by: georgehao <haohongfan@gmail.com>
2023-08-17 21:19:08 +08:00
georgehao
b85a109fd3 feat(coordinator): when prover failure, recover status (#814)
Co-authored-by: georgehao <georgehao@users.noreply.github.com>
2023-08-17 21:15:27 +08:00
8 changed files with 72 additions and 39 deletions

View File

@@ -13,6 +13,18 @@ import (
"github.com/scroll-tech/go-ethereum/rlp"
)
// ProofFailureType the proof failure type
type ProofFailureType int
const (
// ProofFailureUndefined the undefined type proof failure type
ProofFailureUndefined ProofFailureType = iota
// ProofFailurePanic proof failure for prover panic
ProofFailurePanic
// ProofFailureNoPanic proof failure for no prover panic
ProofFailureNoPanic
)
// RespStatus represents status code from prover to scroll
type RespStatus uint32

View File

@@ -6,7 +6,7 @@ import (
"strings"
)
var tag = "v4.1.61"
var tag = "v4.1.63"
var commit = func() string {
if info, ok := debug.ReadBuildInfo(); ok {

View File

@@ -64,7 +64,7 @@ func (spc *SubmitProofController) SubmitProof(ctx *gin.Context) {
proofMsg.BatchProof = &tmpBatchProof
}
if err := spc.submitProofReceiverLogic.HandleZkProof(ctx, &proofMsg); err != nil {
if err := spc.submitProofReceiverLogic.HandleZkProof(ctx, &proofMsg, spp); err != nil {
nerr := fmt.Errorf("handle zk proof failure, err:%w", err)
coodinatorType.RenderJSON(ctx, types.ErrCoordinatorHandleZkProofFailure, nerr, nil)
return

View File

@@ -121,7 +121,7 @@ func NewSubmitProofReceiverLogic(cfg *config.ProverManager, db *gorm.DB, reg pro
// HandleZkProof handle a ZkProof submitted from a prover.
// For now only proving/verifying error will lead to setting status as skipped.
// db/unmarshal errors will not because they are errors on the business logic side.
func (m *ProofReceiverLogic) HandleZkProof(ctx *gin.Context, proofMsg *message.ProofMsg) error {
func (m *ProofReceiverLogic) HandleZkProof(ctx *gin.Context, proofMsg *message.ProofMsg, proofParameter coordinatorType.SubmitProofParameter) error {
m.proofReceivedTotal.Inc()
pk := ctx.GetString(coordinatorType.PublicKey)
if len(pk) == 0 {
@@ -144,7 +144,7 @@ func (m *ProofReceiverLogic) HandleZkProof(ctx *gin.Context, proofMsg *message.P
log.Info("handling zk proof", "proof id", proofMsg.ID, "prover name", proverTask.ProverName,
"prover pk", pk, "prove type", proverTask.TaskType, "proof time", proofTimeSec)
if err = m.validator(ctx, proverTask, pk, proofMsg); err != nil {
if err = m.validator(ctx, proverTask, pk, proofMsg, proofParameter); err != nil {
return err
}
@@ -161,7 +161,7 @@ func (m *ProofReceiverLogic) HandleZkProof(ctx *gin.Context, proofMsg *message.P
if verifyErr != nil || !success {
m.verifierFailureTotal.WithLabelValues(proverVersion).Inc()
m.proofFailure(ctx, proofMsg.ID, pk, proofMsg)
m.proofRecover(ctx, proofMsg.ID, pk, proofMsg)
log.Info("proof verified by coordinator failed", "proof id", proofMsg.ID, "prover name", proverTask.ProverName,
"prover pk", pk, "prove type", proofMsg.Type, "proof time", proofTimeSec, "error", verifyErr)
@@ -205,7 +205,7 @@ func (m *ProofReceiverLogic) checkAreAllChunkProofsReady(ctx context.Context, ch
return nil
}
func (m *ProofReceiverLogic) validator(ctx context.Context, proverTask *orm.ProverTask, pk string, proofMsg *message.ProofMsg) (err error) {
func (m *ProofReceiverLogic) validator(ctx context.Context, proverTask *orm.ProverTask, pk string, proofMsg *message.ProofMsg, proofParameter coordinatorType.SubmitProofParameter) (err error) {
defer func() {
if err != nil {
m.validateFailureTotal.Inc()
@@ -232,14 +232,12 @@ func (m *ProofReceiverLogic) validator(ctx context.Context, proverTask *orm.Prov
proofTimeSec := uint64(proofTime.Seconds())
if proofMsg.Status != message.StatusOk {
m.proofRecover(ctx, proofMsg.ID, pk, proofMsg)
m.validateFailureProverTaskStatusNotOk.Inc()
log.Info("proof generated by prover failed", "proof id", proofMsg.ID, "prover name", proverTask.ProverName,
"prover pk", pk, "prove type", proofMsg.Type, "error", proofMsg.Error)
if updateErr := m.proverTaskOrm.UpdateProverTaskProvingStatus(ctx, proofMsg.Type, proofMsg.ID, pk, types.ProverProofInvalid); updateErr != nil {
log.Error("proof generated by prover failed update prover task proving status failure", "proof id", proofMsg.ID,
"prover name", proverTask.ProverName, "prover pk", pk, "prove type", proofMsg.Type, "error", proofMsg.Error)
}
log.Info("proof generated by prover failed",
"prove type", proofMsg.Type, "proof id", proofMsg.ID,
"prover name", proverTask.ProverName, "prover version", proverTask.ProverVersion,
"prover pk", "failure type", proofParameter.FailureType, "failure message", proofParameter.FailureMsg)
return ErrValidatorFailureProofMsgStatusNotOk
}
@@ -267,14 +265,14 @@ func (m *ProofReceiverLogic) validator(ctx context.Context, proverTask *orm.Prov
return nil
}
func (m *ProofReceiverLogic) proofFailure(ctx context.Context, hash string, pubKey string, proofMsg *message.ProofMsg) {
log.Info("proof failure update proof status", "hash", hash, "public key", pubKey,
"proof type", proofMsg.Type.String(), "status", types.ProvingTaskFailed.String())
if err := m.updateProofStatus(ctx, hash, pubKey, proofMsg, types.ProvingTaskFailed, 0); err != nil {
log.Error("failed to updated proof status ProvingTaskFailed", "hash", hash, "pubKey", pubKey, "error", err)
}
}
//func (m *ProofReceiverLogic) proofFailure(ctx context.Context, hash string, pubKey string, proofMsg *message.ProofMsg) {
// log.Info("proof failure update proof status", "hash", hash, "public key", pubKey,
// "proof type", proofMsg.Type.String(), "status", types.ProvingTaskFailed.String())
//
// if err := m.updateProofStatus(ctx, hash, pubKey, proofMsg, types.ProvingTaskFailed, 0); err != nil {
// log.Error("failed to updated proof status ProvingTaskFailed", "hash", hash, "pubKey", pubKey, "error", err)
// }
//}
func (m *ProofReceiverLogic) proofRecover(ctx context.Context, hash string, pubKey string, proofMsg *message.ProofMsg) {
log.Info("proof recover update proof status", "hash", hash, "public key", pubKey,

View File

@@ -2,8 +2,10 @@ package types
// SubmitProofParameter the SubmitProof api request parameter
type SubmitProofParameter struct {
TaskID string `form:"task_id" json:"task_id" binding:"required"`
TaskType int `form:"task_type" json:"task_type" binding:"required"`
Status int `form:"status" json:"status"`
Proof string `form:"proof" json:"proof"`
TaskID string `form:"task_id" json:"task_id" binding:"required"`
TaskType int `form:"task_type" json:"task_type" binding:"required"`
Status int `form:"status" json:"status"`
Proof string `form:"proof" json:"proof"`
FailureType int `form:"failure_type" json:"failure_type"`
FailureMsg string `form:"failure_msg" json:"failure_msg"`
}

View File

@@ -317,7 +317,7 @@ func testInvalidProof(t *testing.T) {
assert.NoError(t, err)
batchProofStatus, err = batchOrm.GetProvingStatusByHash(context.Background(), batch.Hash)
assert.NoError(t, err)
if chunkProofStatus == types.ProvingTaskFailed && batchProofStatus == types.ProvingTaskFailed {
if chunkProofStatus == types.ProvingTaskUnassigned && batchProofStatus == types.ProvingTaskUnassigned {
return
}
case <-tickStop:

View File

@@ -53,10 +53,12 @@ type GetTaskResponse struct {
// SubmitProofRequest defines the request structure for the SubmitProof API.
type SubmitProofRequest struct {
TaskID string `json:"task_id"`
TaskType int `json:"task_type"`
Status int `json:"status"`
Proof string `json:"proof"`
TaskID string `json:"task_id"`
TaskType int `json:"task_type"`
Status int `json:"status"`
Proof string `json:"proof"`
FailureType int `json:"failure_type,omitempty"`
FailureMsg string `json:"failure_msg,omitempty"`
}
// SubmitProofResponse defines the response structure for the SubmitProof API.

View File

@@ -151,13 +151,6 @@ func (r *Prover) proveAndSubmit() error {
}
}
defer func() {
err = r.stack.Delete(task.Task.ID)
if err != nil {
log.Error("prover stack pop failed!", "err", err)
}
}()
var proofMsg *message.ProofDetail
if task.Times <= 2 {
// If panic times <= 2, try to proof the task.
@@ -168,7 +161,8 @@ func (r *Prover) proveAndSubmit() error {
log.Info("start to prove task", "task-type", task.Task.Type, "task-id", task.Task.ID)
proofMsg, err = r.prove(task)
if err != nil { // handling error from prove
return fmt.Errorf("failed to prove task, task-type: %v, err: %v", task.Task.Type, err)
log.Error("failed to prove task", "task_type", task.Task.Type, "task-id", task.Task.ID, "err", err)
return r.submitErr(task, message.ProofFailureNoPanic, err)
}
return r.submitProof(proofMsg)
@@ -176,7 +170,11 @@ func (r *Prover) proveAndSubmit() error {
// when the prover has more than 3 times panic,
// it will omit to prove the task, submit StatusProofError and then Delete the task.
return fmt.Errorf("zk proving panic for task, task-type: %v, task-id: %v", task.Task.Type, task.Task.ID)
if err = r.stack.Delete(task.Task.ID); err != nil {
log.Error("prover stack pop failed", "task_type", task.Task.Type, "task_id", task.Task.ID, "err", err)
}
log.Error("zk proving panic for task", "task-type", task.Task.Type, "task-id", task.Task.ID)
return r.submitErr(task, message.ProofFailurePanic, errors.New("zk proving panic for task"))
}
// fetchTaskFromCoordinator fetches a new task from the server
@@ -336,6 +334,27 @@ func (r *Prover) submitProof(msg *message.ProofDetail) error {
return nil
}
func (r *Prover) submitErr(task *store.ProvingTask, proofFailureType message.ProofFailureType, err error) error {
// prepare the submit request
req := &client.SubmitProofRequest{
TaskID: task.Task.ID,
TaskType: int(task.Task.Type),
Status: int(message.StatusProofError),
Proof: "",
FailureType: int(proofFailureType),
FailureMsg: err.Error(),
}
// send the submit request
if submitErr := r.coordinatorClient.SubmitProof(r.ctx, req); submitErr != nil {
return fmt.Errorf("error submitting proof: %v", submitErr)
}
log.Info("proof submitted report failure successfully", "task-id", task.Task.ID, "task-type",
task.Task.Type, "task-status", message.StatusProofError, "err", err)
return nil
}
func (r *Prover) getSortedTracesByHashes(blockHashes []common.Hash) ([]*types.BlockTrace, error) {
if len(blockHashes) == 0 {
return nil, fmt.Errorf("blockHashes is empty")