From cca4a8c821ea035dfa2782046944eb397c044da9 Mon Sep 17 00:00:00 2001
From: Mohamed Zahoor
Date: Tue, 6 Jul 2021 20:41:11 +0530
Subject: [PATCH] exploredb tool to display bucket contents (#9116)

* add state bucket content display test case
* save work
* state db dissection methods
* fix go.mod
* gazelle fix
* pass the test case if the db is not present, for CI
* get db path from env. optimize code
* update deps.bzl
* go mod tidy
* revert deps.bzl
* moved bucket content inspection to exploredb tool
* satisfy deepsource and tidy go.mod

Co-authored-by: Raul Jordan
---
 go.mod                      |   2 +-
 tools/exploredb/BUILD.bazel |   5 +
 tools/exploredb/main.go     | 284 ++++++++++++++++++++++++++++++------
 3 files changed, 242 insertions(+), 49 deletions(-)

diff --git a/go.mod b/go.mod
index c9dc387289..3567d32d00 100644
--- a/go.mod
+++ b/go.mod
@@ -95,7 +95,7 @@ require (
 	github.com/rs/cors v1.7.0
 	github.com/schollz/progressbar/v3 v3.3.4
 	github.com/sirupsen/logrus v1.6.0
-	github.com/status-im/keycard-go v0.0.0-20200402102358-957c09536969 // indirect
+	github.com/status-im/keycard-go v0.0.0-20200402102358-957c09536969
 	github.com/stretchr/testify v1.7.0
 	github.com/supranational/blst v0.3.4
 	github.com/thomaso-mirodin/intmath v0.0.0-20160323211736-5dc6d854e46e
diff --git a/tools/exploredb/BUILD.bazel b/tools/exploredb/BUILD.bazel
index f5872c51f8..cd057c0b94 100644
--- a/tools/exploredb/BUILD.bazel
+++ b/tools/exploredb/BUILD.bazel
@@ -7,8 +7,13 @@ go_library(
     importpath = "github.com/prysmaticlabs/prysm/tools/exploredb",
     visibility = ["//visibility:private"],
     deps = [
+        "//beacon-chain/db/kv:go_default_library",
+        "//proto/beacon/p2p/v1:go_default_library",
+        "//proto/eth/v1alpha1:go_default_library",
+        "//shared/bytesutil:go_default_library",
         "@com_github_dustin_go_humanize//:go_default_library",
         "@com_github_sirupsen_logrus//:go_default_library",
+        "@com_github_status_im_keycard_go//hexutils:go_default_library",
        "@io_etcd_go_bbolt//:go_default_library",
     ],
 )
diff --git a/tools/exploredb/main.go b/tools/exploredb/main.go
index d70b2b6370..84ab91eb52 100644
--- a/tools/exploredb/main.go
+++ b/tools/exploredb/main.go
@@ -9,6 +9,7 @@
 package main
 
 import (
+	"context"
 	"flag"
 	"fmt"
 	"os"
@@ -16,16 +17,22 @@ import (
 	"time"
 
 	"github.com/dustin/go-humanize"
+	"github.com/prysmaticlabs/prysm/beacon-chain/db/kv"
+	pbp2p "github.com/prysmaticlabs/prysm/proto/beacon/p2p/v1"
+	ethpb "github.com/prysmaticlabs/prysm/proto/eth/v1alpha1"
+	"github.com/prysmaticlabs/prysm/shared/bytesutil"
 	log "github.com/sirupsen/logrus"
+	"github.com/status-im/keycard-go/hexutils"
 	bolt "go.etcd.io/bbolt"
 )
 
 var (
 	datadir        = flag.String("datadir", "", "Path to data directory.")
 	dbName         = flag.String("dbname", "", "database name.")
-	bucketStats    = flag.Bool("bucket-stats", true, "Show all the bucket stats.")
-	bucketContents = flag.String("bucket-contents", "", "Show contents of a given bucket.")
-	bucketName     = flag.String("bucket", "", "contents of the bucket to show.")
+	bucketStats    = flag.Bool("bucket-stats", false, "Show all the bucket stats.")
+	bucketContents = flag.Bool("bucket-contents", false, "Show contents of a given bucket.")
+	bucketName     = flag.String("bucket-name", "", "Name of the bucket whose contents to show.")
+	rowLimit       = flag.Uint64("limit", 10, "Maximum number of rows to display.")
 )
 
 func main() {
@@ -33,55 +40,64 @@ func main() {
 
 	// Check for the mandatory flags.
 	if *datadir == "" {
-		log.Fatal("Please specify --datadir to read the database")
+		log.Fatal("Please specify --datadir to read the database")
 	}
 	if *dbName == "" {
-		log.Fatal("Please specify --dbname to specify the database file.")
+		log.Fatal("Please specify --dbname to specify the database file.")
 	}
-	if !*bucketStats {
-		if *bucketContents == "" {
-			log.Fatal("Please specify either --bucket-stats or --bucket-contents")
-		} else if *bucketName == "" {
-			log.Fatal("Please specify --bucket for which to show the contents ")
-		}
-	}
-
-	// check if the database file is present
-	dbName := filepath.Join(*datadir, *dbName)
+	// check if the database file is present.
+	dbNameWithPath := filepath.Join(*datadir, *dbName)
 	if _, err := os.Stat(*datadir); os.IsNotExist(err) {
-		log.Fatalf("database file is not present, %v", err)
-	}
-
-	// open the beacon-chain database
-	// if some other process has the file lock, it will quit after a second
-	db, err := bolt.Open(dbName, 0600, &bolt.Options{Timeout: 1 * time.Second})
-	if err != nil {
-		log.Fatalf("could not open db, %v", err)
-	}
-	defer func() {
-		err := db.Close()
-		log.Fatalf("could not close database file, %v", err)
-	}()
-
-	// get a list of all the existing buckets
-	buckets := make(map[string]*bolt.Bucket)
-	if viewErr := db.View(func(tx *bolt.Tx) error {
-		return tx.ForEach(func(name []byte, buc *bolt.Bucket) error {
-			buckets[string(name)] = buc
-			return nil
-		})
-	}); viewErr != nil {
-		log.Fatalf("could not read buckets from db: %v", viewErr)
+		log.Fatalf("could not locate database file %s: %v", dbNameWithPath, err)
 	}
 
+	// show stats of all the buckets.
 	if *bucketStats {
-		showBucketStats(db, buckets)
+		showBucketStats(dbNameWithPath)
+		return
+	}
+
+	// show the contents of the specified bucket.
+	if *bucketContents {
+		switch *bucketName {
+		case "state", "state-summary":
+			printBucketContents(dbNameWithPath, *rowLimit, *bucketName)
+		default:
+			log.Fatal("Only the 'state' and 'state-summary' buckets are supported for now.")
+		}
 	}
 }
 
-func showBucketStats(db *bolt.DB, buckets map[string]*bolt.Bucket) {
-	for bName := range buckets {
+func showBucketStats(dbNameWithPath string) {
+	// open the raw database file. If the file is busy, then exit.
+	db, openErr := bolt.Open(dbNameWithPath, 0600, &bolt.Options{Timeout: 1 * time.Second})
+	if openErr != nil {
+		log.Fatalf("could not open db to show bucket stats, %v", openErr)
+	}
+
+	// make sure we close the database before returning from this function.
+	defer func() {
+		closeErr := db.Close()
+		if closeErr != nil {
+			log.Fatalf("could not close db after showing bucket stats, %v", closeErr)
+		}
+	}()
+
+	// get a list of all the existing buckets.
+	var buckets []string
+	if viewErr1 := db.View(func(tx *bolt.Tx) error {
+		return tx.ForEach(func(name []byte, buc *bolt.Bucket) error {
+			buckets = append(buckets, string(name))
+			return nil
+		})
+	}); viewErr1 != nil {
+		log.Fatalf("could not read buckets from db while getting list of buckets: %v", viewErr1)
+	}
+
+	// for every bucket, calculate the stats and display them.
+	// TODO: parallelize the execution
+	for _, bName := range buckets {
 		count := uint64(0)
 		minValueSize := ^uint64(0)
 		maxValueSize := uint64(0)
@@ -89,9 +105,9 @@ func showBucketStats(db *bolt.DB, buckets map[string]*bolt.Bucket) {
 		minKeySize := ^uint64(0)
 		maxKeySize := uint64(0)
 		totalKeySize := uint64(0)
-		if err := db.View(func(tx *bolt.Tx) error {
+		if viewErr2 := db.View(func(tx *bolt.Tx) error {
 			b := tx.Bucket([]byte(bName))
-			if err := b.ForEach(func(k, v []byte) error {
+			if forEachErr := b.ForEach(func(k, v []byte) error {
 				count++
 				valueSize := uint64(len(v))
 				if valueSize < minValueSize {
@@ -111,13 +127,13 @@ func showBucketStats(db *bolt.DB, buckets map[string]*bolt.Bucket) {
 				}
 				totalKeySize += uint64(len(k))
 				return nil
-			}); err != nil {
-				log.Errorf("could not process row %d for bucket: %s, %v", count, bName, err)
-				return err
+			}); forEachErr != nil {
+				log.Errorf("could not process row %d for bucket: %s, %v", count, bName, forEachErr)
+				return forEachErr
 			}
 			return nil
-		}); err != nil {
-			log.Errorf("could not get stats for bucket: %s, %v", bName, err)
+		}); viewErr2 != nil {
+			log.Errorf("could not get stats for bucket: %s, %v", bName, viewErr2)
 			continue
 		}
@@ -132,3 +148,175 @@ func showBucketStats(db *bolt.DB, buckets map[string]*bolt.Bucket) {
 		}
 	}
 }
+
+func printBucketContents(dbNameWithPath string, rowLimit uint64, bucketName string) {
+	// get the keys within the supplied limit for the given bucket.
+	bucketNameInBytes := []byte(bucketName)
+	keys, sizes := keysOfBucket(dbNameWithPath, bucketNameInBytes, rowLimit)
+
+	// create a new KV Store.
+	dbDirectory := filepath.Dir(dbNameWithPath)
+	db, openErr := kv.NewKVStore(context.Background(), dbDirectory, &kv.Config{})
+	if openErr != nil {
+		log.Fatalf("could not open db, %v", openErr)
+	}
+
+	// don't forget to close it when returning from this function.
+	defer func() {
+		closeErr := db.Close()
+		if closeErr != nil {
+			log.Fatalf("could not close db, %v", closeErr)
+		}
+	}()
+
+	// retrieve every element for keys in the list and call the respective display function.
+	ctx := context.Background()
+	rowCount := uint64(0)
+	for index, key := range keys {
+		switch bucketName {
+		case "state":
+			printState(ctx, db, key, rowCount, sizes[index])
+		case "state-summary":
+			printStateSummary(ctx, db, key, rowCount)
+		}
+		rowCount++
+	}
+}
+
+func printState(ctx context.Context, db *kv.Store, key []byte, rowCount, valueSize uint64) {
+	st, stateErr := db.State(ctx, bytesutil.ToBytes32(key))
+	if stateErr != nil {
+		log.Fatalf("could not get state for key %#x: %v", key, stateErr)
+	}
+	rowStr := fmt.Sprintf("---- row = %04d ----", rowCount)
+	fmt.Println(rowStr)
+	fmt.Println("key :", key)
+	fmt.Println("value : compressed size = ", humanize.Bytes(valueSize))
+	fmt.Println("genesis_time :", st.GenesisTime())
+	fmt.Println("genesis_validators_root :", hexutils.BytesToHex(st.GenesisValidatorRoot()))
+	fmt.Println("slot :", st.Slot())
+	fmt.Println("fork : previous_version: ", st.Fork().PreviousVersion, ", current_version: ", st.Fork().CurrentVersion)
+	fmt.Println("latest_block_header : sizeSSZ = ", humanize.Bytes(uint64(st.LatestBlockHeader().SizeSSZ())))
+	size, count := sizeAndCountOfByteList(st.BlockRoots())
+	fmt.Println("block_roots : size = ", humanize.Bytes(size), ", count = ", count)
+	size, count = sizeAndCountOfByteList(st.StateRoots())
+	fmt.Println("state_roots : size = ", humanize.Bytes(size), ", count = ", count)
+	size, count = sizeAndCountOfByteList(st.HistoricalRoots())
+	fmt.Println("historical_roots : size = ", humanize.Bytes(size), ", count = ", count)
+	fmt.Println("eth1_data : sizeSSZ = ", humanize.Bytes(uint64(st.Eth1Data().SizeSSZ())))
+	size, count = sizeAndCountGeneric(st.Eth1DataVotes(), nil)
+	fmt.Println("eth1_data_votes : sizeSSZ = ", humanize.Bytes(size), ", count = ", count)
+	fmt.Println("eth1_deposit_index :", st.Eth1DepositIndex())
+	size, count = sizeAndCountGeneric(st.Validators(), nil)
+	fmt.Println("validators : sizeSSZ = ", humanize.Bytes(size), ", count = ", count)
+	size, count = sizeAndCountOfUint64List(st.Balances())
+	fmt.Println("balances : size = ", humanize.Bytes(size), ", count = ", count)
+	size, count = sizeAndCountOfByteList(st.RandaoMixes())
+	fmt.Println("randao_mixes : size = ", humanize.Bytes(size), ", count = ", count)
+	size, count = sizeAndCountOfUint64List(st.Slashings())
+	fmt.Println("slashings : size = ", humanize.Bytes(size), ", count = ", count)
+	size, count = sizeAndCountGeneric(st.PreviousEpochAttestations())
+	fmt.Println("previous_epoch_attestations : sizeSSZ = ", humanize.Bytes(size), ", count = ", count)
+	size, count = sizeAndCountGeneric(st.CurrentEpochAttestations())
+	fmt.Println("current_epoch_attestations : sizeSSZ = ", humanize.Bytes(size), ", count = ", count)
+	fmt.Println("justification_bits : size = ", humanize.Bytes(st.JustificationBits().Len()), ", count = ", st.JustificationBits().Count())
+	fmt.Println("previous_justified_checkpoint : sizeSSZ = ", humanize.Bytes(uint64(st.PreviousJustifiedCheckpoint().SizeSSZ())))
+	fmt.Println("current_justified_checkpoint : sizeSSZ = ", humanize.Bytes(uint64(st.CurrentJustifiedCheckpoint().SizeSSZ())))
+	fmt.Println("finalized_checkpoint : sizeSSZ = ", humanize.Bytes(uint64(st.FinalizedCheckpoint().SizeSSZ())))
+}
+
+func printStateSummary(ctx context.Context, db *kv.Store, key []byte, rowCount uint64) {
+	ss, ssErr := db.StateSummary(ctx, bytesutil.ToBytes32(key))
+	if ssErr != nil {
+		log.Fatalf("could not get state summary for key %#x: %v", key, ssErr)
+	}
+	rowCountStr := fmt.Sprintf("row : %04d, ", rowCount)
+	fmt.Println(rowCountStr, "slot : ", ss.Slot,
+		", root : ", hexutils.BytesToHex(ss.Root))
+}
+
+func keysOfBucket(dbNameWithPath string, bucketName []byte, rowLimit uint64) ([][]byte, []uint64) {
+	// open the raw database file. If the file is busy, then exit.
+	db, openErr := bolt.Open(dbNameWithPath, 0600, &bolt.Options{Timeout: 1 * time.Second})
+	if openErr != nil {
+		log.Fatalf("could not open db while getting keys of a bucket, %v", openErr)
+	}
+
+	// make sure we close the database before returning from this function.
+	defer func() {
+		closeErr := db.Close()
+		if closeErr != nil {
+			log.Fatalf("could not close db while getting keys of a bucket, %v", closeErr)
+		}
+	}()
+
+	// get all the keys of the given bucket, copying each key since bbolt slices are only valid inside the transaction.
+	var keys [][]byte
+	var sizes []uint64
+	if viewErr := db.View(func(tx *bolt.Tx) error {
+		b := tx.Bucket(bucketName)
+		c := b.Cursor()
+		count := uint64(0)
+		for k, v := c.First(); k != nil; k, v = c.Next() {
+			if count >= rowLimit {
+				return nil
+			}
+			keys = append(keys, append([]byte{}, k...))
+			sizes = append(sizes, uint64(len(v)))
+			count++
+		}
+		return nil
+	}); viewErr != nil {
+		log.Fatalf("could not read keys of bucket from db: %v", viewErr)
+	}
+	return keys, sizes
+}
+
+func sizeAndCountOfByteList(list [][]byte) (uint64, uint64) {
+	size := uint64(0)
+	count := uint64(0)
+	for _, root := range list {
+		size += uint64(len(root))
+		count++
+	}
+	return size, count
+}
+
+func sizeAndCountOfUint64List(list []uint64) (uint64, uint64) {
+	size := uint64(0)
+	count := uint64(0)
+	for i := 0; i < len(list); i++ {
+		size += 8
+		count++
+	}
+	return size, count
+}
+
+func sizeAndCountGeneric(genericItems interface{}, err error) (uint64, uint64) {
+	size := uint64(0)
+	count := uint64(0)
+	if err != nil {
+		return size, count
+	}
+
+	switch items := genericItems.(type) {
+	case []*ethpb.Eth1Data:
+		for _, item := range items {
+			size += uint64(item.SizeSSZ())
+		}
+		count = uint64(len(items))
+	case []*ethpb.Validator:
+		for _, item := range items {
+			size += uint64(item.SizeSSZ())
+		}
+		count = uint64(len(items))
+	case []*pbp2p.PendingAttestation:
+		for _, item := range items {
+			size += uint64(item.SizeSSZ())
+		}
+		count = uint64(len(items))
+	default:
+		return 0, 0
+	}
+
+	return size, count
+}
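
Reviewer note (not part of the patch): the bucket-stats path above is essentially a read-only walk over the bolt file. The standalone sketch below reproduces that access pattern using nothing but go.etcd.io/bbolt, so the tool's numbers can be cross-checked without a Bazel build. The database path is a placeholder, and the read-only open is an assumption of this sketch rather than something exploredb itself does.

// bucketcount is a minimal sketch mirroring the traversal showBucketStats performs.
package main

import (
	"fmt"
	"log"
	"time"

	bolt "go.etcd.io/bbolt"
)

func main() {
	// Placeholder path; point this at an actual beacon-chain database file.
	const dbPath = "/tmp/beaconchaindata/beaconchain.db"

	// Open read-only with a short timeout so the sketch fails fast if a
	// running beacon node currently holds the file lock.
	db, err := bolt.Open(dbPath, 0600, &bolt.Options{ReadOnly: true, Timeout: 1 * time.Second})
	if err != nil {
		log.Fatalf("could not open db: %v", err)
	}
	defer func() {
		if err := db.Close(); err != nil {
			log.Fatalf("could not close db: %v", err)
		}
	}()

	// Walk every bucket inside a single read transaction and count its keys.
	if err := db.View(func(tx *bolt.Tx) error {
		return tx.ForEach(func(name []byte, b *bolt.Bucket) error {
			count := 0
			if err := b.ForEach(func(k, v []byte) error {
				count++
				return nil
			}); err != nil {
				return err
			}
			fmt.Printf("%-40s %d keys\n", string(name), count)
			return nil
		})
	}); err != nil {
		log.Fatalf("could not read buckets: %v", err)
	}
}

Running the tool itself would look something like go run ./tools/exploredb --datadir=/path/to/beaconchaindata --dbname=beaconchain.db --bucket-stats, or --bucket-contents --bucket-name=state --limit=5 to dump the first few states; the exact data directory and file name depend on how the beacon node was configured.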