mirror of
https://github.com/OffchainLabs/prysm.git
synced 2026-01-10 07:58:22 -05:00
Add eth1 balance monitoring alert (#2575)
* Add eth1 balance monitoring * lint * lint * priority * lint * use value in alerts * fix beacon-chain service * working on stability * more yaml * add more alerts to the finality alerts * add nother header to ignore * extend requirement time for low balance * remove old flag * remove extra flag * feedback to use consistent flag
This commit is contained in:
committed by
Raul Jordan
parent
632f6797cd
commit
40588021d4
@@ -56,7 +56,7 @@ spec:
|
||||
- --p2p-port=5000
|
||||
#- --enable-tracing
|
||||
- --tracing-process-name=$(POD_NAME)
|
||||
- --tracing-endpoint=http://jaeger-collector.istio-system.svc.cluster.local:14268
|
||||
- --tracing-endpoint=http://jaeger-collector.istio-system.svc.cluster.local:14268/api/traces
|
||||
- --trace-sample-fraction=1.0
|
||||
- --datadir=/data
|
||||
- --p2p-max-peers=50
|
||||
|
||||
@@ -9,7 +9,7 @@ spec:
|
||||
servers:
|
||||
- port:
|
||||
number: 30002
|
||||
name: grpc-beacon-chain
|
||||
name: grpc-beacon-chain
|
||||
protocol: GRPC
|
||||
hosts:
|
||||
- beacon.prylabs.net
|
||||
@@ -52,6 +52,25 @@ spec:
|
||||
port:
|
||||
number: 4000
|
||||
host: beacon-chain.beacon-chain.svc.cluster.local
|
||||
---
|
||||
kind: VirtualService
|
||||
apiVersion: networking.istio.io/v1alpha3
|
||||
metadata:
|
||||
name: beacon-chain
|
||||
namespace: beacon-chain
|
||||
spec:
|
||||
hosts:
|
||||
- beacon-chain.beacon-chain.svc.cluster.local
|
||||
gateways:
|
||||
- mesh
|
||||
http:
|
||||
- match:
|
||||
- port: 4000
|
||||
route:
|
||||
- destination:
|
||||
port:
|
||||
number: 4000
|
||||
host: beacon-chain.beacon-chain.svc.cluster.local
|
||||
---
|
||||
apiVersion: networking.istio.io/v1alpha3
|
||||
kind: DestinationRule
|
||||
|
||||
@@ -4,7 +4,7 @@ metadata:
|
||||
name: validator
|
||||
namespace: beacon-chain
|
||||
spec:
|
||||
replicas: 8
|
||||
replicas: 9
|
||||
selector:
|
||||
matchLabels:
|
||||
component: validator
|
||||
@@ -18,6 +18,9 @@ spec:
|
||||
metadata:
|
||||
labels:
|
||||
component: validator
|
||||
annotations:
|
||||
prometheus.io/scrape: 'true'
|
||||
prometheus.io/port: '9090'
|
||||
spec:
|
||||
priorityClassName: production-priority
|
||||
containers:
|
||||
@@ -27,10 +30,11 @@ spec:
|
||||
- --keystore-path=/keystore
|
||||
- --password=nopass
|
||||
- --datadir=/data
|
||||
- --beacon-rpc-provider=beacon-chain:4000
|
||||
- --beacon-rpc-provider=beacon-chain.beacon-chain.svc.cluster.local:4000
|
||||
- --monitoring-port=9090
|
||||
- --enable-tracing
|
||||
- --tracing-process-name=$(POD_NAME)
|
||||
- --tracing-endpoint=http://jaeger-collector.istio-system.svc.cluster.local:14268
|
||||
- --tracing-endpoint=http://jaeger-collector.istio-system.svc.cluster.local:14268/api/traces
|
||||
- --trace-sample-fraction=1.0
|
||||
- --log-format=fluentd
|
||||
- --disable-rewards-penalties-logging
|
||||
|
||||
56
k8s/geth/eth1monitor.yaml
Normal file
56
k8s/geth/eth1monitor.yaml
Normal file
@@ -0,0 +1,56 @@
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: eth1monitor
|
||||
namespace: pow
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app: eth1monitor
|
||||
replicas: 1
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: eth1monitor
|
||||
annotations:
|
||||
prometheus.io/scrape: 'true'
|
||||
prometheus.io/port: '9090'
|
||||
spec:
|
||||
priorityClassName: monitoring-priority
|
||||
containers:
|
||||
- name: monitor
|
||||
image: gcr.io/prysmaticlabs/eth1monitor:latest
|
||||
args:
|
||||
- --port=9090
|
||||
- --addresses=/etc/config/addresses.txt
|
||||
- --web3-provider=http://public-rpc-nodes.pow.svc.cluster.local:8545
|
||||
volumeMounts:
|
||||
- name: addresses
|
||||
mountPath: /etc/config
|
||||
readOnly: true
|
||||
- name: configmap-reload
|
||||
image: jimmidyson/configmap-reload:v0.2.2
|
||||
args:
|
||||
- --volume-dir=/etc/config
|
||||
- --webhook-url=http://localhost:9090/reload
|
||||
volumeMounts:
|
||||
- name: addresses
|
||||
mountPath: /etc/config
|
||||
readOnly: true
|
||||
volumes:
|
||||
- name: addresses
|
||||
configMap:
|
||||
name: eth1-addresses
|
||||
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: eth1-addresses
|
||||
namespace: pow
|
||||
data:
|
||||
addresses.txt: |
|
||||
faucet:0xae1f3f522cfb1b0ed128819e8e72fda207c47e5e
|
||||
pk-manager:0x1bcd2c7dd8d5ffd8b789d90c71ac9aab55e51d5d
|
||||
prylabs:0xd9a5179f091d85051d3c982785efd1455cec8699
|
||||
|
||||
@@ -13,18 +13,52 @@ data:
|
||||
for: 1m
|
||||
annotations:
|
||||
summary: No block slots advanced in 2 minutes
|
||||
- alert: too_long_since_finality
|
||||
- alert: too_long_since_finality_10
|
||||
expr: max(state_last_slot / 8) - floor(max(state_last_finalized_epoch)) > 10
|
||||
for: 1m
|
||||
annotations:
|
||||
summary: No finality in 10 epochs
|
||||
- alert: too_long_since_finality_25
|
||||
expr: max(state_last_slot / 8) - floor(max(state_last_finalized_epoch)) > 25
|
||||
for: 1m
|
||||
annotations:
|
||||
summary: No finality in 25 epochs
|
||||
- alert: too_long_since_finality_100
|
||||
expr: max(state_last_slot / 8) - floor(max(state_last_finalized_epoch)) > 100
|
||||
for: 1m
|
||||
annotations:
|
||||
summary: No finality in 100 epochs
|
||||
- alert: high_reorg_rate
|
||||
expr: max(delta(reorg_counter[10m])) > 5
|
||||
for: 1m
|
||||
annotations:
|
||||
summary: Some nodes are seeing more than 5 reorgs in 10 minutes
|
||||
- alert: high_goroutines
|
||||
expr: max_over_time(go_goroutines{component="beacon-chain"}[1m]) > 1000
|
||||
expr: max_over_time(go_goroutines{component="beacon-chain"}[1m]) > 2000
|
||||
for: 1m
|
||||
annotations:
|
||||
summary: Some nodes are experencing more than 1000 goroutines
|
||||
summary: Pod {{ $labels.pod_name}} experencing more than 2000 goroutines
|
||||
description: "{{ $labels.pod_name }} has {{ $value }} goroutines"
|
||||
- alert: low_disk_space_1gb
|
||||
expr: kubelet_volume_stats_available_bytes < 1e9
|
||||
for: 1m
|
||||
annotations:
|
||||
summary: Pod has less than 1GB free disk space in their persistent disk.
|
||||
- alert: balance_too_low
|
||||
expr: eth_balance < 200
|
||||
for: 15m
|
||||
annotations:
|
||||
summary: ETH1 wallet has less than 200 ETH
|
||||
description: "{{ $labels.name }} ({{ $labels.address }}) has {{ $value }} ETH"
|
||||
- alert: high_grpc_beacon_chain_error_rate
|
||||
expr: sum(rate(istio_requests_total{reporter="destination",destination_service=~"beacon-chain.beacon-chain.svc.cluster.local",response_code!~"5.*"}[5m])) / sum(rate(istio_requests_total{reporter="destination",destination_service=~"beacon-chain.beacon-chain.svc.cluster.local"}[5m])) < 0.99
|
||||
for: 1m
|
||||
annotations:
|
||||
summary: Beacon node global success rate is less than 99%.
|
||||
description: Success rate = {{ $value }}
|
||||
- alert: high_grpc_beacon_chain_method_error_rate
|
||||
expr: sum(rate(grpc_server_handled_total{component="beacon-chain",grpc_code="OK"}[5m])) by (grpc_method) / sum(rate(grpc_server_handled_total{component="beacon-chain"}[5m])) by (grpc_method) < 0.95
|
||||
for: 1m
|
||||
annotations:
|
||||
summary: Beacon chain gRPC method success rate is less than 95%.
|
||||
description: "{{ $labels.grpc_method }} {{ $value }}"
|
||||
|
||||
18
k8s/monitoring/jaeger-collector.virtualservice.yaml
Normal file
18
k8s/monitoring/jaeger-collector.virtualservice.yaml
Normal file
@@ -0,0 +1,18 @@
|
||||
apiVersion: networking.istio.io/v1alpha3
|
||||
kind: VirtualService
|
||||
metadata:
|
||||
name: jaeger-collector-virtual-service
|
||||
namespace: istio-system
|
||||
spec:
|
||||
hosts:
|
||||
- jaeger-collector.istio-system.svc.cluster.local
|
||||
gateways:
|
||||
- mesh
|
||||
http:
|
||||
- match:
|
||||
- port: 14268
|
||||
route:
|
||||
- destination:
|
||||
host: jaeger-collector.istio-system.svc.cluster.local
|
||||
port:
|
||||
number: 14268
|
||||
39
k8s/x_headers_rule.yaml
Normal file
39
k8s/x_headers_rule.yaml
Normal file
@@ -0,0 +1,39 @@
|
||||
apiVersion: config.istio.io/v1alpha2
|
||||
kind: rule
|
||||
metadata:
|
||||
name: x-headers
|
||||
namespace: istio-system
|
||||
spec:
|
||||
match: destination.name != "unknown"
|
||||
responseHeaderOperations:
|
||||
- name: x-backend
|
||||
values: [ destination.name ]
|
||||
operation: APPEND
|
||||
- name: x-source
|
||||
values: [ source.workload.name ]
|
||||
operation: APPEND
|
||||
---
|
||||
apiVersion: config.istio.io/v1alpha2
|
||||
kind: rule
|
||||
metadata:
|
||||
name: x-headers-public
|
||||
namespace: istio-system
|
||||
spec:
|
||||
match: source.workload.name == "istio-ingressgateway"
|
||||
responseHeaderOperations:
|
||||
- name: x-backend
|
||||
operation: REMOVE
|
||||
- name: x-source
|
||||
operation: REMOVE
|
||||
- name: x-envoy-upstream-service-time
|
||||
operation: REMOVE
|
||||
- name: server
|
||||
operation: REMOVE
|
||||
- name: grpc-metadata-x-backend
|
||||
operation: REMOVE
|
||||
- name: grpc-metadata-x-source
|
||||
operation: REMOVE
|
||||
- name: grpc-metadata-x-envoy-upstream-service-time
|
||||
operation: REMOVE
|
||||
- name: grpc-metadata-server
|
||||
operation: REMOVE
|
||||
47
tools/eth1exporter/BUILD.bazel
Normal file
47
tools/eth1exporter/BUILD.bazel
Normal file
@@ -0,0 +1,47 @@
|
||||
load("@io_bazel_rules_go//go:def.bzl", "go_binary", "go_library")
|
||||
load("@io_bazel_rules_docker//go:image.bzl", "go_image")
|
||||
load("@io_bazel_rules_docker//container:container.bzl", "container_push")
|
||||
|
||||
go_library(
|
||||
name = "go_default_library",
|
||||
srcs = ["main.go"],
|
||||
importpath = "github.com/prysmaticlabs/prysm/tools/eth1exporter",
|
||||
visibility = ["//visibility:private"],
|
||||
deps = [
|
||||
"@com_github_ethereum_go_ethereum//common:go_default_library",
|
||||
"@com_github_ethereum_go_ethereum//ethclient:go_default_library",
|
||||
],
|
||||
)
|
||||
|
||||
go_binary(
|
||||
name = "eth1exporter",
|
||||
embed = [":go_default_library"],
|
||||
visibility = ["//visibility:public"],
|
||||
)
|
||||
|
||||
go_image(
|
||||
name = "image",
|
||||
srcs = ["main.go"],
|
||||
goarch = "amd64",
|
||||
goos = "linux",
|
||||
importpath = "github.com/prysmaticlabs/prysm/tools/eth1exporter",
|
||||
race = "off",
|
||||
static = "off",
|
||||
tags = ["manual"],
|
||||
visibility = ["//visibility:private"],
|
||||
deps = [
|
||||
"@com_github_ethereum_go_ethereum//common:go_default_library",
|
||||
"@com_github_ethereum_go_ethereum//ethclient:go_default_library",
|
||||
],
|
||||
)
|
||||
|
||||
container_push(
|
||||
name = "push_image",
|
||||
format = "Docker",
|
||||
image = ":image",
|
||||
registry = "gcr.io",
|
||||
repository = "prysmaticlabs/eth1monitor",
|
||||
tag = "latest",
|
||||
tags = ["manual"],
|
||||
visibility = ["//visibility:private"],
|
||||
)
|
||||
173
tools/eth1exporter/main.go
Normal file
173
tools/eth1exporter/main.go
Normal file
@@ -0,0 +1,173 @@
|
||||
// Prometheus exporter for Ethereum address balances.
|
||||
// Forked from https://github.com/hunterlong/ethexporter
|
||||
package main
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"context"
|
||||
"flag"
|
||||
"fmt"
|
||||
"log"
|
||||
"math/big"
|
||||
"net/http"
|
||||
"os"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/ethereum/go-ethereum/common"
|
||||
"github.com/ethereum/go-ethereum/ethclient"
|
||||
)
|
||||
|
||||
var (
|
||||
allWatching []*Watching
|
||||
loadSeconds float64
|
||||
totalLoaded int64
|
||||
eth *ethclient.Client
|
||||
)
|
||||
|
||||
// Command-line flags. Each variable is a pointer that holds its default until
// flag.Parse runs in main.

// port is the TCP port the HTTP server listens on.
var port = flag.Int("port", 9090, "Port to serve /metrics")

// web3URL is the ETH1 JSON-RPC endpoint the exporter queries.
var web3URL = flag.String("web3-provider", "https://goerli.prylabs.net", "Web3 URL to access information about ETH1")

// prefix is prepended verbatim to every exported metric name.
var prefix = flag.String("prefix", "", "Metrics prefix.")

// addressFilePath points to the name:address list; main refuses to start
// when it is empty.
var addressFilePath = flag.String("addresses", "", "File path to addresses text file.")
|
||||
|
||||
func main() {
|
||||
flag.Parse()
|
||||
|
||||
if *addressFilePath == "" {
|
||||
log.Println("--addresses is required")
|
||||
return
|
||||
}
|
||||
|
||||
err := OpenAddresses(*addressFilePath)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
err = ConnectionToGeth(*web3URL)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
// check address balances
|
||||
go func() {
|
||||
for {
|
||||
totalLoaded = 0
|
||||
t1 := time.Now()
|
||||
fmt.Printf("Checking %v wallets...\n", len(allWatching))
|
||||
for _, v := range allWatching {
|
||||
v.Balance = GetEthBalance(v.Address).String()
|
||||
totalLoaded++
|
||||
}
|
||||
t2 := time.Now()
|
||||
loadSeconds = t2.Sub(t1).Seconds()
|
||||
fmt.Printf("Finished checking %v wallets in %0.0f seconds, sleeping for %v seconds.\n", len(allWatching), loadSeconds, 15)
|
||||
time.Sleep(15 * time.Second)
|
||||
}
|
||||
}()
|
||||
|
||||
block := CurrentBlock()
|
||||
|
||||
fmt.Printf("ETHexporter has started on port %v using web3 server: %v at block #%v\n", *port, *web3URL, block)
|
||||
|
||||
http.HandleFunc("/metrics", MetricsHTTP)
|
||||
http.HandleFunc("/reload", ReloadHTTP)
|
||||
panic(http.ListenAndServe(fmt.Sprintf("0.0.0.0:%d", *port), nil))
|
||||
}
|
||||
|
||||
// Watching is one monitored ETH1 account together with its last known balance.
type Watching struct {
	// Name and Address are the two colon-separated fields of a line in the
	// addresses file.
	Name, Address string
	// Balance is the balance in ether as a decimal string. It stays empty
	// until the poller has queried the account.
	Balance string
}
|
||||
|
||||
// ConnectionToGeth - Connect to remote server.
|
||||
func ConnectionToGeth(url string) error {
|
||||
var err error
|
||||
eth, err = ethclient.Dial(url)
|
||||
return err
|
||||
}
|
||||
|
||||
// GetEthBalance from remote server.
|
||||
func GetEthBalance(address string) *big.Float {
|
||||
balance, err := eth.BalanceAt(context.TODO(), common.HexToAddress(address), nil)
|
||||
if err != nil {
|
||||
fmt.Printf("Error fetching ETH Balance for address: %v\n", address)
|
||||
}
|
||||
return ToEther(balance)
|
||||
}
|
||||
|
||||
// CurrentBlock in ETH1.
|
||||
func CurrentBlock() uint64 {
|
||||
block, err := eth.BlockByNumber(context.TODO(), nil)
|
||||
if err != nil {
|
||||
fmt.Printf("Error fetching current block height: %v\n", err)
|
||||
return 0
|
||||
}
|
||||
return block.NumberU64()
|
||||
}
|
||||
|
||||
// ToEther converts an amount in wei to ether (1 ether = 1e18 wei).
//
// A nil amount is treated as zero so that callers never receive nil. The
// conversion divides by 1e18, which is exactly representable, instead of
// multiplying by the inexact float64 value 1e-18; whole and simple fractional
// ether amounts therefore convert exactly.
func ToEther(o *big.Int) *big.Float {
	if o == nil {
		return big.NewFloat(0)
	}
	// SetInt on a zero-precision Float picks enough precision (at least 64
	// bits) to hold the integer without rounding.
	wei := new(big.Float).SetInt(o)
	return new(big.Float).Quo(wei, big.NewFloat(1e18))
}
|
||||
|
||||
// MetricsHTTP - HTTP response handler for /metrics.
|
||||
func MetricsHTTP(w http.ResponseWriter, r *http.Request) {
|
||||
var allOut []string
|
||||
total := big.NewFloat(0)
|
||||
for _, v := range allWatching {
|
||||
if v.Balance == "" {
|
||||
v.Balance = "0"
|
||||
}
|
||||
bal := big.NewFloat(0)
|
||||
bal.SetString(v.Balance)
|
||||
total.Add(total, bal)
|
||||
allOut = append(allOut, fmt.Sprintf("%veth_balance{name=\"%v\",address=\"%v\"} %v", *prefix, v.Name, v.Address, v.Balance))
|
||||
}
|
||||
allOut = append(allOut, fmt.Sprintf("%veth_balance_total %0.18f", *prefix, total))
|
||||
allOut = append(allOut, fmt.Sprintf("%veth_load_seconds %0.2f", *prefix, loadSeconds))
|
||||
allOut = append(allOut, fmt.Sprintf("%veth_loaded_addresses %v", *prefix, totalLoaded))
|
||||
allOut = append(allOut, fmt.Sprintf("%veth_total_addresses %v", *prefix, len(allWatching)))
|
||||
fmt.Fprintln(w, strings.Join(allOut, "\n"))
|
||||
}
|
||||
|
||||
// ReloadHTTP reloads the addresses from disk.
|
||||
func ReloadHTTP(w http.ResponseWriter, _ *http.Request) {
|
||||
if err := OpenAddresses(*addressFilePath); err != nil {
|
||||
w.WriteHeader(http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
w.WriteHeader(http.StatusOK)
|
||||
log.Println("Reloaded addresses")
|
||||
}
|
||||
|
||||
// OpenAddresses from text file (name:address)
|
||||
func OpenAddresses(filename string) error {
|
||||
file, err := os.Open(filename)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer file.Close()
|
||||
scanner := bufio.NewScanner(file)
|
||||
allWatching = []*Watching{}
|
||||
for scanner.Scan() {
|
||||
object := strings.Split(scanner.Text(), ":")
|
||||
if common.IsHexAddress(object[1]) {
|
||||
w := &Watching{
|
||||
Name: object[0],
|
||||
Address: object[1],
|
||||
}
|
||||
allWatching = append(allWatching, w)
|
||||
}
|
||||
}
|
||||
if err := scanner.Err(); err != nil {
|
||||
return err
|
||||
}
|
||||
return err
|
||||
}
|
||||
@@ -43,7 +43,7 @@ go_image(
|
||||
importpath = IMPORT_PATH,
|
||||
deps = DEPS,
|
||||
pure = "off", # depends on cgo for go-ethereum crypto
|
||||
static = "on",
|
||||
static = "off", # go-ethereum is bad about static
|
||||
tags = ["manual"],
|
||||
goarch = "amd64",
|
||||
goos = "linux",
|
||||
|
||||
Reference in New Issue
Block a user