diff --git a/k8s/monitoring/alertmanager-templates.config.yaml b/k8s/monitoring/alertmanager-templates.config.yaml
new file mode 100644
index 0000000000..ea082230d1
--- /dev/null
+++ b/k8s/monitoring/alertmanager-templates.config.yaml
@@ -0,0 +1,177 @@
+kind: ConfigMap  # notification template files for Alertmanager
+apiVersion: v1
+metadata:
+  namespace: istio-system
+  name: alertmanager-templates
+  creationTimestamp: null
+data:  # one key per template file; the alertmanager Deployment mounts this ConfigMap as templates-volume
+ default.tmpl: | # shared "__*" helpers plus the "<integration>.default.*" templates; NOTE(review): email.default.html below contains no HTML tags - confirm the markup was not stripped
+ {{ define "__alertmanager" }}AlertManager{{ end }}
+ {{ define "__alertmanagerURL" }}{{ .ExternalURL }}/#/alerts?receiver={{ .Receiver }}{{ end }}
+ {{ define "__subject" }}[{{ .Status | toUpper }}{{ if eq .Status "firing" }}:{{ .Alerts.Firing | len }}{{ end }}] {{ .GroupLabels.SortedPairs.Values | join " " }} {{ if gt (len .CommonLabels) (len .GroupLabels) }}({{ with .CommonLabels.Remove .GroupLabels.Names }}{{ .Values | join " " }}{{ end }}){{ end }}{{ end }}
+ {{ define "__description" }}{{ end }}
+ {{ define "__text_alert_list" }}{{ range . }}Labels:
+ {{ range .Labels.SortedPairs }} - {{ .Name }} = {{ .Value }}
+ {{ end }}Annotations:
+ {{ range .Annotations.SortedPairs }} - {{ .Name }} = {{ .Value }}
+ {{ end }}Source: {{ .GeneratorURL }}
+ {{ end }}{{ end }}
+ {{ define "slack.default.title" }}{{ template "__subject" . }}{{ end }}
+ {{ define "slack.default.username" }}{{ template "__alertmanager" . }}{{ end }}
+ {{ define "slack.default.fallback" }}{{ template "slack.default.title" . }} | {{ template "slack.default.titlelink" . }}{{ end }}
+ {{ define "slack.default.pretext" }}{{ end }}
+ {{ define "slack.default.titlelink" }}{{ template "__alertmanagerURL" . }}{{ end }}
+ {{ define "slack.default.iconemoji" }}{{ end }}
+ {{ define "slack.default.iconurl" }}{{ end }}
+ {{ define "slack.default.text" }}{{ end }}
+ {{ define "hipchat.default.from" }}{{ template "__alertmanager" . }}{{ end }}
+ {{ define "hipchat.default.message" }}{{ template "__subject" . }}{{ end }}
+ {{ define "pagerduty.default.description" }}{{ template "__subject" . }}{{ end }}
+ {{ define "pagerduty.default.client" }}{{ template "__alertmanager" . }}{{ end }}
+ {{ define "pagerduty.default.clientURL" }}{{ template "__alertmanagerURL" . }}{{ end }}
+ {{ define "pagerduty.default.instances" }}{{ template "__text_alert_list" . }}{{ end }}
+ {{ define "opsgenie.default.message" }}{{ template "__subject" . }}{{ end }}
+ {{ define "opsgenie.default.description" }}{{ .CommonAnnotations.SortedPairs.Values | join " " }}
+ {{ if gt (len .Alerts.Firing) 0 -}}
+ Alerts Firing:
+ {{ template "__text_alert_list" .Alerts.Firing }}
+ {{- end }}
+ {{ if gt (len .Alerts.Resolved) 0 -}}
+ Alerts Resolved:
+ {{ template "__text_alert_list" .Alerts.Resolved }}
+ {{- end }}
+ {{- end }}
+ {{ define "opsgenie.default.source" }}{{ template "__alertmanagerURL" . }}{{ end }}
+ {{ define "victorops.default.message" }}{{ template "__subject" . }} | {{ template "__alertmanagerURL" . }}{{ end }}
+ {{ define "victorops.default.from" }}{{ template "__alertmanager" . }}{{ end }}
+ {{ define "email.default.subject" }}{{ template "__subject" . }}{{ end }}
+ {{ define "email.default.html" }}
+
+
+
+
+
+
+ {{ template "__subject" . }}
+
+
+
+
+ |
+
+
+
+
+ |
+ {{ .Alerts | len }} alert{{ if gt (len .Alerts) 1 }}s{{ end }} for {{ range .GroupLabels.SortedPairs }}
+ {{ .Name }}={{ .Value }}
+ {{ end }}
+ |
+
+
+
+
+
+ |
+ View in {{ template "__alertmanager" . }}
+ |
+
+ {{ if gt (len .Alerts.Firing) 0 }}
+
+ |
+ [{{ .Alerts.Firing | len }}] Firing
+ |
+
+ {{ end }}
+ {{ range .Alerts.Firing }}
+
+
+ Labels
+ {{ range .Labels.SortedPairs }}{{ .Name }} = {{ .Value }} {{ end }}
+ {{ if gt (len .Annotations) 0 }}Annotations {{ end }}
+ {{ range .Annotations.SortedPairs }}{{ .Name }} = {{ .Value }} {{ end }}
+ Source
+ |
+
+ {{ end }}
+ {{ if gt (len .Alerts.Resolved) 0 }}
+ {{ if gt (len .Alerts.Firing) 0 }}
+
+
+
+
+
+ |
+
+ {{ end }}
+
+ |
+ [{{ .Alerts.Resolved | len }}] Resolved
+ |
+
+ {{ end }}
+ {{ range .Alerts.Resolved }}
+
+
+ Labels
+ {{ range .Labels.SortedPairs }}{{ .Name }} = {{ .Value }} {{ end }}
+ {{ if gt (len .Annotations) 0 }}Annotations {{ end }}
+ {{ range .Annotations.SortedPairs }}{{ .Name }} = {{ .Value }} {{ end }}
+ Source
+ |
+
+ {{ end }}
+
+ |
+
+
+
+ |
+ |
+
+
+
+
+ {{ end }}
+ {{ define "pushover.default.title" }}{{ template "__subject" . }}{{ end }}
+ {{ define "pushover.default.message" }}{{ .CommonAnnotations.SortedPairs.Values | join " " }}
+ {{ if gt (len .Alerts.Firing) 0 }}
+ Alerts Firing:
+ {{ template "__text_alert_list" .Alerts.Firing }}
+ {{ end }}
+ {{ if gt (len .Alerts.Resolved) 0 }}
+ Alerts Resolved:
+ {{ template "__text_alert_list" .Alerts.Resolved }}
+ {{ end }}
+ {{ end }}
+ {{ define "pushover.default.url" }}{{ template "__alertmanagerURL" . }}{{ end }}
+ slack.tmpl: |  # "slack.devops.text": one line per alert, using the DESCRIPTION annotation and falling back to summary when it is unset
+   {{ define "slack.devops.text" }}
+   {{ range .Alerts }}{{ if .Annotations.DESCRIPTION }}{{ .Annotations.DESCRIPTION }}{{ else }}{{ .Annotations.summary }}{{ end }}
+   {{ end }}
+   {{ end }}
diff --git a/k8s/monitoring/alertmanager.config.yaml b/k8s/monitoring/alertmanager.config.yaml
new file mode 100644
index 0000000000..a523cae438
--- /dev/null
+++ b/k8s/monitoring/alertmanager.config.yaml
@@ -0,0 +1,22 @@
+# Alertmanager configuration: every alert is routed to the "discord" webhook receiver.
+kind: ConfigMap
+apiVersion: v1
+metadata:
+  name: alertmanager-config
+  namespace: istio-system
+data:
+  config.yml: |-
+    # Template files come from the alertmanager-templates ConfigMap, which the
+    # alertmanager Deployment mounts at /etc/alertmanager-templates.
+    templates:
+      - '/etc/alertmanager-templates/*.tmpl'
+    route:
+      receiver: discord
+      group_by: ['alertname', 'priority']
+      group_wait: 10s
+      repeat_interval: 30m
+    receivers:
+      - name: discord
+        webhook_configs:
+          # In-cluster Service in front of the alertmanager-discord pod.
+          - url: http://alertmanager-discord:9094
diff --git a/k8s/monitoring/alertmanager.deploy.yaml b/k8s/monitoring/alertmanager.deploy.yaml
new file mode 100644
index 0000000000..cc250d5917
--- /dev/null
+++ b/k8s/monitoring/alertmanager.deploy.yaml
@@ -0,0 +1,89 @@
+# Alertmanager: single replica, configured from the alertmanager-config ConfigMap.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: alertmanager
+  namespace: istio-system
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: alertmanager
+  template:
+    metadata:
+      name: alertmanager
+      labels:
+        app: alertmanager
+    spec:
+      priorityClassName: monitoring-priority
+      containers:
+        - name: alertmanager
+          # Pinned release rather than the mutable "latest" tag, so a pod restart
+          # cannot silently change the Alertmanager version; bump deliberately.
+          image: prom/alertmanager:v0.16.2
+          args:
+            - "--config.file=/etc/alertmanager/config.yml"
+            - "--storage.path=/alertmanager"
+            - "--log.level=debug"
+          ports:
+            - name: alertmanager
+              containerPort: 9093
+          # Alertmanager's management endpoints: /-/ready gates Service traffic,
+          # /-/healthy lets the kubelet restart a hung process.
+          readinessProbe:
+            httpGet:
+              path: /-/ready
+              port: alertmanager
+          livenessProbe:
+            httpGet:
+              path: /-/healthy
+              port: alertmanager
+            initialDelaySeconds: 10
+          volumeMounts:
+            - name: config-volume
+              mountPath: /etc/alertmanager
+            - name: templates-volume
+              mountPath: /etc/alertmanager-templates
+            - name: alertmanager
+              mountPath: /alertmanager
+      volumes:
+        - name: config-volume
+          configMap:
+            name: alertmanager-config
+        - name: templates-volume
+          configMap:
+            name: alertmanager-templates
+        # emptyDir is discarded with the pod; anything Alertmanager stores under
+        # --storage.path is lost on rescheduling.
+        - name: alertmanager
+          emptyDir: {}
+---
+kind: Deployment  # pod that listens on the "webhook" port (9094)
+apiVersion: apps/v1
+metadata:
+  namespace: istio-system
+  name: alertmanager-discord
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: alertmanager-discord
+  template:
+    metadata:
+      labels:
+        app: alertmanager-discord
+      name: alertmanager-discord
+    spec:
+      priorityClassName: monitoring-priority
+      containers:
+        - name: alertmanager-discord
+          image: gcr.io/prysmaticlabs/alertmanager-discord:latest
+          env:
+            - name: DISCORD_WEBHOOK  # taken from key "url" of the discord-webhook Secret
+              valueFrom:
+                secretKeyRef:
+                  key: url
+                  name: discord-webhook
+          ports:
+            - containerPort: 9094
+              name: webhook
diff --git a/k8s/monitoring/alertmanager.service.yaml b/k8s/monitoring/alertmanager.service.yaml
new file mode 100644
index 0000000000..a7d4b959ff
--- /dev/null
+++ b/k8s/monitoring/alertmanager.service.yaml
@@ -0,0 +1,32 @@
+# ClusterIP Service exposing the alertmanager pod's port 9093.
+apiVersion: v1
+kind: Service
+metadata:
+  name: alertmanager
+  namespace: istio-system
+  annotations:
+    # Scrape hints must name the port and path Alertmanager actually serves:
+    # the container listens on 9093 and exposes its metrics at /metrics.
+    prometheus.io/scrape: 'true'
+    prometheus.io/path: /metrics
+    prometheus.io/port: '9093'
+spec:
+  selector:
+    app: alertmanager
+  type: ClusterIP
+  ports:
+    - port: 9093
+      targetPort: 9093
+---
+kind: Service  # stable in-cluster name/port (alertmanager-discord:9094) for the bridge pod
+apiVersion: v1
+metadata:
+  namespace: istio-system
+  name: alertmanager-discord
+spec:
+  type: ClusterIP
+  selector:
+    app: alertmanager-discord
+  ports:
+    - targetPort: 9094
+      port: 9094
diff --git a/k8s/monitoring/alerts.yaml b/k8s/monitoring/alerts.yaml
new file mode 100644
index 0000000000..64f2ee2a8e
--- /dev/null
+++ b/k8s/monitoring/alerts.yaml
@@ -0,0 +1,39 @@
+# Prometheus alerting rules for the "prysm" rule group.
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: prometheus-alerts
+  namespace: istio-system
+data:
+  prometheus.rules.yml: |-
+    groups:
+      - name: prysm
+        rules:
+          # Fires when the average state_last_slot of the nodes within 100 slots
+          # of the highest one grew by fewer than 50 over the trailing 10 minutes.
+          - alert: stalled_chain
+            expr: delta(avg(state_last_slot > scalar(max(state_last_slot) - 100))[10m:30s]) < 50
+            for: 1m
+            annotations:
+              summary: Chain head advanced fewer than 50 slots in the last 10 minutes
+          # Highest slot / 8 minus the highest finalized epoch.
+          # NOTE(review): the divisor assumes 8 slots per epoch - confirm against the chain config.
+          - alert: too_long_since_finality
+            expr: max(state_last_slot / 8) - floor(max(state_last_finalized_epoch)) > 10
+            for: 1m
+            annotations:
+              summary: No finality in 10 epochs
+          # increase() rather than delta(): reorg_counter is presumably a
+          # monotonically increasing counter (verify its metric type), and delta()
+          # does not compensate for the reset that happens when a node restarts.
+          - alert: high_reorg_rate
+            expr: max(increase(reorg_counter[10m])) > 5
+            for: 1m
+            annotations:
+              summary: Some nodes are seeing more than 5 reorgs in 10 minutes
+          # Peak go_goroutines per beacon-chain series over the last minute.
+          - alert: high_goroutines
+            expr: max_over_time(go_goroutines{component="beacon-chain"}[1m]) > 1000
+            for: 1m
+            annotations:
+              summary: Some nodes are experiencing more than 1000 goroutines
diff --git a/k8s/monitoring/discord-webhook.encrypted_sercret.yaml b/k8s/monitoring/discord-webhook.encrypted_sercret.yaml
new file mode 100644
index 0000000000..64a9d5d8c4
--- /dev/null
+++ b/k8s/monitoring/discord-webhook.encrypted_sercret.yaml
@@ -0,0 +1,11 @@
+apiVersion: v1
+data:  # NOTE(review): the value below is accompanied by the "# kubesec:" trailer, so it is presumably kubesec ciphertext rather than plain base64 - decrypt before applying
+ url: 3Bh4HrgcSs/QdGDVnvg31lEggQELhDEycmTNAoa5WP7gb7UPd0XC20uPZXftWF37eVX2CF4wWkXn7ge4/E+Ut6Fo3K/nLYQOc5BBGiZQZcgVGnYn5adZ+8SLCXQTTwBaCBhLUmxwB8IQYA9icnJg/ZCgXKjq2pgRnkIEn58Y5kw2lIxlbQ1drFKGha1YCBHCbpJZ+dWHsryGoh7S2d0vDQVBu0AW3L8/cM1O5rIv7OcWkV1QfEY5P4xiAgCtz+uS.yYUhj5ra/rofJcNd.j2DDF/zrMlD0efGqYKhfkg==
+kind: Secret
+metadata:
+ name: discord-webhook  # referenced by the alertmanager-discord Deployment (secretKeyRef, key "url")
+ namespace: istio-system
+type: Opaque
+# kubesec:v:3
+# kubesec:gcp:projects/prysmaticlabs/locations/global/keyRings/prysmatic-k8s-secrets/cryptoKeys/testkey:CiQAaKPz55imKn09+ay5Fipt8Ejsa0fl9RAiDUwIB8QjWJniNfESSQD23sIsO81pJ6gDAzc7733PGECLQ+ftcvluf41iLs5GUBBHrk6ziqmtDmfiTzc9E2YDuXYwAU4EejVjXoIHMESywm9EvetOEt4=
+# kubesec:mac:JaD9nI5ZuJQNiB9L.5gAZrH4oEyQNTUmUHmSjRQ==