misc: initial working setup for metrics observabilit

This commit is contained in:
Sheen Capadngan
2024-06-05 21:46:10 +08:00
parent 4c06f134fb
commit 60895537a7
11 changed files with 1781 additions and 20 deletions

View File

@@ -63,3 +63,5 @@ CLIENT_SECRET_GITHUB_LOGIN=
CLIENT_ID_GITLAB_LOGIN=
CLIENT_SECRET_GITLAB_LOGIN=
TELEMETRY_EXPORT_URL=

1659
backend/package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@@ -86,6 +86,13 @@
"@node-saml/passport-saml": "^4.0.4",
"@octokit/rest": "^20.0.2",
"@octokit/webhooks-types": "^7.3.1",
"@opentelemetry/api": "^1.8.0",
"@opentelemetry/auto-instrumentations-node": "^0.46.1",
"@opentelemetry/exporter-metrics-otlp-proto": "^0.51.1",
"@opentelemetry/instrumentation": "^0.51.1",
"@opentelemetry/resources": "^1.24.1",
"@opentelemetry/sdk-metrics": "^1.24.1",
"@opentelemetry/semantic-conventions": "^1.24.1",
"@serdnam/pino-cloudwatch-transport": "^1.0.4",
"@sindresorhus/slugify": "^2.2.1",
"@ucast/mongo2js": "^1.3.4",

View File

@@ -119,7 +119,8 @@ const envSchema = z
.transform((val) => val === "true")
.optional(),
INFISICAL_CLOUD: zodStrBool.default("false"),
MAINTENANCE_MODE: zodStrBool.default("false")
MAINTENANCE_MODE: zodStrBool.default("false"),
TELEMETRY_EXPORT_URL: zpStr(z.string().optional())
})
.transform((data) => ({
...data,

View File

@@ -0,0 +1,37 @@
import opentelemetry from "@opentelemetry/api";
import { getNodeAutoInstrumentations } from "@opentelemetry/auto-instrumentations-node";
import { OTLPMetricExporter } from "@opentelemetry/exporter-metrics-otlp-proto";
import { registerInstrumentations } from "@opentelemetry/instrumentation";
import { Resource } from "@opentelemetry/resources";
import { AggregationTemporality, MeterProvider, PeriodicExportingMetricReader } from "@opentelemetry/sdk-metrics";
import { SEMRESATTRS_SERVICE_NAME, SEMRESATTRS_SERVICE_VERSION } from "@opentelemetry/semantic-conventions";
export const initTelemetry = (exportURL: string) => {
const resource = Resource.default().merge(
new Resource({
[SEMRESATTRS_SERVICE_NAME]: "infisical-server",
[SEMRESATTRS_SERVICE_VERSION]: "0.1.0"
})
);
const metricExporter = new OTLPMetricExporter({
url: `${exportURL}/v1/metrics`,
temporalityPreference: AggregationTemporality.DELTA
});
const metricReader = new PeriodicExportingMetricReader({
exporter: metricExporter,
exportIntervalMillis: 30000
});
const myServiceMeterProvider = new MeterProvider({
resource,
readers: [metricReader]
});
opentelemetry.metrics.setGlobalMeterProvider(myServiceMeterProvider);
registerInstrumentations({
instrumentations: [getNodeAutoInstrumentations()]
});
};

View File

@@ -4,6 +4,7 @@ import { initDbConnection } from "./db";
import { keyStoreFactory } from "./keystore/keystore";
import { formatSmtpConfig, initEnvConfig } from "./lib/config/env";
import { initLogger } from "./lib/logger";
import { initTelemetry } from "./lib/telemetry/instrumentation";
import { queueServiceFactory } from "./queue";
import { main } from "./server/app";
import { bootstrapCheck } from "./server/boot-strap-check";
@@ -13,6 +14,11 @@ dotenv.config();
const run = async () => {
const logger = await initLogger();
const appCfg = initEnvConfig(logger);
if (appCfg.TELEMETRY_EXPORT_URL) {
initTelemetry(appCfg.TELEMETRY_EXPORT_URL);
}
const db = initDbConnection({
dbConnectionUri: appCfg.DB_CONNECTION_URI,
dbRootCert: appCfg.DB_ROOT_CERT

View File

@@ -20,6 +20,7 @@ import { TQueueServiceFactory } from "@app/queue";
import { TSmtpService } from "@app/services/smtp/smtp-service";
import { globalRateLimiterCfg } from "./config/rateLimiter";
import { apiMetrics } from "./plugins/api-metrics";
import { fastifyErrHandler } from "./plugins/error-handler";
import { registerExternalNextjs } from "./plugins/external-nextjs";
import { serializerCompiler, validatorCompiler, ZodTypeProvider } from "./plugins/fastify-zod";
@@ -62,7 +63,7 @@ export const main = async ({ db, smtp, logger, queue, keyStore }: TMain) => {
});
// pull ip based on various proxy headers
await server.register(fastifyIp);
await server.register(apiMetrics);
await server.register(fastifySwagger);
await server.register(fastifyFormBody);
await server.register(fastifyErrHandler);

View File

@@ -0,0 +1,21 @@
import opentelemetry from "@opentelemetry/api";
import fp from "fastify-plugin";
export const apiMetrics = fp(async (fastify) => {
const apiMeter = opentelemetry.metrics.getMeter("API");
const latencyHistogram = apiMeter.createHistogram("API latency", {
unit: "ms"
});
fastify.addHook("onResponse", async (request, reply) => {
const { method } = request;
const route = request.routerPath;
const { statusCode } = reply;
latencyHistogram.record(reply.elapsedTime, {
route,
method,
statusCode
});
});
});

View File

@@ -158,6 +158,38 @@ services:
- openldap
profiles: [ldap]
otel-collector:
image: otel/opentelemetry-collector-contrib
volumes:
- ./otel-collector-config.yaml:/etc/otelcol-contrib/config.yaml
ports:
- 1888:1888 # pprof extension
- 8888:8888 # Prometheus metrics exposed by the Collector
- 8889:8889 # Prometheus exporter metrics
- 13133:13133 # health_check extension
- 4317:4317 # OTLP gRPC receiver
- 4318:4318 # OTLP http receiver
- 55679:55679 # zpages extension
prometheus:
image: prom/prometheus
volumes:
- ./prometheus.yml:/etc/prometheus/prometheus.yml
ports:
- "9090:9090"
command:
- "--config.file=/etc/prometheus/prometheus.yml"
grafana:
image: grafana/grafana
container_name: grafana
restart: unless-stopped
environment:
- GF_LOG_LEVEL=debug
ports:
- "3000:3000"
volumes:
- "grafana_storage:/var/lib/grafana"
volumes:
postgres-data:
driver: local
@@ -165,3 +197,4 @@ volumes:
driver: local
ldap_data:
ldap_config:
grafana_storage:

View File

@@ -0,0 +1,25 @@
receivers:
otlp:
protocols:
http:
endpoint: 0.0.0.0:4318
processors:
batch:
exporters:
prometheus:
endpoint: "0.0.0.0:8889"
resource_to_telemetry_conversion:
enabled: true
extensions:
health_check:
pprof:
zpages:
service:
extensions: [health_check, pprof, zpages]
pipelines:
metrics:
receivers: [otlp]
processors: [batch]
exporters: [prometheus]

5
prometheus.yml Normal file
View File

@@ -0,0 +1,5 @@
scrape_configs:
- job_name: "otel-collector"
scrape_interval: 30s
static_configs:
- targets: ["otel-collector:8889"]