feat: integrate Datadog APM + Sentry error tracking with CloudWatch metrics FRE-4806
- Add CloudWatch metrics emitter (api_latency, api_requests, api_errors)
- Add request monitoring middleware for API (latency, error rate, throughput)
- Register error-handling, logging, and monitoring middleware in server.ts
- Add Datadog log forwarding via HTTP intake API
- Add application-level CloudWatch alarms for P99 latency, error rate, throughput
- Inject Datadog/Sentry env vars and secrets into ECS task definitions
- Add DD_API_KEY and SENTRY_DSN to ECS secrets
- Create CloudWatch log groups for datadog and sentry services
- Update .env.example with AWS_REGION and monitoring variables
- Add @aws-sdk/client-cloudwatch dependency to monitoring package

Co-Authored-By: Paperclip <noreply@paperclip.ing>
This commit is contained in:
23
packages/monitoring/package.json
Normal file
23
packages/monitoring/package.json
Normal file
@@ -0,0 +1,23 @@
|
||||
{
|
||||
"name": "@shieldai/monitoring",
|
||||
"version": "0.1.0",
|
||||
"main": "./dist/index.js",
|
||||
"types": "./dist/index.d.ts",
|
||||
"scripts": {
|
||||
"build": "tsc",
|
||||
"lint": "eslint src/"
|
||||
},
|
||||
"dependencies": {
|
||||
"@aws-sdk/client-cloudwatch": "^3.500.0",
|
||||
"dd-trace": "^5.0.0",
|
||||
"@sentry/node": "^8.0.0",
|
||||
"zod": "^3.23.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/node": "^25.6.0",
|
||||
"typescript": "^5.7.0"
|
||||
},
|
||||
"exports": {
|
||||
".": "./src/index.ts"
|
||||
}
|
||||
}
|
||||
97
packages/monitoring/src/cloudwatch.ts
Normal file
97
packages/monitoring/src/cloudwatch.ts
Normal file
@@ -0,0 +1,97 @@
|
||||
import { CloudWatchClient, PutMetricDataCommand, StandardUnit } from '@aws-sdk/client-cloudwatch';
|
||||
import { getMonitoringConfig } from './config';
|
||||
|
||||
// Module-level CloudWatch client; stays null until getClient() constructs it, then is reused.
let client: CloudWatchClient | null = null;
|
||||
|
||||
/**
 * Returns the shared CloudWatch client, constructing it on first use.
 *
 * The region is read from `AWS_REGION` and falls back to `us-east-1`.
 * If the client constructor throws, a warning is logged and `null` is
 * returned so that callers can skip metric emission.
 *
 * @returns The cached or newly created client, or `null` when construction failed.
 */
function getClient(): CloudWatchClient | null {
  if (client) return client;

  // `||` rather than `??`: an empty AWS_REGION also falls back to the default.
  const region = process.env.AWS_REGION || 'us-east-1';

  try {
    client = new CloudWatchClient({ region });
    return client;
  } catch {
    console.warn('[CloudWatch] Metrics client initialization skipped');
    return null;
  }
}
|
||||
|
||||
/**
 * Shape of a single metric sample.
 *
 * NOTE(review): the field names look like they mirror CloudWatch's metric
 * datum structure, but nothing in the visible code consumes this type —
 * confirm against callers.
 */
export interface MetricDataPoint {
  /** Name of the metric. */
  MetricName: string;
  /** Optional name/value pairs attached to the sample. */
  Dimensions?: Array<{ Name: string; Value: string }>;
  /** Numeric value of the sample. */
  Value: number;
  /** Optional unit label (typed as a plain string here). */
  Unit?: string;
  /** Optional timestamp for the sample. */
  Timestamp?: Date;
}
|
||||
|
||||
// Namespace passed to PutMetricData for every metric emitted by emitMetric().
const NAMESPACE = 'ShieldAI';
|
||||
|
||||
/**
 * Sends one data point to CloudWatch via `PutMetricData`.
 *
 * A `service` dimension carrying `serviceName` is always attached first;
 * any entries of `dimensions` are appended after it. Emission is best
 * effort: if no client is available the call is a no-op, and a failed send
 * is logged as a warning instead of being thrown.
 *
 * @param serviceName Value of the `service` dimension.
 * @param metricName Name of the metric to publish.
 * @param value Numeric value of the data point.
 * @param unit CloudWatch unit; defaults to `'Count'`.
 * @param dimensions Optional extra dimensions as name → value pairs.
 */
export async function emitMetric(
  serviceName: string,
  metricName: string,
  value: number,
  unit: StandardUnit = 'Count',
  dimensions?: Record<string, string>
) {
  const cloudWatch = getClient();
  if (!cloudWatch) {
    return;
  }

  const metricDimensions: { Name: string; Value: string }[] = [
    { Name: 'service', Value: serviceName },
  ];
  if (dimensions) {
    for (const [dimensionName, dimensionValue] of Object.entries(dimensions)) {
      metricDimensions.push({ Name: dimensionName, Value: dimensionValue });
    }
  }

  const datum = {
    MetricName: metricName,
    Dimensions: metricDimensions,
    Value: value,
    Unit: unit,
  };
  const request = new PutMetricDataCommand({
    Namespace: NAMESPACE,
    MetricData: [datum],
  });

  try {
    await cloudWatch.send(request);
  } catch (err) {
    console.warn('[CloudWatch] Metric emit failed:', (err as Error).message);
  }
}
|
||||
|
||||
/**
 * Publishes an `api_latency` data point in milliseconds.
 *
 * @param serviceName Service the measurement belongs to.
 * @param latencyMs Latency value, in milliseconds.
 * @param percentile Label stored in the `percentile` dimension.
 */
export async function emitLatency(
  serviceName: string,
  latencyMs: number,
  percentile: 'p50' | 'p95' | 'p99'
) {
  const unit = 'Milliseconds' as StandardUnit;
  const extraDimensions = { percentile };

  await emitMetric(serviceName, 'api_latency', latencyMs, unit, extraDimensions);
}
|
||||
|
||||
/**
 * Publishes a single `api_requests` count tagged with the status class.
 *
 * The status code is bucketed by its hundreds digit, e.g. 204 → `2xx`.
 *
 * @param serviceName Service that handled the request.
 * @param statusCode HTTP status code of the response.
 */
export async function emitRequestCount(serviceName: string, statusCode: number) {
  const hundreds = Math.floor(statusCode / 100);
  const statusClass = `${hundreds}xx`;
  const unit = 'Count' as StandardUnit;

  await emitMetric(serviceName, 'api_requests', 1, unit, { status_class: statusClass });
}
|
||||
|
||||
/**
 * Publishes a single `api_errors` count tagged with the error type.
 *
 * @param serviceName Service in which the error occurred.
 * @param errorType Label stored in the `error_type` dimension.
 */
export async function emitError(serviceName: string, errorType: string) {
  const unit = 'Count' as StandardUnit;
  const extraDimensions = { error_type: errorType };

  await emitMetric(serviceName, 'api_errors', 1, unit, extraDimensions);
}
|
||||
35
packages/monitoring/src/config.ts
Normal file
35
packages/monitoring/src/config.ts
Normal file
@@ -0,0 +1,35 @@
|
||||
import { z } from 'zod';
|
||||
|
||||
/**
 * Builds the schema for a numeric setting supplied as a string.
 * The string is converted with `Number(...)`; a non-numeric string therefore
 * yields `NaN` rather than a validation error.
 */
const numericSetting = (fallback: string) =>
  z
    .string()
    .transform((raw) => Number(raw))
    .default(fallback);

// Environment name used as the default for both Datadog and Sentry; read once at module load.
const defaultEnvironment = process.env.NODE_ENV || 'development';

/**
 * Schema for the Datadog and Sentry settings read from the environment.
 * Every field has a default, so missing variables never fail validation.
 */
const monitoringEnvSchema = z.object({
  // Datadog APM
  DD_SERVICE: z.string().default('shieldai-api'),
  DD_ENV: z.string().default(defaultEnvironment),
  DD_VERSION: z.string().default('0.1.0'),
  DD_TRACE_ENABLED: z.string().default('true'),
  DD_TRACE_SAMPLE_RATE: numericSetting('1.0'),
  DD_LOGS_INJECTION: z.string().default('true'),
  DD_AGENT_HOST: z.string().default('localhost'),
  DD_AGENT_PORT: numericSetting('8126'),

  // Sentry
  SENTRY_DSN: z.string().default(''),
  SENTRY_ENVIRONMENT: z.string().default(defaultEnvironment),
  SENTRY_RELEASE: z.string().default('0.1.0'),
  SENTRY_TRACES_SAMPLE_RATE: numericSetting('0.1'),
});
|
||||
|
||||
/** Parsed monitoring settings, as produced by `monitoringEnvSchema` (output type, after defaults and transforms). */
export type MonitoringConfig = z.infer<typeof monitoringEnvSchema>;
|
||||
|
||||
/**
 * Reads the monitoring-related environment variables and validates them.
 *
 * The environment is re-read on every call; unset variables take the
 * defaults declared in `monitoringEnvSchema`.
 *
 * @returns The parsed configuration.
 */
export function getMonitoringConfig(): MonitoringConfig {
  const {
    DD_SERVICE,
    DD_ENV,
    DD_VERSION,
    DD_TRACE_ENABLED,
    DD_TRACE_SAMPLE_RATE,
    DD_LOGS_INJECTION,
    DD_AGENT_HOST,
    DD_AGENT_PORT,
    SENTRY_DSN,
    SENTRY_ENVIRONMENT,
    SENTRY_RELEASE,
    SENTRY_TRACES_SAMPLE_RATE,
  } = process.env;

  return monitoringEnvSchema.parse({
    DD_SERVICE,
    DD_ENV,
    DD_VERSION,
    DD_TRACE_ENABLED,
    DD_TRACE_SAMPLE_RATE,
    DD_LOGS_INJECTION,
    DD_AGENT_HOST,
    DD_AGENT_PORT,
    SENTRY_DSN,
    SENTRY_ENVIRONMENT,
    SENTRY_RELEASE,
    SENTRY_TRACES_SAMPLE_RATE,
  });
}
|
||||
49
packages/monitoring/src/datadog-logs.ts
Normal file
49
packages/monitoring/src/datadog-logs.ts
Normal file
@@ -0,0 +1,49 @@
|
||||
import { getMonitoringConfig } from './config';
|
||||
|
||||
// Active log forwarder; remains null until initDatadogLogs() runs with DD_API_KEY set.
let logForwarder: { send: (log: string, service: string) => Promise<void> } | null = null;
|
||||
|
||||
/**
 * Installs the module-level Datadog log forwarder.
 *
 * Forwarding is enabled only when `DD_API_KEY` is set; otherwise a message
 * is logged and the forwarder stays unset. Logs are posted as JSON to the
 * v2 HTTP intake of the site given by `DD_SITE` (default `datadoghq.com`).
 *
 * Sending is best effort: network failures and non-2xx responses are
 * reported with `console.warn` and never thrown to the caller.
 */
export function initDatadogLogs() {
  const config = getMonitoringConfig();

  // Capture the key once so `send` does not rely on a non-null assertion
  // against an environment that may change after initialization.
  const apiKey = process.env.DD_API_KEY;
  if (!apiKey) {
    console.log('[Datadog Logs] API key not configured, log forwarding disabled');
    return;
  }

  const site = process.env.DD_SITE || 'datadoghq.com';
  const logIntakeUrl = `https://http-intake.logs.${site}`;

  logForwarder = {
    async send(log: string, service: string) {
      try {
        const payload = JSON.stringify({
          ddsource: 'nodejs',
          ddtags: `env:${config.DD_ENV},service:${service}`,
          hostname: config.DD_SERVICE,
          message: log,
          service,
        });

        const response = await fetch(`${logIntakeUrl}/api/v2/logs`, {
          method: 'POST',
          headers: {
            'DD-API-KEY': apiKey,
            'Content-Type': 'application/json',
          },
          body: payload,
        });

        // fetch() only rejects on network errors; a rejected request
        // (bad key, oversized payload, ...) must be detected via the status.
        if (!response.ok) {
          console.warn('[Datadog Logs] Forward failed:', `HTTP ${response.status} ${response.statusText}`);
        }
      } catch (err) {
        console.warn('[Datadog Logs] Forward failed:', err instanceof Error ? err.message : String(err));
      }
    },
  };
}
|
||||
|
||||
/**
 * Forwards one log line through the active forwarder, if any.
 *
 * Does nothing when no forwarder has been installed.
 *
 * @param log Log message to forward.
 * @param service Service name attached to the log; defaults to `shieldai-api`.
 */
export async function forwardLog(log: string, service: string = 'shieldai-api') {
  const forwarder = logForwarder;
  if (forwarder === null) {
    return;
  }

  await forwarder.send(log, service);
}
|
||||
|
||||
/**
 * Exposes the module-level log forwarder.
 *
 * @returns The current forwarder, or `null` when none has been installed.
 */
export function getLogForwarder() {
  const current = logForwarder;
  return current;
}
|
||||
49
packages/monitoring/src/datadog.ts
Normal file
49
packages/monitoring/src/datadog.ts
Normal file
@@ -0,0 +1,49 @@
|
||||
import { getMonitoringConfig } from './config';
|
||||
|
||||
// Set to true once dd-trace has been initialized; makes initDatadog() a no-op on later calls.
let initialized = false;
|
||||
|
||||
/**
 * Initializes Datadog APM tracing once per process.
 *
 * Does nothing when already initialized or when `DD_TRACE_ENABLED` is not
 * the string `'true'`. `dd-trace` is loaded with `require` inside a `try`
 * so that a missing or failing tracer only produces a warning.
 *
 * @returns The tracer returned by `dd-trace`'s `init` on first successful
 *   initialization; `undefined` in every other case.
 */
export function initDatadog() {
  if (initialized) return;

  const config = getMonitoringConfig();

  if (config.DD_TRACE_ENABLED !== 'true') {
    console.log('[Datadog] APM tracing disabled');
    return;
  }

  try {
    const tracer = require('dd-trace').init({
      service: config.DD_SERVICE,
      env: config.DD_ENV,
      version: config.DD_VERSION,
      sampleRate: config.DD_TRACE_SAMPLE_RATE,
      logInjection: config.DD_LOGS_INJECTION === 'true',
      // dd-trace-js names the agent address options `hostname` and `port`;
      // `agentHost` / `agentPort` are not recognized and were silently ignored.
      hostname: config.DD_AGENT_HOST,
      // Skip the port when DD_AGENT_PORT did not parse to a number, so the
      // tracer falls back to its own default instead of receiving NaN.
      ...(Number.isFinite(config.DD_AGENT_PORT) ? { port: config.DD_AGENT_PORT } : {}),
      plugins: true,
      debug: config.DD_ENV === 'development',
    });

    initialized = true;
    console.log(`[Datadog] APM initialized for service "${config.DD_SERVICE}" in "${config.DD_ENV}"`);
    return tracer;
  } catch (err) {
    console.warn('[Datadog] APM initialization skipped:', err instanceof Error ? err.message : String(err));
  }
}
|
||||
|
||||
/**
 * Looks up the tracer exported by `dd-trace`.
 *
 * @returns The module's `tracer` export, or `null` when loading `dd-trace` throws.
 */
export function getDatadogTracer() {
  try {
    const ddTrace = require('dd-trace');
    return ddTrace.tracer;
  } catch {
    return null;
  }
}
|
||||
|
||||
/**
 * Starts a Datadog span, parented to the currently active span when there is one.
 *
 * @param name Operation name of the new span.
 * @param options Extra span options forwarded to the tracer; an explicit
 *   `childOf` here takes precedence over the active span.
 * @returns The started span, or `undefined` when no tracer is available.
 */
export function createDatadogSpan(name: string, options?: Record<string, unknown>) {
  const tracer = getDatadogTracer();
  if (!tracer) return;

  // The dd-trace tracer exposes `startSpan`, not `startChild`; the previous
  // call threw a TypeError whenever a tracer was present. `startSpan` does
  // not inherit the active span on its own, so pass it as `childOf`.
  const activeSpan = tracer.scope?.()?.active?.() ?? null;

  return tracer.startSpan(name, {
    ...(activeSpan ? { childOf: activeSpan } : {}),
    ...options,
  });
}
|
||||
5
packages/monitoring/src/index.ts
Normal file
5
packages/monitoring/src/index.ts
Normal file
@@ -0,0 +1,5 @@
|
||||
export * from './datadog';
|
||||
export * from './sentry';
|
||||
export * from './config';
|
||||
export * from './cloudwatch';
|
||||
export * from './datadog-logs';
|
||||
90
packages/monitoring/src/sentry.ts
Normal file
90
packages/monitoring/src/sentry.ts
Normal file
@@ -0,0 +1,90 @@
|
||||
import { getMonitoringConfig } from './config';
|
||||
|
||||
// Set to true once Sentry.init() has completed; makes initSentry() a no-op on later calls.
let initialized = false;
|
||||
|
||||
/**
 * Removes the query string and fragment from a URL.
 * Absolute URLs are rebuilt from origin + path; anything `URL` cannot parse
 * (e.g. a relative path) is cut at the first `?` or `#`.
 */
function stripUrlQuery(rawUrl: string): string {
  try {
    const parsed = new URL(rawUrl);
    return parsed.origin + parsed.pathname;
  } catch {
    return rawUrl.replace(/[?#][\s\S]*$/, '');
  }
}

/**
 * Initializes Sentry error tracking once per process.
 *
 * Does nothing when already initialized or when `SENTRY_DSN` is empty.
 * `@sentry/node` is loaded with `require` inside a `try` so that a missing
 * or failing SDK only produces a warning.
 *
 * Outgoing events are scrubbed in `beforeSend`: the request URL loses its
 * query string and fragment, and the separate `query_string` field is
 * removed, since query parameters may carry tokens or personal data.
 */
export function initSentry() {
  if (initialized) return;

  const config = getMonitoringConfig();

  if (!config.SENTRY_DSN) {
    console.log('[Sentry] DSN not configured, error tracking disabled');
    return;
  }

  try {
    const Sentry = require('@sentry/node');

    Sentry.init({
      dsn: config.SENTRY_DSN,
      environment: config.SENTRY_ENVIRONMENT,
      release: config.SENTRY_RELEASE,
      tracesSampleRate: config.SENTRY_TRACES_SAMPLE_RATE,
      attachStacktrace: true,
      debug: config.SENTRY_ENVIRONMENT === 'development',
      // eslint-disable-next-line @typescript-eslint/no-explicit-any -- SDK is loaded via require(), so its event type is unavailable here
      beforeSend(event: any) {
        const req = event?.request;
        if (req) {
          if (typeof req.url === 'string' && req.url) {
            // Previously an unparseable (e.g. relative) URL kept its query string.
            req.url = stripUrlQuery(req.url);
          }
          // The query can also be reported separately from the URL.
          delete req.query_string;
        }
        return event;
      },
    });

    initialized = true;
    console.log(`[Sentry] Error tracking initialized for "${config.SENTRY_ENVIRONMENT}"`);
  } catch (err) {
    console.warn('[Sentry] Initialization skipped:', err instanceof Error ? err.message : String(err));
  }
}
|
||||
|
||||
/**
 * Reports an error to Sentry.
 *
 * A string is wrapped in a new `Error` first. `context` is passed to Sentry
 * as tags. If loading the SDK or the capture call throws, a warning is
 * logged instead.
 *
 * @param error Error instance or message to report.
 * @param context Optional key/value pairs sent as tags.
 */
export function captureSentryError(error: Error | string, context?: Record<string, unknown>) {
  try {
    const sentry = require('@sentry/node');

    let exception: Error;
    if (typeof error === 'string') {
      exception = new Error(error);
    } else {
      exception = error;
    }

    const tags = context as Record<string, string> | undefined;
    sentry.captureException(exception, { tags });
  } catch {
    console.warn('[Sentry] Error capture skipped (not initialized):', error);
  }
}
|
||||
|
||||
/**
 * Sends a plain message to Sentry at the given severity.
 *
 * If loading the SDK or the capture call throws, a warning is logged instead.
 *
 * @param message Text to report.
 * @param level Severity of the message; defaults to `info`.
 */
export function captureSentryMessage(message: string, level: 'info' | 'warning' | 'error' = 'info') {
  try {
    const sentry = require('@sentry/node');
    const captureContext = { level };
    sentry.captureMessage(message, captureContext);
  } catch {
    console.warn('[Sentry] Message capture skipped (not initialized)');
  }
}
|
||||
|
||||
/**
 * Sets the current Sentry user.
 *
 * The user object is `{ id: userId }` with the entries of `metadata` spread
 * on top, so a key named `id` in `metadata` replaces `userId`. Any failure
 * is swallowed.
 *
 * @param userId Identifier of the user.
 * @param metadata Optional extra user fields.
 */
export function setSentryUser(userId: string, metadata?: Record<string, string>) {
  try {
    const sentry = require('@sentry/node');
    const user = { id: userId, ...metadata };
    sentry.setUser(user);
  } catch {
    // Best effort: failures here are deliberately not reported.
  }
}
|
||||
|
||||
/**
 * Attaches a named context object to Sentry.
 * Any failure is swallowed.
 *
 * @param name Name of the context.
 * @param data Context payload.
 */
export function setSentryContext(name: string, data: Record<string, unknown>) {
  try {
    const sentry = require('@sentry/node');
    sentry.setContext(name, data);
  } catch {
    // Best effort: failures here are deliberately not reported.
  }
}
|
||||
|
||||
/**
 * Returns the Sentry hub.
 *
 * Uses `getCurrentHub()` when the SDK provides it and it returns a truthy
 * value; otherwise falls back to the SDK's `hub` property.
 *
 * @returns The hub, or `null` when loading the SDK or the lookup throws.
 */
export function getSentryHub() {
  try {
    const sentry = require('@sentry/node');
    const currentHub = sentry.getCurrentHub?.();
    return currentHub || sentry.hub;
  } catch {
    return null;
  }
}
|
||||
9
packages/monitoring/tsconfig.json
Normal file
9
packages/monitoring/tsconfig.json
Normal file
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"extends": "../../tsconfig.base.json",
|
||||
"compilerOptions": {
|
||||
"outDir": "./dist",
|
||||
"rootDir": "./src",
|
||||
"composite": true
|
||||
},
|
||||
"include": ["src"]
|
||||
}
|
||||
1
packages/monitoring/tsconfig.tsbuildinfo
Normal file
1
packages/monitoring/tsconfig.tsbuildinfo
Normal file
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user