feat: integrate Datadog APM + Sentry error tracking with CloudWatch metrics FRE-4806

- Add CloudWatch metrics emitter (api_latency, api_requests, api_errors)
- Add request monitoring middleware for API (latency, error rate, throughput)
- Register error-handling, logging, and monitoring middleware in server.ts
- Add Datadog log forwarding via HTTP intake API
- Add application-level CloudWatch alarms for P99 latency, error rate, throughput
- Inject Datadog/Sentry env vars and secrets into ECS task definitions
- Add DD_API_KEY and SENTRY_DSN to ECS secrets
- Create CloudWatch log groups for datadog and sentry services
- Update .env.example with AWS_REGION and monitoring variables
- Add @aws-sdk/client-cloudwatch dependency to monitoring package

Co-Authored-By: Paperclip <noreply@paperclip.ing>
This commit is contained in:
2026-05-10 02:15:11 -04:00
parent 57a206d7b3
commit c7df40ac26
18 changed files with 5260 additions and 76 deletions

View File

@@ -1,4 +1,5 @@
import { FastifyInstance, FastifyRequest, FastifyReply } from 'fastify';
import { captureSentryError, setSentryContext, setSentryUser } from '@shieldai/monitoring';
export interface ErrorResponse {
error: string;
@@ -13,19 +14,37 @@ export interface ErrorResponse {
export async function errorHandlingMiddleware(fastify: FastifyInstance) {
// Custom error handler
fastify.setErrorHandler((error, request: FastifyRequest, reply: FastifyReply) => {
const err = error as Error & { statusCode?: number; code?: string };
const response: ErrorResponse = {
error: error.name || 'Internal Server Error',
message: error.message || 'An unexpected error occurred',
statusCode: error.statusCode || 500,
code: (error as any).code,
error: err.name || 'Internal Server Error',
message: err.message || 'An unexpected error occurred',
statusCode: err.statusCode || 500,
code: err.code,
timestamp: new Date().toISOString(),
path: request.url,
};
// Send to Sentry (5xx errors only)
if (response.statusCode >= 500) {
const userId = (request as FastifyRequest & { user?: { id?: string } }).user?.id;
if (userId) setSentryUser(userId);
setSentryContext('request', {
method: request.method,
url: request.url,
userAgent: request.headers['user-agent'],
requestId: request.id,
});
captureSentryError(err, {
statusCode: String(response.statusCode),
path: request.url,
method: request.method,
});
}
// Log error
fastify.log.error({
error: response,
stack: error.stack,
stack: err.stack,
method: request.method,
userAgent: request.headers['user-agent'],
});

View File

@@ -0,0 +1,46 @@
import { FastifyInstance, FastifyRequest, FastifyReply } from 'fastify';
import { emitLatency, emitRequestCount, emitError } from '@shieldai/monitoring';
/** Value of the `service` dimension on every metric; defaults when DD_SERVICE is unset or empty. */
const SERVICE_NAME = process.env.DD_SERVICE || 'shieldai-api';

/** Response time (ms) above which a request is logged as slow. */
const HIGH_LATENCY_THRESHOLD_MS = 2000;

/**
 * Registers an `onResponse` hook that publishes per-request metrics
 * (request count, latency, and an error count for 5xx responses) and logs
 * warnings for server errors and slow requests.
 *
 * The metric emits are independent of one another, so they are started
 * together and awaited as a group instead of one network round trip after
 * another.
 *
 * NOTE(review): the same raw `elapsedTime` sample is published under the
 * `p50`, `p95` and `p99` percentile dimensions; the dimension labels the
 * series, it is not a computed percentile. Confirm the alarms aggregate
 * these series as intended.
 *
 * NOTE(review): a hook added inside a plugin registered with
 * `app.register()` is normally scoped to that plugin's encapsulation
 * context unless the plugin is wrapped (e.g. with fastify-plugin). Confirm
 * this hook actually fires for the route plugins.
 *
 * @param fastify - Fastify instance the hook is attached to.
 */
export async function monitoringMiddleware(fastify: FastifyInstance) {
  fastify.addHook('onResponse', async (request: FastifyRequest, reply: FastifyReply) => {
    const statusCode = reply.statusCode;
    const responseTime = reply.elapsedTime;
    const isServerError = statusCode >= 500;

    // Shared fields for both warning logs below.
    const logContext = {
      method: request.method,
      url: request.url,
      statusCode,
      responseTime,
      service: SERVICE_NAME,
    };

    // Kick off every emit before awaiting any of them.
    const emissions: Promise<void>[] = [
      emitRequestCount(SERVICE_NAME, statusCode),
      emitLatency(SERVICE_NAME, responseTime, 'p50'),
      emitLatency(SERVICE_NAME, responseTime, 'p95'),
      emitLatency(SERVICE_NAME, responseTime, 'p99'),
    ];
    if (isServerError) {
      emissions.push(emitError(SERVICE_NAME, 'server_error'));
    }
    await Promise.all(emissions);

    if (isServerError) {
      fastify.log.warn({ event: 'high_latency_or_error', ...logContext });
    }

    // Log high latency requests (>2s)
    if (responseTime > HIGH_LATENCY_THRESHOLD_MS) {
      fastify.log.warn({ event: 'high_latency', ...logContext });
    }
  });
}

View File

@@ -4,15 +4,19 @@ import helmet from "@fastify/helmet";
import sensible from "@fastify/sensible";
import { extractOrGenerateRequestId } from "@shieldai/types";
import { authMiddleware } from "./middleware/auth.middleware";
import { errorHandlingMiddleware } from "./middleware/error-handling.middleware";
import { loggingMiddleware } from "./middleware/logging.middleware";
import { monitoringMiddleware } from "./middleware/monitoring.middleware";
import { darkwatchRoutes } from "./routes/darkwatch.routes";
import { voiceprintRoutes } from "./routes/voiceprint.routes";
import { correlationRoutes } from "./routes/correlation.routes";
import { extensionRoutes } from "./routes/extension.routes";
import { initDatadog, initSentry, captureSentryError } from "@shieldai/monitoring";
import { initDatadog, initSentry, initDatadogLogs, captureSentryError } from "@shieldai/monitoring";
import { getCorsOrigins } from "./config/api.config";
initDatadog();
initSentry();
initDatadogLogs();
const app = Fastify({
logger: {
@@ -29,6 +33,15 @@ async function bootstrap() {
// Register auth middleware to populate request.user
await app.register(authMiddleware);
// Register logging middleware (request/response logging)
await app.register(loggingMiddleware);
// Register monitoring middleware (CloudWatch metrics)
await app.register(monitoringMiddleware);
// Register error handling middleware (Sentry integration)
await app.register(errorHandlingMiddleware);
app.addHook("onRequest", async (request, _reply) => {
const requestId = extractOrGenerateRequestId(request.headers);
request.id = requestId;

View File

@@ -0,0 +1,23 @@
{
"name": "@shieldai/monitoring",
"version": "0.1.0",
"main": "./dist/index.js",
"types": "./dist/index.d.ts",
"scripts": {
"build": "tsc",
"lint": "eslint src/"
},
"dependencies": {
"@aws-sdk/client-cloudwatch": "^3.500.0",
"dd-trace": "^5.0.0",
"@sentry/node": "^8.0.0",
"zod": "^3.23.0"
},
"devDependencies": {
"@types/node": "^25.6.0",
"typescript": "^5.7.0"
},
"exports": {
".": "./src/index.ts"
}
}

View File

@@ -0,0 +1,97 @@
import { CloudWatchClient, PutMetricDataCommand, StandardUnit } from '@aws-sdk/client-cloudwatch';
import { getMonitoringConfig } from './config';
/** CloudWatch client shared by every emitter in this module; created on first use. */
let client: CloudWatchClient | null = null;

/**
 * Returns the shared CloudWatch client, constructing it on the first call.
 *
 * The region comes from `AWS_REGION`, falling back to `us-east-1` when the
 * variable is unset or empty. If the constructor throws, a warning is logged
 * and `null` is returned so callers can skip metric emission; the next call
 * will try again because nothing is cached on failure.
 *
 * @returns The cached or newly created client, or `null` if construction failed.
 */
function getClient(): CloudWatchClient | null {
  if (client) return client;

  const region = process.env.AWS_REGION || 'us-east-1';
  try {
    client = new CloudWatchClient({ region });
    return client;
  } catch {
    console.warn('[CloudWatch] Metrics client initialization skipped');
    return null;
  }
}
/**
 * Shape of a single metric sample: a metric name and numeric value, with
 * optional name/value dimensions, unit, and timestamp.
 */
export interface MetricDataPoint {
  /** Name of the metric the sample belongs to. */
  MetricName: string;
  /** Optional name/value pairs qualifying the sample. */
  Dimensions?: Array<{ Name: string; Value: string }>;
  /** The sample's numeric value. */
  Value: number;
  /** Optional unit label for `Value`. */
  Unit?: string;
  /** Optional time the sample was taken. */
  Timestamp?: Date;
}
/** CloudWatch namespace under which this module publishes its metrics. */
const NAMESPACE = 'ShieldAI';

/**
 * Publishes one metric sample to CloudWatch on a best-effort basis.
 *
 * Every sample carries a `service` dimension; entries of `dimensions` are
 * appended after it. When no client is available the call is a no-op, and a
 * failed send is logged as a warning rather than thrown.
 *
 * @param serviceName - Value of the `service` dimension.
 * @param metricName - Name of the metric.
 * @param value - Sample value.
 * @param unit - CloudWatch unit; defaults to `Count`.
 * @param dimensions - Extra dimension name/value pairs.
 */
export async function emitMetric(
  serviceName: string,
  metricName: string,
  value: number,
  unit: StandardUnit = 'Count',
  dimensions?: Record<string, string>
) {
  const cloudWatch = getClient();
  if (cloudWatch === null) {
    return;
  }

  const dimensionList: { Name: string; Value: string }[] = [
    { Name: 'service', Value: serviceName },
  ];
  for (const [dimensionName, dimensionValue] of Object.entries(dimensions ?? {})) {
    dimensionList.push({ Name: dimensionName, Value: dimensionValue });
  }

  const datum = {
    MetricName: metricName,
    Dimensions: dimensionList,
    Value: value,
    Unit: unit,
  };
  // Built outside the try so only the send itself is treated as best-effort.
  const putCommand = new PutMetricDataCommand({
    Namespace: NAMESPACE,
    MetricData: [datum],
  });

  try {
    await cloudWatch.send(putCommand);
  } catch (err) {
    console.warn('[CloudWatch] Metric emit failed:', (err as Error).message);
  }
}
/**
 * Publishes an `api_latency` sample in milliseconds, tagged with a
 * `percentile` dimension.
 *
 * @param serviceName - Value of the `service` dimension.
 * @param latencyMs - Latency sample in milliseconds.
 * @param percentile - Label stored in the `percentile` dimension.
 */
export async function emitLatency(
  serviceName: string,
  latencyMs: number,
  percentile: 'p50' | 'p95' | 'p99'
) {
  const unit: StandardUnit = 'Milliseconds';
  const extraDimensions = { percentile };
  await emitMetric(serviceName, 'api_latency', latencyMs, unit, extraDimensions);
}
/**
 * Publishes one `api_requests` count, tagged with the response's status
 * class (e.g. a 404 is recorded as `4xx`).
 *
 * @param serviceName - Value of the `service` dimension.
 * @param statusCode - HTTP status code of the response.
 */
export async function emitRequestCount(serviceName: string, statusCode: number) {
  const leadingDigit = Math.floor(statusCode / 100);
  const statusClass = `${leadingDigit}xx`;
  const unit: StandardUnit = 'Count';
  await emitMetric(serviceName, 'api_requests', 1, unit, { status_class: statusClass });
}
/**
 * Publishes one `api_errors` count, tagged with an `error_type` dimension.
 *
 * @param serviceName - Value of the `service` dimension.
 * @param errorType - Label stored in the `error_type` dimension.
 */
export async function emitError(serviceName: string, errorType: string) {
  const unit: StandardUnit = 'Count';
  const extraDimensions = { error_type: errorType };
  await emitMetric(serviceName, 'api_errors', 1, unit, extraDimensions);
}

View File

@@ -0,0 +1,35 @@
import { z } from 'zod';
/** Environment name used when DD_ENV / SENTRY_ENVIRONMENT are not set. */
const fallbackEnvironment = process.env.NODE_ENV || 'development';

/** A string setting that is converted with `Number()` and has a string default. */
const numericSetting = (fallback: string) =>
  z
    .string()
    .transform((raw) => Number(raw))
    .default(fallback);

/**
 * Schema for the monitoring-related environment variables. Every field has a
 * default, and the sample-rate and port fields are converted to numbers.
 */
const monitoringEnvSchema = z.object({
  // Datadog
  DD_SERVICE: z.string().default('shieldai-api'),
  DD_ENV: z.string().default(fallbackEnvironment),
  DD_VERSION: z.string().default('0.1.0'),
  DD_TRACE_ENABLED: z.string().default('true'),
  DD_TRACE_SAMPLE_RATE: numericSetting('1.0'),
  DD_LOGS_INJECTION: z.string().default('true'),
  DD_AGENT_HOST: z.string().default('localhost'),
  DD_AGENT_PORT: numericSetting('8126'),
  // Sentry
  SENTRY_DSN: z.string().default(''),
  SENTRY_ENVIRONMENT: z.string().default(fallbackEnvironment),
  SENTRY_RELEASE: z.string().default('0.1.0'),
  SENTRY_TRACES_SAMPLE_RATE: numericSetting('0.1'),
});

/** Parsed monitoring configuration, as produced by the schema above. */
export type MonitoringConfig = z.infer<typeof monitoringEnvSchema>;
/**
 * Reads the monitoring variables from `process.env` and parses them with the
 * schema, applying its defaults and numeric conversions.
 *
 * Only the variables named in the schema are read.
 *
 * @returns The parsed monitoring configuration.
 */
export function getMonitoringConfig(): MonitoringConfig {
  const rawEnv: Record<string, string | undefined> = {};
  for (const variableName of Object.keys(monitoringEnvSchema.shape)) {
    rawEnv[variableName] = process.env[variableName];
  }
  return monitoringEnvSchema.parse(rawEnv);
}

View File

@@ -0,0 +1,49 @@
import { getMonitoringConfig } from './config';
/** Active log forwarder, or `null` until `initDatadogLogs` has configured one. */
let logForwarder: { send: (log: string, service: string) => Promise<void> } | null = null;

/**
 * Configures forwarding of log lines to the Datadog HTTP log intake.
 *
 * Forwarding stays disabled (and a notice is logged) when `DD_API_KEY` is
 * not set. The intake host is derived from `DD_SITE`, defaulting to
 * `datadoghq.com`. Sends are best-effort: network failures and non-2xx
 * responses are logged as warnings and never thrown to the caller.
 *
 * NOTE(review): the payload's `hostname` field is filled with the service
 * name (`DD_SERVICE`), not the machine's host name — confirm that is intended.
 */
export function initDatadogLogs() {
  // Capture the key once so the closure below needs no non-null assertion.
  const apiKey = process.env.DD_API_KEY;
  if (!apiKey) {
    console.log('[Datadog Logs] API key not configured, log forwarding disabled');
    return;
  }

  const config = getMonitoringConfig();
  const site = process.env.DD_SITE || 'datadoghq.com';
  const logIntakeUrl = `https://http-intake.logs.${site}`;

  logForwarder = {
    async send(log: string, service: string) {
      try {
        const payload = JSON.stringify({
          ddsource: 'nodejs',
          ddtags: `env:${config.DD_ENV},service:${service}`,
          hostname: config.DD_SERVICE,
          message: log,
          service,
        });

        const response = await fetch(`${logIntakeUrl}/api/v2/logs`, {
          method: 'POST',
          headers: {
            'DD-API-KEY': apiKey,
            'Content-Type': 'application/json',
          },
          body: payload,
        });

        // fetch resolves on HTTP errors (bad key, oversized payload, ...), so
        // a rejected log must be detected from the status, not from a throw.
        if (!response.ok) {
          console.warn(`[Datadog Logs] Forward failed: HTTP ${response.status}`);
        }
        // The body is never read; release it so the connection can be reused.
        await response.body?.cancel();
      } catch (err) {
        console.warn(
          '[Datadog Logs] Forward failed:',
          err instanceof Error ? err.message : err
        );
      }
    },
  };
}
/**
 * Sends one log line through the configured forwarder; does nothing when no
 * forwarder has been set up.
 *
 * @param log - The log message to forward.
 * @param service - Service name attached to the log; defaults to `shieldai-api`.
 */
export async function forwardLog(log: string, service: string = 'shieldai-api') {
  const forwarder = logForwarder;
  if (!forwarder) {
    return;
  }
  await forwarder.send(log, service);
}
/**
 * Exposes the module's current log forwarder.
 *
 * @returns The forwarder object, or `null` when none has been configured.
 */
export function getLogForwarder() {
  const current = logForwarder;
  return current;
}

View File

@@ -0,0 +1,49 @@
import { getMonitoringConfig } from './config';
/** Set once the tracer has been initialized successfully, so later calls are no-ops. */
let initialized = false;

/**
 * Initializes Datadog APM tracing from the monitoring configuration.
 *
 * Does nothing if tracing was already initialized, and logs a notice and
 * returns when `DD_TRACE_ENABLED` is anything other than `'true'`. A failure
 * to load or initialize `dd-trace` is logged as a warning and not thrown.
 *
 * The agent address is passed as `hostname` / `port`, the option names
 * dd-trace's `init` accepts; `agentHost` / `agentPort` are not options it
 * recognizes, so the configured address was not being applied.
 *
 * @returns The tracer on the first successful initialization, otherwise `undefined`.
 */
export function initDatadog() {
  if (initialized) return;

  const config = getMonitoringConfig();
  if (config.DD_TRACE_ENABLED !== 'true') {
    console.log('[Datadog] APM tracing disabled');
    return;
  }

  try {
    const tracer = require('dd-trace').init({
      service: config.DD_SERVICE,
      env: config.DD_ENV,
      version: config.DD_VERSION,
      sampleRate: config.DD_TRACE_SAMPLE_RATE,
      logInjection: config.DD_LOGS_INJECTION === 'true',
      hostname: config.DD_AGENT_HOST,
      port: config.DD_AGENT_PORT,
      plugins: true,
      debug: config.DD_ENV === 'development',
    });
    initialized = true;
    console.log(`[Datadog] APM initialized for service "${config.DD_SERVICE}" in "${config.DD_ENV}"`);
    return tracer;
  } catch (err) {
    console.warn('[Datadog] APM initialization skipped:', (err as Error).message);
  }
}
/**
 * Looks up the `tracer` export of the `dd-trace` module.
 *
 * @returns The module's `tracer` export, or `null` if loading the module throws.
 */
export function getDatadogTracer() {
  try {
    const ddTrace = require('dd-trace');
    return ddTrace.tracer;
  } catch {
    // Module could not be loaded; report "no tracer".
    return null;
  }
}
/**
 * Starts a Datadog span with the given operation name.
 *
 * The dd-trace tracer exposes `startSpan`, not `startChild`, so the previous
 * call failed at runtime whenever a tracer was available. To keep the
 * intended "child span" behavior, the currently active span (if any) is used
 * as the parent unless `options` supplies its own `childOf`.
 *
 * @param name - Operation name of the span.
 * @param options - Span options forwarded to `tracer.startSpan`.
 * @returns The started span, or `undefined` when no tracer is available.
 */
export function createDatadogSpan(name: string, options?: Record<string, unknown>) {
  const tracer = getDatadogTracer();
  if (!tracer) return;

  const activeSpan = tracer.scope().active();
  // Spread `options` last so an explicit `childOf` from the caller wins.
  const spanOptions = activeSpan ? { childOf: activeSpan, ...options } : options;
  return tracer.startSpan(name, spanOptions);
}

View File

@@ -0,0 +1,5 @@
export * from './datadog';
export * from './sentry';
export * from './config';
export * from './cloudwatch';
export * from './datadog-logs';

View File

@@ -0,0 +1,90 @@
import { getMonitoringConfig } from './config';
/** Set once Sentry has been initialized successfully, so later calls are no-ops. */
let initialized = false;

/**
 * Removes the query string (and fragment) from a URL before it is reported.
 *
 * Absolute URLs are reduced to origin + path. Relative or otherwise
 * unparseable URLs are cut at the first `?` or `#`, so their query string is
 * dropped as well instead of being reported unchanged.
 */
function stripQueryFromUrl(rawUrl: string): string {
  try {
    const parsed = new URL(rawUrl);
    return parsed.origin + parsed.pathname;
  } catch {
    const cutAt = rawUrl.search(/[?#]/);
    return cutAt === -1 ? rawUrl : rawUrl.slice(0, cutAt);
  }
}

/**
 * Initializes Sentry error tracking from the monitoring configuration.
 *
 * Does nothing if Sentry was already initialized, and logs a notice and
 * returns when no DSN is configured. A failure to load or initialize
 * `@sentry/node` is logged as a warning and not thrown.
 *
 * Events pass through a `beforeSend` hook that strips the query string from
 * `event.request.url` before the event is sent.
 */
export function initSentry() {
  if (initialized) return;

  const config = getMonitoringConfig();
  if (!config.SENTRY_DSN) {
    console.log('[Sentry] DSN not configured, error tracking disabled');
    return;
  }

  try {
    const Sentry = require('@sentry/node');
    Sentry.init({
      dsn: config.SENTRY_DSN,
      environment: config.SENTRY_ENVIRONMENT,
      release: config.SENTRY_RELEASE,
      tracesSampleRate: config.SENTRY_TRACES_SAMPLE_RATE,
      attachStacktrace: true,
      debug: config.SENTRY_ENVIRONMENT === 'development',
      beforeSend<TEvent extends { request?: { url?: unknown } }>(event: TEvent): TEvent {
        const req = event.request;
        if (req && typeof req.url === 'string' && req.url !== '') {
          req.url = stripQueryFromUrl(req.url);
        }
        return event;
      },
    });
    initialized = true;
    console.log(`[Sentry] Error tracking initialized for "${config.SENTRY_ENVIRONMENT}"`);
  } catch (err) {
    console.warn('[Sentry] Initialization skipped:', (err as Error).message);
  }
}
/**
 * Reports an error to Sentry, attaching `context` as tags.
 *
 * A string is wrapped in an `Error` before being reported. If loading the
 * SDK or the capture call throws, a warning is logged with the original
 * input instead.
 *
 * @param error - The error, or an error message, to report.
 * @param context - Key/value pairs passed to Sentry as tags.
 */
export function captureSentryError(error: Error | string, context?: Record<string, unknown>) {
  try {
    const sdk = require('@sentry/node');

    let exception: Error;
    if (typeof error === 'string') {
      exception = new Error(error);
    } else {
      exception = error;
    }

    const tags = context as Record<string, string> | undefined;
    sdk.captureException(exception, { tags });
  } catch {
    console.warn('[Sentry] Error capture skipped (not initialized):', error);
  }
}
/**
 * Sends a plain message to Sentry at the given severity level.
 *
 * If loading the SDK or the capture call throws, a warning is logged instead.
 *
 * @param message - Text to report.
 * @param level - Severity of the message; defaults to `info`.
 */
export function captureSentryMessage(message: string, level: 'info' | 'warning' | 'error' = 'info') {
  try {
    const sdk = require('@sentry/node');
    const captureContext = { level };
    sdk.captureMessage(message, captureContext);
  } catch {
    console.warn('[Sentry] Message capture skipped (not initialized)');
  }
}
/**
 * Sets the current Sentry user to `userId` plus any extra metadata fields.
 * Any failure (including the SDK not loading) is ignored.
 *
 * @param userId - Identifier stored as the user's `id`.
 * @param metadata - Additional string fields merged into the user object.
 */
export function setSentryUser(userId: string, metadata?: Record<string, string>) {
  try {
    const sdk = require('@sentry/node');
    const user = { id: userId, ...metadata };
    sdk.setUser(user);
  } catch {
    // silently ignore
  }
}
/**
 * Attaches a named block of context data to Sentry.
 * Any failure (including the SDK not loading) is ignored.
 *
 * @param name - Name of the context block.
 * @param data - Key/value data stored under that name.
 */
export function setSentryContext(name: string, data: Record<string, unknown>) {
  try {
    const sdk = require('@sentry/node');
    sdk.setContext(name, data);
  } catch {
    // silently ignore
  }
}
/**
 * Returns the Sentry hub: the result of `getCurrentHub()` when the SDK
 * provides that function and it yields a value, otherwise the SDK's `hub`
 * property.
 *
 * @returns The hub, or `null` if loading the SDK or the lookup throws.
 */
export function getSentryHub() {
  try {
    const sdk = require('@sentry/node');
    const currentHub = sdk.getCurrentHub?.();
    return currentHub || sdk.hub;
  } catch {
    return null;
  }
}

View File

@@ -0,0 +1,9 @@
{
"extends": "../../tsconfig.base.json",
"compilerOptions": {
"outDir": "./dist",
"rootDir": "./src",
"composite": true
},
"include": ["src"]
}

File diff suppressed because one or more lines are too long