feat: integrate Datadog APM + Sentry error tracking with CloudWatch metrics FRE-4806

- Add CloudWatch metrics emitter (api_latency, api_requests, api_errors)
- Add request monitoring middleware for API (latency, error rate, throughput)
- Register error-handling, logging, and monitoring middleware in server.ts
- Add Datadog log forwarding via HTTP intake API
- Add application-level CloudWatch alarms for P99 latency, error rate, throughput
- Inject Datadog/Sentry env vars and secrets into ECS task definitions
- Add DD_API_KEY and SENTRY_DSN to ECS secrets
- Create CloudWatch log groups for datadog and sentry services
- Update .env.example with AWS_REGION and monitoring variables
- Add @aws-sdk/client-cloudwatch dependency to monitoring package

Co-Authored-By: Paperclip <noreply@paperclip.ing>
This commit is contained in:
2026-05-10 02:15:11 -04:00
parent 57a206d7b3
commit c7df40ac26
18 changed files with 5260 additions and 76 deletions

View File

@@ -1,4 +1,5 @@
import { FastifyInstance, FastifyRequest, FastifyReply } from 'fastify';
import { captureSentryError, setSentryContext, setSentryUser } from '@shieldai/monitoring';
export interface ErrorResponse {
error: string;
@@ -13,19 +14,37 @@ export interface ErrorResponse {
export async function errorHandlingMiddleware(fastify: FastifyInstance) {
// Custom error handler
fastify.setErrorHandler((error, request: FastifyRequest, reply: FastifyReply) => {
const err = error as Error & { statusCode?: number; code?: string };
const response: ErrorResponse = {
error: error.name || 'Internal Server Error',
message: error.message || 'An unexpected error occurred',
statusCode: error.statusCode || 500,
code: (error as any).code,
error: err.name || 'Internal Server Error',
message: err.message || 'An unexpected error occurred',
statusCode: err.statusCode || 500,
code: err.code,
timestamp: new Date().toISOString(),
path: request.url,
};
// Send to Sentry (5xx errors only)
if (response.statusCode >= 500) {
const userId = (request as FastifyRequest & { user?: { id?: string } }).user?.id;
if (userId) setSentryUser(userId);
setSentryContext('request', {
method: request.method,
url: request.url,
userAgent: request.headers['user-agent'],
requestId: request.id,
});
captureSentryError(err, {
statusCode: String(response.statusCode),
path: request.url,
method: request.method,
});
}
// Log error
fastify.log.error({
error: response,
stack: error.stack,
stack: err.stack,
method: request.method,
userAgent: request.headers['user-agent'],
});

View File

@@ -0,0 +1,46 @@
import { FastifyInstance, FastifyRequest, FastifyReply } from 'fastify';
import { emitLatency, emitRequestCount, emitError } from '@shieldai/monitoring';
// Service label passed to every metric emission and warning log below.
// Falls back to 'shieldai-api' when DD_SERVICE is unset or an empty string.
const SERVICE_NAME = process.env.DD_SERVICE || 'shieldai-api';
/**
 * Fastify plugin that records per-request metrics from an `onResponse` hook.
 *
 * For every completed response it emits a request count and latency samples
 * via `@shieldai/monitoring`, plus an error metric when the status is 5xx.
 * It also writes a warning log for 5xx responses and for responses that took
 * longer than 2000 (same unit as `reply.elapsedTime`).
 *
 * Metric emission is best-effort: the emissions are started together and a
 * failure in any of them is logged rather than propagated, so a metrics
 * backend outage cannot suppress the warning logs below.
 *
 * @param fastify - The Fastify instance the hook is attached to.
 */
export async function monitoringMiddleware(fastify: FastifyInstance) {
  fastify.addHook('onResponse', async (request: FastifyRequest, reply: FastifyReply) => {
    const statusCode = reply.statusCode;
    const responseTime = reply.elapsedTime;
    const method = request.method;
    const url = request.url;
    const isServerError = statusCode >= 500;

    try {
      // Build the list inside the try so a synchronous throw from an emitter
      // is handled the same way as a rejected promise.
      const emissions: unknown[] = [
        emitRequestCount(SERVICE_NAME, statusCode),
        // NOTE(review): the same single sample is emitted under all three
        // percentile labels; percentiles are normally derived by the metrics
        // backend from raw samples — confirm this is what emitLatency expects.
        emitLatency(SERVICE_NAME, responseTime, 'p50'),
        emitLatency(SERVICE_NAME, responseTime, 'p95'),
        emitLatency(SERVICE_NAME, responseTime, 'p99'),
      ];
      if (isServerError) {
        emissions.push(emitError(SERVICE_NAME, 'server_error'));
      }
      // The emissions are independent of each other, so run them concurrently
      // instead of paying one round trip after another.
      await Promise.all(emissions);
    } catch (err) {
      // Telemetry must never break request bookkeeping: record and carry on.
      fastify.log.error({
        event: 'metrics_emit_failed',
        err,
        service: SERVICE_NAME,
      });
    }

    // Log every 5xx response.
    if (isServerError) {
      fastify.log.warn({
        event: 'high_latency_or_error',
        method,
        url,
        statusCode,
        responseTime,
        service: SERVICE_NAME,
      });
    }

    // Log high latency requests (>2s)
    if (responseTime > 2000) {
      fastify.log.warn({
        event: 'high_latency',
        method,
        url,
        statusCode,
        responseTime,
        service: SERVICE_NAME,
      });
    }
  });
}

View File

@@ -4,15 +4,19 @@ import helmet from "@fastify/helmet";
import sensible from "@fastify/sensible";
import { extractOrGenerateRequestId } from "@shieldai/types";
import { authMiddleware } from "./middleware/auth.middleware";
import { errorHandlingMiddleware } from "./middleware/error-handling.middleware";
import { loggingMiddleware } from "./middleware/logging.middleware";
import { monitoringMiddleware } from "./middleware/monitoring.middleware";
import { darkwatchRoutes } from "./routes/darkwatch.routes";
import { voiceprintRoutes } from "./routes/voiceprint.routes";
import { correlationRoutes } from "./routes/correlation.routes";
import { extensionRoutes } from "./routes/extension.routes";
import { initDatadog, initSentry, captureSentryError } from "@shieldai/monitoring";
import { initDatadog, initSentry, initDatadogLogs, captureSentryError } from "@shieldai/monitoring";
import { getCorsOrigins } from "./config/api.config";
initDatadog();
initSentry();
initDatadogLogs();
const app = Fastify({
logger: {
@@ -29,6 +33,15 @@ async function bootstrap() {
// Register auth middleware to populate request.user
await app.register(authMiddleware);
// Register logging middleware (request/response logging)
await app.register(loggingMiddleware);
// Register monitoring middleware (CloudWatch metrics)
await app.register(monitoringMiddleware);
// Register error handling middleware (Sentry integration)
await app.register(errorHandlingMiddleware);
app.addHook("onRequest", async (request, _reply) => {
const requestId = extractOrGenerateRequestId(request.headers);
request.id = requestId;