feat: integrate Datadog APM + Sentry error tracking with CloudWatch metrics FRE-4806
- Add CloudWatch metrics emitter (api_latency, api_requests, api_errors)
- Add request monitoring middleware for API (latency, error rate, throughput)
- Register error-handling, logging, and monitoring middleware in server.ts
- Add Datadog log forwarding via HTTP intake API
- Add application-level CloudWatch alarms for P99 latency, error rate, throughput
- Inject Datadog/Sentry env vars and secrets into ECS task definitions
- Add DD_API_KEY and SENTRY_DSN to ECS secrets
- Create CloudWatch log groups for datadog and sentry services
- Update .env.example with AWS_REGION and monitoring variables
- Add @aws-sdk/client-cloudwatch dependency to monitoring package

Co-Authored-By: Paperclip <noreply@paperclip.ing>
This commit is contained in:
@@ -1,4 +1,5 @@
|
||||
import { FastifyInstance, FastifyRequest, FastifyReply } from 'fastify';
|
||||
import { captureSentryError, setSentryContext, setSentryUser } from '@shieldai/monitoring';
|
||||
|
||||
export interface ErrorResponse {
|
||||
error: string;
|
||||
@@ -13,19 +14,37 @@ export interface ErrorResponse {
|
||||
export async function errorHandlingMiddleware(fastify: FastifyInstance) {
|
||||
// Custom error handler
|
||||
fastify.setErrorHandler((error, request: FastifyRequest, reply: FastifyReply) => {
|
||||
const err = error as Error & { statusCode?: number; code?: string };
|
||||
const response: ErrorResponse = {
|
||||
error: error.name || 'Internal Server Error',
|
||||
message: error.message || 'An unexpected error occurred',
|
||||
statusCode: error.statusCode || 500,
|
||||
code: (error as any).code,
|
||||
error: err.name || 'Internal Server Error',
|
||||
message: err.message || 'An unexpected error occurred',
|
||||
statusCode: err.statusCode || 500,
|
||||
code: err.code,
|
||||
timestamp: new Date().toISOString(),
|
||||
path: request.url,
|
||||
};
|
||||
|
||||
// Send to Sentry (5xx errors only)
|
||||
if (response.statusCode >= 500) {
|
||||
const userId = (request as FastifyRequest & { user?: { id?: string } }).user?.id;
|
||||
if (userId) setSentryUser(userId);
|
||||
setSentryContext('request', {
|
||||
method: request.method,
|
||||
url: request.url,
|
||||
userAgent: request.headers['user-agent'],
|
||||
requestId: request.id,
|
||||
});
|
||||
captureSentryError(err, {
|
||||
statusCode: String(response.statusCode),
|
||||
path: request.url,
|
||||
method: request.method,
|
||||
});
|
||||
}
|
||||
|
||||
// Log error
|
||||
fastify.log.error({
|
||||
error: response,
|
||||
stack: error.stack,
|
||||
stack: err.stack,
|
||||
method: request.method,
|
||||
userAgent: request.headers['user-agent'],
|
||||
});
|
||||
|
||||
46
packages/api/src/middleware/monitoring.middleware.ts
Normal file
46
packages/api/src/middleware/monitoring.middleware.ts
Normal file
@@ -0,0 +1,46 @@
|
||||
import { FastifyInstance, FastifyRequest, FastifyReply } from 'fastify';
|
||||
import { emitLatency, emitRequestCount, emitError } from '@shieldai/monitoring';
|
||||
|
||||
const SERVICE_NAME = process.env.DD_SERVICE || 'shieldai-api';
|
||||
|
||||
/**
 * Fastify plugin that reports per-request metrics after each response is sent.
 *
 * For every completed request it emits a request count and three latency
 * samples (labelled `p50`, `p95`, `p99`); for 5xx responses it additionally
 * emits a `server_error` metric and logs a warning. Requests slower than the
 * high-latency threshold are logged as well.
 *
 * Metric emission is best-effort: each emitter runs independently, and a
 * failing emitter is logged instead of aborting the remaining emitters or
 * rejecting the hook.
 *
 * NOTE(review): this function is passed to `app.register(...)` as a plain
 * function. Fastify encapsulates hooks added inside a registered plugin unless
 * the plugin is wrapped with `fastify-plugin` — confirm that this hook actually
 * fires for routes registered on the parent instance.
 *
 * @param fastify - The Fastify instance to attach the `onResponse` hook to.
 */
export async function monitoringMiddleware(fastify: FastifyInstance): Promise<void> {
  // Requests slower than this (milliseconds) are logged as high latency.
  const HIGH_LATENCY_THRESHOLD_MS = 2000;

  fastify.addHook('onResponse', async (request: FastifyRequest, reply: FastifyReply) => {
    const statusCode = reply.statusCode;
    const responseTime = reply.elapsedTime;
    const method = request.method;
    const url = request.url;
    const isServerError = statusCode >= 500;

    // Deferred emitter calls, so that a synchronous throw from one emitter is
    // captured by Promise.allSettled just like an asynchronous rejection.
    // NOTE(review): the same sample is sent under all three percentile labels;
    // percentiles are normally derived by the metrics backend — confirm the
    // contract of emitLatency's third argument.
    const emissions: Array<() => unknown> = [
      () => emitRequestCount(SERVICE_NAME, statusCode),
      () => emitLatency(SERVICE_NAME, responseTime, 'p50'),
      () => emitLatency(SERVICE_NAME, responseTime, 'p95'),
      () => emitLatency(SERVICE_NAME, responseTime, 'p99'),
    ];
    if (isServerError) {
      emissions.push(() => emitError(SERVICE_NAME, 'server_error'));
    }

    // Run all emitters independently: one failed metric must not suppress the
    // others or the warning logs below.
    const outcomes = await Promise.allSettled(emissions.map(async (emit) => emit()));
    for (const outcome of outcomes) {
      if (outcome.status === 'rejected') {
        fastify.log.warn({
          event: 'metric_emit_failed',
          reason: outcome.reason instanceof Error ? outcome.reason.message : String(outcome.reason),
          service: SERVICE_NAME,
        });
      }
    }

    // Log 5xx responses.
    if (isServerError) {
      fastify.log.warn({
        event: 'high_latency_or_error',
        method,
        url,
        statusCode,
        responseTime,
        service: SERVICE_NAME,
      });
    }

    // Log high latency requests (>2s).
    if (responseTime > HIGH_LATENCY_THRESHOLD_MS) {
      fastify.log.warn({
        event: 'high_latency',
        method,
        url,
        statusCode,
        responseTime,
        service: SERVICE_NAME,
      });
    }
  });
}
|
||||
@@ -4,15 +4,19 @@ import helmet from "@fastify/helmet";
|
||||
import sensible from "@fastify/sensible";
|
||||
import { extractOrGenerateRequestId } from "@shieldai/types";
|
||||
import { authMiddleware } from "./middleware/auth.middleware";
|
||||
import { errorHandlingMiddleware } from "./middleware/error-handling.middleware";
|
||||
import { loggingMiddleware } from "./middleware/logging.middleware";
|
||||
import { monitoringMiddleware } from "./middleware/monitoring.middleware";
|
||||
import { darkwatchRoutes } from "./routes/darkwatch.routes";
|
||||
import { voiceprintRoutes } from "./routes/voiceprint.routes";
|
||||
import { correlationRoutes } from "./routes/correlation.routes";
|
||||
import { extensionRoutes } from "./routes/extension.routes";
|
||||
import { initDatadog, initSentry, captureSentryError } from "@shieldai/monitoring";
|
||||
import { initDatadog, initSentry, initDatadogLogs, captureSentryError } from "@shieldai/monitoring";
|
||||
import { getCorsOrigins } from "./config/api.config";
|
||||
|
||||
initDatadog();
|
||||
initSentry();
|
||||
initDatadogLogs();
|
||||
|
||||
const app = Fastify({
|
||||
logger: {
|
||||
@@ -29,6 +33,15 @@ async function bootstrap() {
|
||||
// Register auth middleware to populate request.user
|
||||
await app.register(authMiddleware);
|
||||
|
||||
// Register logging middleware (request/response logging)
|
||||
await app.register(loggingMiddleware);
|
||||
|
||||
// Register monitoring middleware (CloudWatch metrics)
|
||||
await app.register(monitoringMiddleware);
|
||||
|
||||
// Register error handling middleware (Sentry integration)
|
||||
await app.register(errorHandlingMiddleware);
|
||||
|
||||
app.addHook("onRequest", async (request, _reply) => {
|
||||
const requestId = extractOrGenerateRequestId(request.headers);
|
||||
request.id = requestId;
|
||||
|
||||
Reference in New Issue
Block a user