feat: integrate Datadog APM + Sentry error tracking with CloudWatch metrics FRE-4806
- Add CloudWatch metrics emitter (api_latency, api_requests, api_errors)
- Add request monitoring middleware for API (latency, error rate, throughput)
- Register error-handling, logging, and monitoring middleware in server.ts
- Add Datadog log forwarding via HTTP intake API
- Add application-level CloudWatch alarms for P99 latency, error rate, throughput
- Inject Datadog/Sentry env vars and secrets into ECS task definitions
- Add DD_API_KEY and SENTRY_DSN to ECS secrets
- Create CloudWatch log groups for datadog and sentry services
- Update .env.example with AWS_REGION and monitoring variables
- Add @aws-sdk/client-cloudwatch dependency to monitoring package

Co-Authored-By: Paperclip <noreply@paperclip.ing>
This commit is contained in:
97
packages/monitoring/src/cloudwatch.ts
Normal file
97
packages/monitoring/src/cloudwatch.ts
Normal file
@@ -0,0 +1,97 @@
|
||||
import { CloudWatchClient, PutMetricDataCommand, StandardUnit } from '@aws-sdk/client-cloudwatch';
|
||||
import { getMonitoringConfig } from './config';
|
||||
|
||||
/** CloudWatch client cached at module level; `null` until `getClient` creates one. */
let client: CloudWatchClient | null = null;

/**
 * Returns the cached CloudWatch client, constructing it on first use.
 *
 * The region is read from `AWS_REGION`, defaulting to `us-east-1`.
 *
 * @returns The client, or `null` when construction threw. Nothing is cached
 *   on failure, so the next call attempts construction again.
 */
function getClient(): CloudWatchClient | null {
  if (client) return client;

  // NOTE(review): the value returned here was never read by this function.
  // The call is retained in case it has side effects (e.g. config validation);
  // confirm against `./config` and drop it if it is a pure getter.
  getMonitoringConfig();

  // `||` rather than `??` so that an empty AWS_REGION also falls back to the default.
  const region = process.env.AWS_REGION || 'us-east-1';

  try {
    client = new CloudWatchClient({ region });
    return client;
  } catch (err: unknown) {
    // Surface the reason, consistent with how metric emit failures are logged.
    const reason = err instanceof Error ? err.message : String(err);
    console.warn('[CloudWatch] Metrics client initialization skipped:', reason);
    return null;
  }
}
|
||||
|
||||
/**
 * Shape of a single metric data point.
 *
 * Only `MetricName` and `Value` are required; every other field is optional.
 */
export interface MetricDataPoint {
  /** Name of the metric this data point belongs to. */
  MetricName: string;
  /** Numeric value of the data point. */
  Value: number;
  /** Optional name/value pairs attached to the data point. */
  Dimensions?: Array<{ Name: string; Value: string }>;
  /** Optional unit label for `Value`, as a free-form string. */
  Unit?: string;
  /** Optional timestamp associated with the data point. */
  Timestamp?: Date;
}
|
||||
|
||||
/** CloudWatch namespace used for every metric published by `emitMetric`. */
const NAMESPACE = 'ShieldAI';

/**
 * Publishes one data point to CloudWatch under the `ShieldAI` namespace.
 *
 * Emission is best-effort: if no client is available the call is a no-op,
 * and a failed send is logged as a warning instead of being rethrown.
 *
 * @param serviceName - Value of the `service` dimension, which is always
 *   attached first.
 * @param metricName - Name of the metric to publish.
 * @param value - Data point value.
 * @param unit - Unit of `value`; defaults to `'Count'`.
 * @param dimensions - Extra dimensions, appended after `service` as
 *   name/value pairs in the object's own entry order.
 */
export async function emitMetric(
  serviceName: string,
  metricName: string,
  value: number,
  unit: StandardUnit = 'Count',
  dimensions?: Record<string, string>
): Promise<void> {
  const cw = getClient();
  if (!cw) return;

  const dims: { Name: string; Value: string }[] = [
    { Name: 'service', Value: serviceName },
    ...Object.entries(dimensions ?? {}).map(([Name, Value]) => ({ Name, Value })),
  ];

  const command = new PutMetricDataCommand({
    Namespace: NAMESPACE,
    MetricData: [
      {
        MetricName: metricName,
        Dimensions: dims,
        Value: value,
        Unit: unit,
      },
    ],
  });

  try {
    await cw.send(command);
  } catch (err: unknown) {
    // Narrow instead of asserting: a non-Error throwable has no `.message`,
    // which previously produced "undefined" in the log line.
    const reason = err instanceof Error ? err.message : String(err);
    console.warn('[CloudWatch] Metric emit failed:', reason);
  }
}
|
||||
|
||||
/**
 * Publishes a latency sample as the `api_latency` metric, in milliseconds.
 *
 * The `percentile` label is attached as a dimension only; `latencyMs` is
 * forwarded unchanged and no percentile is computed here.
 *
 * @param serviceName - Service the sample belongs to.
 * @param latencyMs - Latency value to publish.
 * @param percentile - Label stored in the `percentile` dimension.
 */
export async function emitLatency(
  serviceName: string,
  latencyMs: number,
  percentile: 'p50' | 'p95' | 'p99'
) {
  const latencyUnit: StandardUnit = 'Milliseconds';
  const labels = { percentile };

  await emitMetric(serviceName, 'api_latency', latencyMs, latencyUnit, labels);
}
|
||||
|
||||
/**
 * Publishes a count of 1 to the `api_requests` metric for a handled request.
 *
 * The status code is reduced to its class and attached as the `status_class`
 * dimension, e.g. 404 becomes `4xx`.
 *
 * @param serviceName - Service that handled the request.
 * @param statusCode - HTTP status code of the response.
 */
export async function emitRequestCount(serviceName: string, statusCode: number) {
  const leadingDigit = Math.floor(statusCode / 100);
  const statusClass = `${leadingDigit}xx`;
  const countUnit: StandardUnit = 'Count';

  await emitMetric(serviceName, 'api_requests', 1, countUnit, { status_class: statusClass });
}
|
||||
|
||||
/**
 * Publishes a count of 1 to the `api_errors` metric.
 *
 * @param serviceName - Service in which the error occurred.
 * @param errorType - Label stored in the `error_type` dimension.
 */
export async function emitError(serviceName: string, errorType: string) {
  const countUnit: StandardUnit = 'Count';
  const labels = { error_type: errorType };

  await emitMetric(serviceName, 'api_errors', 1, countUnit, labels);
}
|
||||
Reference in New Issue
Block a user