feat: integrate Datadog APM and Sentry error tracking with CloudWatch metrics (FRE-4806)

- Add CloudWatch metrics emitter (api_latency, api_requests, api_errors)
- Add request monitoring middleware for API (latency, error rate, throughput)
- Register error-handling, logging, and monitoring middleware in server.ts
- Add Datadog log forwarding via HTTP intake API
- Add application-level CloudWatch alarms for P99 latency, error rate, throughput
- Inject Datadog/Sentry env vars and secrets into ECS task definitions
- Add DD_API_KEY and SENTRY_DSN to ECS secrets
- Create CloudWatch log groups for datadog and sentry services
- Update .env.example with AWS_REGION and monitoring variables
- Add @aws-sdk/client-cloudwatch dependency to monitoring package

Co-Authored-By: Paperclip <noreply@paperclip.ing>
This commit is contained in:
2026-05-10 02:15:11 -04:00
parent 57a206d7b3
commit c7df40ac26
18 changed files with 5260 additions and 76 deletions

View File

@@ -1,5 +1,17 @@
version: '3.9'
# Shared observability settings. Each application service merges this mapping
# into its `environment` with `<<: *monitoring`; a key the service sets
# explicitly (for example DD_SERVICE) takes precedence over the merged value.
x-monitoring: &monitoring
  # Datadog unified service tagging (env / service / version).
  DD_ENV: "${DD_ENV:-production}"
  DD_SERVICE: "${DD_SERVICE:-shieldai}"
  DD_VERSION: "${DOCKER_TAG:-latest}"
  DD_TRACE_ENABLED: "${DD_TRACE_ENABLED:-true}"
  # Compose service name of the agent container on the shared network.
  DD_AGENT_HOST: datadog-agent
  # DD_TRACE_AGENT_PORT is the variable the Datadog tracing libraries document
  # for the trace intake port.
  DD_TRACE_AGENT_PORT: "8126"
  # Kept for backward compatibility in case application code reads it directly.
  # NOTE(review): drop once nothing references DD_AGENT_PORT.
  DD_AGENT_PORT: "8126"
  DD_LOGS_INJECTION: "true"
  # Sentry: an empty DSN is passed through when SENTRY_DSN is unset.
  SENTRY_DSN: "${SENTRY_DSN:-}"
  # Sentry environment/release reuse the Datadog env and image tag so both
  # tools label a deployment identically.
  SENTRY_ENVIRONMENT: "${DD_ENV:-production}"
  SENTRY_RELEASE: "${DOCKER_TAG:-latest}"
services:
api:
image: ghcr.io/${GITHUB_REPOSITORY_OWNER}/shieldai-api:${DOCKER_TAG:-latest}
@@ -7,12 +19,13 @@ services:
ports:
- "${PORT:-3000}:3000"
environment:
- DATABASE_URL=postgresql://shieldai:${POSTGRES_PASSWORD}@postgres:5432/shieldai
- REDIS_URL=redis://redis:6379
- PORT=3000
- LOG_LEVEL=info
- HIBP_API_KEY=${HIBP_API_KEY}
- RESEND_API_KEY=${RESEND_API_KEY}
DATABASE_URL: "postgresql://shieldai:${POSTGRES_PASSWORD}@postgres:5432/shieldai"
REDIS_URL: "redis://redis:6379"
PORT: "3000"
LOG_LEVEL: info
HIBP_API_KEY: ${HIBP_API_KEY}
RESEND_API_KEY: ${RESEND_API_KEY}
<<: *monitoring
depends_on:
postgres:
condition: service_healthy
@@ -25,9 +38,11 @@ services:
image: ghcr.io/${GITHUB_REPOSITORY_OWNER}/shieldai-darkwatch:${DOCKER_TAG:-latest}
restart: unless-stopped
environment:
- DATABASE_URL=postgresql://shieldai:${POSTGRES_PASSWORD}@postgres:5432/shieldai
- REDIS_URL=redis://redis:6379
- HIBP_API_KEY=${HIBP_API_KEY}
DATABASE_URL: "postgresql://shieldai:${POSTGRES_PASSWORD}@postgres:5432/shieldai"
REDIS_URL: "redis://redis:6379"
HIBP_API_KEY: ${HIBP_API_KEY}
DD_SERVICE: "shieldai-darkwatch"
<<: *monitoring
depends_on:
postgres:
condition: service_healthy
@@ -40,8 +55,10 @@ services:
image: ghcr.io/${GITHUB_REPOSITORY_OWNER}/shieldai-spamshield:${DOCKER_TAG:-latest}
restart: unless-stopped
environment:
- DATABASE_URL=postgresql://shieldai:${POSTGRES_PASSWORD}@postgres:5432/shieldai
- REDIS_URL=redis://redis:6379
DATABASE_URL: "postgresql://shieldai:${POSTGRES_PASSWORD}@postgres:5432/shieldai"
REDIS_URL: "redis://redis:6379"
DD_SERVICE: "shieldai-spamshield"
<<: *monitoring
depends_on:
postgres:
condition: service_healthy
@@ -54,8 +71,10 @@ services:
image: ghcr.io/${GITHUB_REPOSITORY_OWNER}/shieldai-voiceprint:${DOCKER_TAG:-latest}
restart: unless-stopped
environment:
- DATABASE_URL=postgresql://shieldai:${POSTGRES_PASSWORD}@postgres:5432/shieldai
- REDIS_URL=redis://redis:6379
DATABASE_URL: "postgresql://shieldai:${POSTGRES_PASSWORD}@postgres:5432/shieldai"
REDIS_URL: "redis://redis:6379"
DD_SERVICE: "shieldai-voiceprint"
<<: *monitoring
depends_on:
postgres:
condition: service_healthy
@@ -64,6 +83,29 @@ services:
networks:
- shieldai
# Datadog Agent container: APM, DogStatsD and container log collection are
# enabled below, and the agent joins the `shieldai` network so application
# services can reach it by the hostname `datadog-agent`.
datadog-agent:
  image: datadog/agent:7
  restart: unless-stopped
  environment:
    DD_API_KEY: "${DD_API_KEY}"
    DD_SITE: "${DD_SITE:-datadoghq.com}"
    DD_ENV: "${DD_ENV:-production}"
    # Accept metrics and traces from other containers, not only localhost.
    DD_DOGSTATSD_NON_LOCAL_TRAFFIC: "true"
    DD_APM_ENABLED: "true"
    DD_APM_NON_LOCAL_TRAFFIC: "true"
    DD_LOGS_ENABLED: "true"
    DD_LOGS_CONFIG_CONTAINER_COLLECT_ALL: "true"
    # The Agent's health endpoint is enabled by giving it a port number
    # (DD_HEALTH_PORT); the previous DD_HEALTH_PORT_ENABLE key is not an
    # Agent setting and had no effect.
    DD_HEALTH_PORT: "5555"
  # NOTE(review): these ports are published on the host while non-local
  # traffic is accepted. Services on the `shieldai` network do not need the
  # host mapping; confirm whether it should be restricted to 127.0.0.1.
  ports:
    - "8125:8125/udp"
    - "8126:8126"
  # All mounts are read-only.
  volumes:
    - /var/run/docker.sock:/var/run/docker.sock:ro
    - /proc/:/host/proc/:ro
    - /sys/fs/cgroup:/host/sys/fs/cgroup:ro
  networks:
    - shieldai
postgres:
image: postgres:16-alpine
restart: unless-stopped