fix: address Code Reviewer findings for Datadog/Sentry integration FRE-4806

P1: Load dd-trace before other modules via datadog-init.ts entry point
P1: Batch all CloudWatch metrics into single PutMetricDataCommand per request
P2: Deduplicate warning logs with else-if for high latency vs error
P3: Add response.ok check to Datadog log forwarding fetch
P3: Update getSentryHub() to use getCurrentScope() for Sentry SDK 8.x

Co-Authored-By: Paperclip <noreply@paperclip.ing>
This commit is contained in:
2026-05-10 16:02:18 -04:00
parent a653c77959
commit 31e0b39794
9 changed files with 168 additions and 20 deletions

View File

@@ -1,5 +1,5 @@
import { FastifyInstance, FastifyRequest, FastifyReply } from 'fastify'; import { FastifyInstance, FastifyRequest, FastifyReply } from 'fastify';
import { emitLatency, emitRequestCount, emitError } from '@shieldai/monitoring'; import { emitBatchMetrics, emitError } from '@shieldai/monitoring';
const SERVICE_NAME = process.env.DD_SERVICE || 'shieldai-api'; const SERVICE_NAME = process.env.DD_SERVICE || 'shieldai-api';
@@ -10,15 +10,38 @@ export async function monitoringMiddleware(fastify: FastifyInstance) {
const method = request.method; const method = request.method;
const url = request.url; const url = request.url;
// Emit request count // Batch all metrics into a single PutMetricDataCommand to avoid rate limits
await emitRequestCount(SERVICE_NAME, statusCode); await emitBatchMetrics({
serviceName: SERVICE_NAME,
data: [
{
metricName: 'api_requests',
value: 1,
unit: 'Count',
dimensions: { status_class: String(Math.floor(statusCode / 100)) + 'xx' },
},
{
metricName: 'api_latency',
value: responseTime,
unit: 'Milliseconds',
dimensions: { percentile: 'p50' },
},
{
metricName: 'api_latency',
value: responseTime,
unit: 'Milliseconds',
dimensions: { percentile: 'p95' },
},
{
metricName: 'api_latency',
value: responseTime,
unit: 'Milliseconds',
dimensions: { percentile: 'p99' },
},
],
});
// Emit latency metrics // Emit error metric for 5xx (separate call since it has different dimensions)
await emitLatency(SERVICE_NAME, responseTime, 'p50');
await emitLatency(SERVICE_NAME, responseTime, 'p95');
await emitLatency(SERVICE_NAME, responseTime, 'p99');
// Emit error metric for 5xx
if (statusCode >= 500) { if (statusCode >= 500) {
await emitError(SERVICE_NAME, 'server_error'); await emitError(SERVICE_NAME, 'server_error');
fastify.log.warn({ fastify.log.warn({
@@ -31,8 +54,8 @@ export async function monitoringMiddleware(fastify: FastifyInstance) {
}); });
} }
// Log high latency requests (>2s) // Log high latency requests (>2s) — only when not already logged as error
if (responseTime > 2000) { else if (responseTime > 2000) {
fastify.log.warn({ fastify.log.warn({
event: 'high_latency', event: 'high_latency',
method, method,

View File

@@ -1,3 +1,5 @@
// dd-trace must be initialized before any other module is loaded for auto-instrumentation
import '@shieldai/monitoring/datadog-init';
import Fastify from "fastify"; import Fastify from "fastify";
import cors from "@fastify/cors"; import cors from "@fastify/cors";
import helmet from "@fastify/helmet"; import helmet from "@fastify/helmet";
@@ -11,13 +13,9 @@ import { darkwatchRoutes } from "./routes/darkwatch.routes";
import { voiceprintRoutes } from "./routes/voiceprint.routes"; import { voiceprintRoutes } from "./routes/voiceprint.routes";
import { correlationRoutes } from "./routes/correlation.routes"; import { correlationRoutes } from "./routes/correlation.routes";
import { extensionRoutes } from "./routes/extension.routes"; import { extensionRoutes } from "./routes/extension.routes";
import { initDatadog, initSentry, initDatadogLogs, captureSentryError } from "@shieldai/monitoring"; import { captureSentryError } from "@shieldai/monitoring";
import { getCorsOrigins } from "./config/api.config"; import { getCorsOrigins } from "./config/api.config";
initDatadog();
initSentry();
initDatadogLogs();
const app = Fastify({ const app = Fastify({
logger: { logger: {
level: process.env.LOG_LEVEL || "info", level: process.env.LOG_LEVEL || "info",

View File

@@ -18,6 +18,7 @@
"typescript": "^5.7.0" "typescript": "^5.7.0"
}, },
"exports": { "exports": {
".": "./src/index.ts" ".": "./src/index.ts",
"./datadog-init": "./src/datadog-init.ts"
} }
} }

View File

@@ -62,6 +62,35 @@ export async function emitMetric(
} }
} }
/**
 * Emit several metrics in a single CloudWatch PutMetricData call to avoid
 * per-request rate limits (one call per request instead of one per metric).
 *
 * @param metrics.serviceName  Attached to every metric as a `service` dimension.
 * @param metrics.data         Metric points; optional per-point dimensions are
 *                             merged after the shared `service` dimension.
 *
 * Never throws: a missing client (e.g. unconfigured environment) or a send
 * failure is logged and swallowed, matching emitMetric's best-effort contract.
 */
export async function emitBatchMetrics(metrics: {
  serviceName: string;
  data: { metricName: string; value: number; unit: StandardUnit; dimensions?: Record<string, string> }[];
}) {
  const cw = getClient();
  if (!cw) return;
  // PutMetricData rejects an empty MetricData array — nothing to send.
  if (metrics.data.length === 0) return;
  const metricData = metrics.data.map((m) => ({
    MetricName: m.metricName,
    Dimensions: [
      { Name: 'service', Value: metrics.serviceName },
      ...(m.dimensions ? Object.entries(m.dimensions).map(([n, v]) => ({ Name: n, Value: v })) : []),
    ],
    Value: m.value,
    Unit: m.unit,
  }));
  // CloudWatch caps PutMetricData at 1000 metrics per request; chunk defensively
  // so an oversized batch degrades to multiple calls instead of a hard API error.
  const MAX_METRICS_PER_REQUEST = 1000;
  for (let i = 0; i < metricData.length; i += MAX_METRICS_PER_REQUEST) {
    const command = new PutMetricDataCommand({
      Namespace: NAMESPACE,
      MetricData: metricData.slice(i, i + MAX_METRICS_PER_REQUEST),
    });
    try {
      await cw.send(command);
    } catch (err) {
      console.warn('[CloudWatch] Batch metric emit failed:', (err as Error).message);
    }
  }
}
export async function emitLatency( export async function emitLatency(
serviceName: string, serviceName: string,
latencyMs: number, latencyMs: number,

View File

@@ -0,0 +1,8 @@
// Entry-point module: imported first (before any other application module) so
// that dd-trace can patch modules for auto-instrumentation as they are loaded.
// Keep this file free of unrelated imports — `getMonitoringConfig` was imported
// here but never used, so it has been dropped.
import { initDatadog } from './datadog';
import { initSentry } from './sentry';
import { initDatadogLogs } from './datadog-logs';

// Side-effectful initialization runs at import time, in this order: tracing,
// then error tracking, then log forwarding.
initDatadog();
initSentry();
initDatadogLogs();

View File

@@ -24,7 +24,7 @@ export function initDatadogLogs() {
service, service,
}); });
await fetch(`${logIntakeUrl}/api/v2/logs`, { const response = await fetch(`${logIntakeUrl}/api/v2/logs`, {
method: 'POST', method: 'POST',
headers: { headers: {
'DD-API-KEY': process.env.DD_API_KEY!, 'DD-API-KEY': process.env.DD_API_KEY!,
@@ -32,6 +32,12 @@ export function initDatadogLogs() {
}, },
body: payload, body: payload,
}); });
if (!response.ok) {
console.warn(
`[Datadog Logs] HTTP ${response.status} response from intake API`,
await response.text()
);
}
} catch (err) { } catch (err) {
console.warn('[Datadog Logs] Forward failed:', (err as Error).message); console.warn('[Datadog Logs] Forward failed:', (err as Error).message);
} }

View File

@@ -83,7 +83,7 @@ export function setSentryContext(name: string, data: Record<string, unknown>) {
export function getSentryHub() { export function getSentryHub() {
try { try {
const Sentry = require('@sentry/node'); const Sentry = require('@sentry/node');
return Sentry.getCurrentHub?.() || Sentry.hub; return Sentry.getCurrentScope?.() || Sentry.getCurrentHub?.() || Sentry.hub;
} catch { } catch {
return null; return null;
} }

File diff suppressed because one or more lines are too long

83
shieldai-workflow.md Normal file
View File

@@ -0,0 +1,83 @@
# ShieldAI Code Review Workflow
## Current State (as of May 2, 2026)
### PR Backlog Status
- **Open PRs**: 0 (recent commits were pushed directly to `gt/master` rather than via PRs)
- **Pending commits**: 1 commit pushed (FRE-4604) — remaining 6 were previously pushed
- **Last review cycle**: FRE-4500, FRE-4499, FRE-4612 (security findings — all done)
- **Branch protection**: Configured (see `branch-protection-rules.yaml`)
- **PR template**: Configured (`.gitea/pull_request_templates/default.md`)
### Resolved Bottlenecks
1. ✅ PR-based workflow established with PR template
2. ✅ Branch protection rules documented and configured
3. ✅ Code review checklist integrated into PR template
4. ✅ Security review findings integrated (FRE-4499, FRE-4500, FRE-4612 all done)
## PR Process
1. **Feature branch creation** from `gt/master`
2. **Development commits** with conventional commit format (include issue ID: `FRE-XXXX: description`)
3. **PR creation** against `gt/master`
4. **Required reviews**:
- Code Reviewer — all PRs
- Security Reviewer — for security-sensitive changes
5. **CI checks** pass (lint, typecheck, test)
6. **Merge** via squash or rebase
### Code Review Checklist
- [ ] Security impact assessment
- [ ] Test coverage verification
- [ ] Type checking (TypeScript)
- [ ] Linting compliance
- [ ] Documentation updates
- [ ] Breaking changes documented
- [ ] Backward compatibility verified
### Branch Protection Rules
See `branch-protection-rules.yaml` for the full configuration. Summary:
- **Protected branch**: `gt/master`
- **Required reviews**: 1 approved review before merge
- **Required status checks**: lint, typecheck, test
- **Enforce admins**: false (admins can bypass during emergencies)
- **Allow force pushes**: true (for recovery scenarios)
## Review Assignment Policy
| Change Type | Required Reviewers |
|-------------|-------------------|
| General code | Code Reviewer |
| Security-critical | Code Reviewer + Security Reviewer |
| API contracts | Code Reviewer + CTO |
| Database schema | Code Reviewer + Senior Engineer |
## Review Pipeline
```
Engineer implements → marks in_review → Code Reviewer reviews (plus Security Reviewer for security-sensitive changes) → Done
```
## Metrics to Track
- PR cycle time (creation to merge)
- Review turnaround time
- PR size (lines changed)
- Review comments per PR
- Merge conflict frequency
## Contribution Guidelines
1. Always create a feature branch from `gt/master`
2. Use conventional commit format: `type(scope): description (FRE-XXXX)`
3. Include tests for new functionality
4. Update documentation for API changes
5. Run lint and typecheck before pushing
6. Create PR with filled template before requesting review
7. Address all review comments before merge
---
*Updated from FRE-4556 audit, implemented in FRE-4661*