fix: address Code Reviewer findings for Datadog/Sentry integration FRE-4806
fix: address Code Reviewer findings for Datadog/Sentry integration (FRE-4806)

- P1: Load dd-trace before other modules via a datadog-init.ts entry point
- P1: Batch all CloudWatch metrics into a single PutMetricDataCommand per request
- P2: Deduplicate warning logs by using else-if for high latency vs. error
- P3: Add a response.ok check to the Datadog log-forwarding fetch
- P3: Update getSentryHub() to use getCurrentScope() for Sentry SDK 8.x

Co-Authored-By: Paperclip <noreply@paperclip.ing>
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
import { FastifyInstance, FastifyRequest, FastifyReply } from 'fastify';
|
||||
import { emitLatency, emitRequestCount, emitError } from '@shieldai/monitoring';
|
||||
import { emitBatchMetrics, emitError } from '@shieldai/monitoring';
|
||||
|
||||
const SERVICE_NAME = process.env.DD_SERVICE || 'shieldai-api';
|
||||
|
||||
@@ -10,15 +10,38 @@ export async function monitoringMiddleware(fastify: FastifyInstance) {
|
||||
const method = request.method;
|
||||
const url = request.url;
|
||||
|
||||
// Emit request count
|
||||
await emitRequestCount(SERVICE_NAME, statusCode);
|
||||
// Batch all metrics into a single PutMetricDataCommand to avoid rate limits
|
||||
await emitBatchMetrics({
|
||||
serviceName: SERVICE_NAME,
|
||||
data: [
|
||||
{
|
||||
metricName: 'api_requests',
|
||||
value: 1,
|
||||
unit: 'Count',
|
||||
dimensions: { status_class: String(Math.floor(statusCode / 100)) + 'xx' },
|
||||
},
|
||||
{
|
||||
metricName: 'api_latency',
|
||||
value: responseTime,
|
||||
unit: 'Milliseconds',
|
||||
dimensions: { percentile: 'p50' },
|
||||
},
|
||||
{
|
||||
metricName: 'api_latency',
|
||||
value: responseTime,
|
||||
unit: 'Milliseconds',
|
||||
dimensions: { percentile: 'p95' },
|
||||
},
|
||||
{
|
||||
metricName: 'api_latency',
|
||||
value: responseTime,
|
||||
unit: 'Milliseconds',
|
||||
dimensions: { percentile: 'p99' },
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
// Emit latency metrics
|
||||
await emitLatency(SERVICE_NAME, responseTime, 'p50');
|
||||
await emitLatency(SERVICE_NAME, responseTime, 'p95');
|
||||
await emitLatency(SERVICE_NAME, responseTime, 'p99');
|
||||
|
||||
// Emit error metric for 5xx
|
||||
// Emit error metric for 5xx (separate call since it has different dimensions)
|
||||
if (statusCode >= 500) {
|
||||
await emitError(SERVICE_NAME, 'server_error');
|
||||
fastify.log.warn({
|
||||
@@ -31,8 +54,8 @@ export async function monitoringMiddleware(fastify: FastifyInstance) {
|
||||
});
|
||||
}
|
||||
|
||||
// Log high latency requests (>2s)
|
||||
if (responseTime > 2000) {
|
||||
// Log high latency requests (>2s) — only when not already logged as error
|
||||
else if (responseTime > 2000) {
|
||||
fastify.log.warn({
|
||||
event: 'high_latency',
|
||||
method,
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
// dd-trace must be initialized before any other module is loaded for auto-instrumentation
|
||||
import '@shieldai/monitoring/datadog-init';
|
||||
import Fastify from "fastify";
|
||||
import cors from "@fastify/cors";
|
||||
import helmet from "@fastify/helmet";
|
||||
@@ -11,13 +13,9 @@ import { darkwatchRoutes } from "./routes/darkwatch.routes";
|
||||
import { voiceprintRoutes } from "./routes/voiceprint.routes";
|
||||
import { correlationRoutes } from "./routes/correlation.routes";
|
||||
import { extensionRoutes } from "./routes/extension.routes";
|
||||
import { initDatadog, initSentry, initDatadogLogs, captureSentryError } from "@shieldai/monitoring";
|
||||
import { captureSentryError } from "@shieldai/monitoring";
|
||||
import { getCorsOrigins } from "./config/api.config";
|
||||
|
||||
initDatadog();
|
||||
initSentry();
|
||||
initDatadogLogs();
|
||||
|
||||
const app = Fastify({
|
||||
logger: {
|
||||
level: process.env.LOG_LEVEL || "info",
|
||||
|
||||
@@ -18,6 +18,7 @@
|
||||
"typescript": "^5.7.0"
|
||||
},
|
||||
"exports": {
|
||||
".": "./src/index.ts"
|
||||
".": "./src/index.ts",
|
||||
"./datadog-init": "./src/datadog-init.ts"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -62,6 +62,35 @@ export async function emitMetric(
|
||||
}
|
||||
}
|
||||
|
||||
export async function emitBatchMetrics(metrics: {
|
||||
serviceName: string;
|
||||
data: { metricName: string; value: number; unit: StandardUnit; dimensions?: Record<string, string> }[];
|
||||
}) {
|
||||
const cw = getClient();
|
||||
if (!cw) return;
|
||||
|
||||
const metricData = metrics.data.map((m) => ({
|
||||
MetricName: m.metricName,
|
||||
Dimensions: [
|
||||
{ Name: 'service', Value: metrics.serviceName },
|
||||
...(m.dimensions ? Object.entries(m.dimensions).map(([n, v]) => ({ Name: n, Value: v })) : []),
|
||||
],
|
||||
Value: m.value,
|
||||
Unit: m.unit,
|
||||
}));
|
||||
|
||||
const command = new PutMetricDataCommand({
|
||||
Namespace: NAMESPACE,
|
||||
MetricData: metricData,
|
||||
});
|
||||
|
||||
try {
|
||||
await cw.send(command);
|
||||
} catch (err) {
|
||||
console.warn('[CloudWatch] Batch metric emit failed:', (err as Error).message);
|
||||
}
|
||||
}
|
||||
|
||||
export async function emitLatency(
|
||||
serviceName: string,
|
||||
latencyMs: number,
|
||||
|
||||
8
packages/monitoring/src/datadog-init.ts
Normal file
8
packages/monitoring/src/datadog-init.ts
Normal file
@@ -0,0 +1,8 @@
|
||||
import { getMonitoringConfig } from './config';
|
||||
import { initDatadog } from './datadog';
|
||||
import { initSentry } from './sentry';
|
||||
import { initDatadogLogs } from './datadog-logs';
|
||||
|
||||
initDatadog();
|
||||
initSentry();
|
||||
initDatadogLogs();
|
||||
@@ -24,7 +24,7 @@ export function initDatadogLogs() {
|
||||
service,
|
||||
});
|
||||
|
||||
await fetch(`${logIntakeUrl}/api/v2/logs`, {
|
||||
const response = await fetch(`${logIntakeUrl}/api/v2/logs`, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'DD-API-KEY': process.env.DD_API_KEY!,
|
||||
@@ -32,6 +32,12 @@ export function initDatadogLogs() {
|
||||
},
|
||||
body: payload,
|
||||
});
|
||||
if (!response.ok) {
|
||||
console.warn(
|
||||
`[Datadog Logs] HTTP ${response.status} response from intake API`,
|
||||
await response.text()
|
||||
);
|
||||
}
|
||||
} catch (err) {
|
||||
console.warn('[Datadog Logs] Forward failed:', (err as Error).message);
|
||||
}
|
||||
|
||||
@@ -83,7 +83,7 @@ export function setSentryContext(name: string, data: Record<string, unknown>) {
|
||||
export function getSentryHub() {
|
||||
try {
|
||||
const Sentry = require('@sentry/node');
|
||||
return Sentry.getCurrentHub?.() || Sentry.hub;
|
||||
return Sentry.getCurrentScope?.() || Sentry.getCurrentHub?.() || Sentry.hub;
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
|
||||
File diff suppressed because one or more lines are too long
83
shieldai-workflow.md
Normal file
83
shieldai-workflow.md
Normal file
@@ -0,0 +1,83 @@
|
||||
# ShieldAI Code Review Workflow
|
||||
|
||||
## Current State (as of May 2, 2026)
|
||||
|
||||
### PR Backlog Status
|
||||
- **Open PRs**: 0 (pending commits pushed to master)
|
||||
- **Recently pushed commits**: 1 (FRE-4604); the remaining 6 were pushed previously
|
||||
- **Last review cycle**: FRE-4500, FRE-4499, FRE-4612 (security findings — all done)
|
||||
- **Branch protection**: Configured (see `branch-protection-rules.yaml`)
|
||||
- **PR template**: Configured (`.gitea/pull_request_templates/default.md`)
|
||||
|
||||
### Resolved Bottlenecks
|
||||
1. ✅ PR-based workflow established with PR template
|
||||
2. ✅ Branch protection rules documented and configured
|
||||
3. ✅ Code review checklist integrated into PR template
|
||||
4. ✅ Security review findings integrated (FRE-4499, FRE-4500, FRE-4612 all done)
|
||||
|
||||
## PR Process
|
||||
|
||||
1. **Feature branch creation** from `gt/master`
|
||||
2. **Development commits** using the conventional commit format with issue ID: `type(scope): description (FRE-XXXX)` (see Contribution Guidelines)
|
||||
3. **PR creation** against `gt/master`
|
||||
4. **Required reviews**:
|
||||
- Code Reviewer — all PRs
|
||||
- Security Reviewer — for security-sensitive changes
|
||||
5. **CI checks** pass (lint, typecheck, test)
|
||||
6. **Merge** via squash or rebase
|
||||
|
||||
### Code Review Checklist
|
||||
|
||||
- [ ] Security impact assessment
|
||||
- [ ] Test coverage verification
|
||||
- [ ] Type checking (TypeScript)
|
||||
- [ ] Linting compliance
|
||||
- [ ] Documentation updates
|
||||
- [ ] Breaking changes documented
|
||||
- [ ] Backward compatibility verified
|
||||
|
||||
### Branch Protection Rules
|
||||
|
||||
See `branch-protection-rules.yaml` for the full configuration. Summary:
|
||||
|
||||
- **Protected branch**: `gt/master`
|
||||
- **Required reviews**: 1 approved review before merge
|
||||
- **Required status checks**: lint, typecheck, test
|
||||
- **Enforce admins**: false (admins can bypass during emergencies)
|
||||
- **Allow force pushes**: true (for recovery scenarios)
|
||||
|
||||
## Review Assignment Policy
|
||||
|
||||
| Change Type | Required Reviewers |
|
||||
|-------------|-------------------|
|
||||
| General code | Code Reviewer |
|
||||
| Security-critical | Code Reviewer + Security Reviewer |
|
||||
| API contracts | Code Reviewer + CTO |
|
||||
| Database schema | Code Reviewer + Senior Engineer |
|
||||
|
||||
## Review Pipeline
|
||||
|
||||
```
|
||||
Engineer implements → marks in_review → Security Reviewer reviews (security-sensitive changes only) → Code Reviewer reviews → Done
|
||||
```
|
||||
|
||||
## Metrics to Track
|
||||
|
||||
- PR cycle time (creation to merge)
|
||||
- Review turnaround time
|
||||
- PR size (lines changed)
|
||||
- Review comments per PR
|
||||
- Merge conflict frequency
|
||||
|
||||
## Contribution Guidelines
|
||||
|
||||
1. Always create a feature branch from `gt/master`
|
||||
2. Use conventional commit format: `type(scope): description (FRE-XXXX)`
|
||||
3. Include tests for new functionality
|
||||
4. Update documentation for API changes
|
||||
5. Run lint and typecheck before pushing
|
||||
6. Create PR with filled template before requesting review
|
||||
7. Address all review comments before merge
|
||||
|
||||
---
|
||||
*Updated from FRE-4556 audit, implemented in FRE-4661*
|
||||
Reference in New Issue
Block a user