fix: address Code Reviewer findings for Datadog/Sentry integration FRE-4806

P1: Load dd-trace before other modules via datadog-init.ts entry point
P1: Batch all CloudWatch metrics into single PutMetricDataCommand per request
P2: Deduplicate warning logs with else-if for high latency vs error
P3: Add response.ok check to Datadog log forwarding fetch
P3: Update getSentryHub() to use getCurrentScope() for Sentry SDK 8.x

Co-Authored-By: Paperclip <noreply@paperclip.ing>
This commit is contained in:
2026-05-10 16:02:18 -04:00
parent a653c77959
commit 31e0b39794
9 changed files with 168 additions and 20 deletions

View File

@@ -1,5 +1,5 @@
import { FastifyInstance, FastifyRequest, FastifyReply } from 'fastify'; import { FastifyInstance, FastifyRequest, FastifyReply } from 'fastify';
import { emitLatency, emitRequestCount, emitError } from '@shieldai/monitoring'; import { emitBatchMetrics, emitError } from '@shieldai/monitoring';
const SERVICE_NAME = process.env.DD_SERVICE || 'shieldai-api'; const SERVICE_NAME = process.env.DD_SERVICE || 'shieldai-api';
@@ -10,15 +10,38 @@ export async function monitoringMiddleware(fastify: FastifyInstance) {
const method = request.method; const method = request.method;
const url = request.url; const url = request.url;
// Emit request count // Batch all metrics into a single PutMetricDataCommand to avoid rate limits
await emitRequestCount(SERVICE_NAME, statusCode); await emitBatchMetrics({
serviceName: SERVICE_NAME,
data: [
{
metricName: 'api_requests',
value: 1,
unit: 'Count',
dimensions: { status_class: String(Math.floor(statusCode / 100)) + 'xx' },
},
{
metricName: 'api_latency',
value: responseTime,
unit: 'Milliseconds',
dimensions: { percentile: 'p50' },
},
{
metricName: 'api_latency',
value: responseTime,
unit: 'Milliseconds',
dimensions: { percentile: 'p95' },
},
{
metricName: 'api_latency',
value: responseTime,
unit: 'Milliseconds',
dimensions: { percentile: 'p99' },
},
],
});
// Emit latency metrics // Emit error metric for 5xx (separate call since it has different dimensions)
await emitLatency(SERVICE_NAME, responseTime, 'p50');
await emitLatency(SERVICE_NAME, responseTime, 'p95');
await emitLatency(SERVICE_NAME, responseTime, 'p99');
// Emit error metric for 5xx
if (statusCode >= 500) { if (statusCode >= 500) {
await emitError(SERVICE_NAME, 'server_error'); await emitError(SERVICE_NAME, 'server_error');
fastify.log.warn({ fastify.log.warn({
@@ -31,8 +54,8 @@ export async function monitoringMiddleware(fastify: FastifyInstance) {
}); });
} }
// Log high latency requests (>2s) // Log high latency requests (>2s) — only when not already logged as error
if (responseTime > 2000) { else if (responseTime > 2000) {
fastify.log.warn({ fastify.log.warn({
event: 'high_latency', event: 'high_latency',
method, method,

View File

@@ -1,3 +1,5 @@
// dd-trace must be initialized before any other module is loaded for auto-instrumentation
import '@shieldai/monitoring/datadog-init';
import Fastify from "fastify"; import Fastify from "fastify";
import cors from "@fastify/cors"; import cors from "@fastify/cors";
import helmet from "@fastify/helmet"; import helmet from "@fastify/helmet";
@@ -11,13 +13,9 @@ import { darkwatchRoutes } from "./routes/darkwatch.routes";
import { voiceprintRoutes } from "./routes/voiceprint.routes"; import { voiceprintRoutes } from "./routes/voiceprint.routes";
import { correlationRoutes } from "./routes/correlation.routes"; import { correlationRoutes } from "./routes/correlation.routes";
import { extensionRoutes } from "./routes/extension.routes"; import { extensionRoutes } from "./routes/extension.routes";
import { initDatadog, initSentry, initDatadogLogs, captureSentryError } from "@shieldai/monitoring"; import { captureSentryError } from "@shieldai/monitoring";
import { getCorsOrigins } from "./config/api.config"; import { getCorsOrigins } from "./config/api.config";
initDatadog();
initSentry();
initDatadogLogs();
const app = Fastify({ const app = Fastify({
logger: { logger: {
level: process.env.LOG_LEVEL || "info", level: process.env.LOG_LEVEL || "info",

View File

@@ -18,6 +18,7 @@
"typescript": "^5.7.0" "typescript": "^5.7.0"
}, },
"exports": { "exports": {
".": "./src/index.ts" ".": "./src/index.ts",
"./datadog-init": "./src/datadog-init.ts"
} }
} }

View File

@@ -62,6 +62,35 @@ export async function emitMetric(
} }
} }
/**
 * Emit several metrics in a single CloudWatch PutMetricData call to avoid
 * per-request rate limits (one call per request instead of one per metric).
 *
 * @param metrics.serviceName  Attached to every metric as a `service` dimension.
 * @param metrics.data         Metric points; optional per-point dimensions are
 *                             merged after the shared `service` dimension.
 *
 * Never throws: a missing client (e.g. unconfigured environment) or a send
 * failure is logged and swallowed, matching emitMetric's best-effort contract.
 */
export async function emitBatchMetrics(metrics: {
  serviceName: string;
  data: { metricName: string; value: number; unit: StandardUnit; dimensions?: Record<string, string> }[];
}) {
  const cw = getClient();
  if (!cw) return;
  // PutMetricData rejects an empty MetricData array — nothing to send.
  if (metrics.data.length === 0) return;
  const metricData = metrics.data.map((m) => ({
    MetricName: m.metricName,
    Dimensions: [
      { Name: 'service', Value: metrics.serviceName },
      ...(m.dimensions ? Object.entries(m.dimensions).map(([n, v]) => ({ Name: n, Value: v })) : []),
    ],
    Value: m.value,
    Unit: m.unit,
  }));
  // CloudWatch caps PutMetricData at 1000 metrics per request; chunk defensively
  // so an oversized batch degrades to multiple calls instead of a hard API error.
  const MAX_METRICS_PER_REQUEST = 1000;
  for (let i = 0; i < metricData.length; i += MAX_METRICS_PER_REQUEST) {
    const command = new PutMetricDataCommand({
      Namespace: NAMESPACE,
      MetricData: metricData.slice(i, i + MAX_METRICS_PER_REQUEST),
    });
    try {
      await cw.send(command);
    } catch (err) {
      console.warn('[CloudWatch] Batch metric emit failed:', (err as Error).message);
    }
  }
}
export async function emitLatency( export async function emitLatency(
serviceName: string, serviceName: string,
latencyMs: number, latencyMs: number,

View File

@@ -0,0 +1,8 @@
// Entry-point module: imported first (before any other application module) so
// that dd-trace can patch modules for auto-instrumentation as they are loaded.
// Keep this file free of unrelated imports — `getMonitoringConfig` was imported
// here but never used, so it has been dropped.
import { initDatadog } from './datadog';
import { initSentry } from './sentry';
import { initDatadogLogs } from './datadog-logs';

// Side-effectful initialization runs at import time, in this order: tracing,
// then error tracking, then log forwarding.
initDatadog();
initSentry();
initDatadogLogs();

View File

@@ -24,7 +24,7 @@ export function initDatadogLogs() {
service, service,
}); });
await fetch(`${logIntakeUrl}/api/v2/logs`, { const response = await fetch(`${logIntakeUrl}/api/v2/logs`, {
method: 'POST', method: 'POST',
headers: { headers: {
'DD-API-KEY': process.env.DD_API_KEY!, 'DD-API-KEY': process.env.DD_API_KEY!,
@@ -32,6 +32,12 @@ export function initDatadogLogs() {
}, },
body: payload, body: payload,
}); });
if (!response.ok) {
console.warn(
`[Datadog Logs] HTTP ${response.status} response from intake API`,
await response.text()
);
}
} catch (err) { } catch (err) {
console.warn('[Datadog Logs] Forward failed:', (err as Error).message); console.warn('[Datadog Logs] Forward failed:', (err as Error).message);
} }

View File

@@ -83,7 +83,7 @@ export function setSentryContext(name: string, data: Record<string, unknown>) {
export function getSentryHub() { export function getSentryHub() {
try { try {
const Sentry = require('@sentry/node'); const Sentry = require('@sentry/node');
return Sentry.getCurrentHub?.() || Sentry.hub; return Sentry.getCurrentScope?.() || Sentry.getCurrentHub?.() || Sentry.hub;
} catch { } catch {
return null; return null;
} }

File diff suppressed because one or more lines are too long

83
shieldai-workflow.md Normal file
View File

@@ -0,0 +1,83 @@
# ShieldAI Code Review Workflow
## Current State (as of May 2, 2026)
### PR Backlog Status
- **Open PRs**: 0 (recent commits were pushed directly to `gt/master` rather than via PRs)
- **Pending commits**: 1 commit pushed (FRE-4604) — remaining 6 were previously pushed
- **Last review cycle**: FRE-4500, FRE-4499, FRE-4612 (security findings — all done)
- **Branch protection**: Configured (see `branch-protection-rules.yaml`)
- **PR template**: Configured (`.gitea/pull_request_templates/default.md`)
### Resolved Bottlenecks
1. ✅ PR-based workflow established with PR template
2. ✅ Branch protection rules documented and configured
3. ✅ Code review checklist integrated into PR template
4. ✅ Security review findings integrated (FRE-4499, FRE-4500, FRE-4612 all done)
## PR Process
1. **Feature branch creation** from `gt/master`
2. **Development commits** with conventional commit format (include issue ID: `FRE-XXXX: description`)
3. **PR creation** against `gt/master`
4. **Required reviews**:
- Code Reviewer — all PRs
- Security Reviewer — for security-sensitive changes
5. **CI checks** pass (lint, typecheck, test)
6. **Merge** via squash or rebase
### Code Review Checklist
- [ ] Security impact assessment
- [ ] Test coverage verification
- [ ] Type checking (TypeScript)
- [ ] Linting compliance
- [ ] Documentation updates
- [ ] Breaking changes documented
- [ ] Backward compatibility verified
### Branch Protection Rules
See `branch-protection-rules.yaml` for the full configuration. Summary:
- **Protected branch**: `gt/master`
- **Required reviews**: 1 approved review before merge
- **Required status checks**: lint, typecheck, test
- **Enforce admins**: false (admins can bypass during emergencies)
- **Allow force pushes**: true (for recovery scenarios)
## Review Assignment Policy
| Change Type | Required Reviewers |
|-------------|-------------------|
| General code | Code Reviewer |
| Security-critical | Code Reviewer + Security Reviewer |
| API contracts | Code Reviewer + CTO |
| Database schema | Code Reviewer + Senior Engineer |
## Review Pipeline
```
Engineer implements → marks in_review → Code Reviewer reviews (plus Security Reviewer for security-sensitive changes) → Done
```
## Metrics to Track
- PR cycle time (creation to merge)
- Review turnaround time
- PR size (lines changed)
- Review comments per PR
- Merge conflict frequency
## Contribution Guidelines
1. Always create a feature branch from `gt/master`
2. Use conventional commit format: `type(scope): description (FRE-XXXX)`
3. Include tests for new functionality
4. Update documentation for API changes
5. Run lint and typecheck before pushing
6. Create PR with filled template before requesting review
7. Address all review comments before merge
---
*Updated from FRE-4556 audit, implemented in FRE-4661*