fix: address Code Reviewer findings for Datadog/Sentry integration FRE-4806
fix: address Code Reviewer findings for Datadog/Sentry integration (FRE-4806)

- P1: Load dd-trace before other modules via a datadog-init.ts entry point
- P1: Batch all CloudWatch metrics into a single PutMetricDataCommand per request
- P2: Deduplicate warning logs by using else-if for high latency vs. error
- P3: Add a response.ok check to the Datadog log-forwarding fetch
- P3: Update getSentryHub() to use getCurrentScope() for Sentry SDK 8.x

Co-Authored-By: Paperclip <noreply@paperclip.ing>
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
import { FastifyInstance, FastifyRequest, FastifyReply } from 'fastify';
|
||||
import { emitLatency, emitRequestCount, emitError } from '@shieldai/monitoring';
|
||||
import { emitBatchMetrics, emitError } from '@shieldai/monitoring';
|
||||
|
||||
const SERVICE_NAME = process.env.DD_SERVICE || 'shieldai-api';
|
||||
|
||||
@@ -10,15 +10,38 @@ export async function monitoringMiddleware(fastify: FastifyInstance) {
|
||||
const method = request.method;
|
||||
const url = request.url;
|
||||
|
||||
// Emit request count
|
||||
await emitRequestCount(SERVICE_NAME, statusCode);
|
||||
// Batch all metrics into a single PutMetricDataCommand to avoid rate limits
|
||||
await emitBatchMetrics({
|
||||
serviceName: SERVICE_NAME,
|
||||
data: [
|
||||
{
|
||||
metricName: 'api_requests',
|
||||
value: 1,
|
||||
unit: 'Count',
|
||||
dimensions: { status_class: String(Math.floor(statusCode / 100)) + 'xx' },
|
||||
},
|
||||
{
|
||||
metricName: 'api_latency',
|
||||
value: responseTime,
|
||||
unit: 'Milliseconds',
|
||||
dimensions: { percentile: 'p50' },
|
||||
},
|
||||
{
|
||||
metricName: 'api_latency',
|
||||
value: responseTime,
|
||||
unit: 'Milliseconds',
|
||||
dimensions: { percentile: 'p95' },
|
||||
},
|
||||
{
|
||||
metricName: 'api_latency',
|
||||
value: responseTime,
|
||||
unit: 'Milliseconds',
|
||||
dimensions: { percentile: 'p99' },
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
// Emit latency metrics
|
||||
await emitLatency(SERVICE_NAME, responseTime, 'p50');
|
||||
await emitLatency(SERVICE_NAME, responseTime, 'p95');
|
||||
await emitLatency(SERVICE_NAME, responseTime, 'p99');
|
||||
|
||||
// Emit error metric for 5xx
|
||||
// Emit error metric for 5xx (separate call since it has different dimensions)
|
||||
if (statusCode >= 500) {
|
||||
await emitError(SERVICE_NAME, 'server_error');
|
||||
fastify.log.warn({
|
||||
@@ -31,8 +54,8 @@ export async function monitoringMiddleware(fastify: FastifyInstance) {
|
||||
});
|
||||
}
|
||||
|
||||
// Log high latency requests (>2s)
|
||||
if (responseTime > 2000) {
|
||||
// Log high latency requests (>2s) — only when not already logged as error
|
||||
else if (responseTime > 2000) {
|
||||
fastify.log.warn({
|
||||
event: 'high_latency',
|
||||
method,
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
// dd-trace must be initialized before any other module is loaded for auto-instrumentation
|
||||
import '@shieldai/monitoring/datadog-init';
|
||||
import Fastify from "fastify";
|
||||
import cors from "@fastify/cors";
|
||||
import helmet from "@fastify/helmet";
|
||||
@@ -11,13 +13,9 @@ import { darkwatchRoutes } from "./routes/darkwatch.routes";
|
||||
import { voiceprintRoutes } from "./routes/voiceprint.routes";
|
||||
import { correlationRoutes } from "./routes/correlation.routes";
|
||||
import { extensionRoutes } from "./routes/extension.routes";
|
||||
import { initDatadog, initSentry, initDatadogLogs, captureSentryError } from "@shieldai/monitoring";
|
||||
import { captureSentryError } from "@shieldai/monitoring";
|
||||
import { getCorsOrigins } from "./config/api.config";
|
||||
|
||||
initDatadog();
|
||||
initSentry();
|
||||
initDatadogLogs();
|
||||
|
||||
const app = Fastify({
|
||||
logger: {
|
||||
level: process.env.LOG_LEVEL || "info",
|
||||
|
||||
@@ -18,6 +18,7 @@
|
||||
"typescript": "^5.7.0"
|
||||
},
|
||||
"exports": {
|
||||
".": "./src/index.ts"
|
||||
".": "./src/index.ts",
|
||||
"./datadog-init": "./src/datadog-init.ts"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -62,6 +62,35 @@ export async function emitMetric(
|
||||
}
|
||||
}
|
||||
|
||||
export async function emitBatchMetrics(metrics: {
|
||||
serviceName: string;
|
||||
data: { metricName: string; value: number; unit: StandardUnit; dimensions?: Record<string, string> }[];
|
||||
}) {
|
||||
const cw = getClient();
|
||||
if (!cw) return;
|
||||
|
||||
const metricData = metrics.data.map((m) => ({
|
||||
MetricName: m.metricName,
|
||||
Dimensions: [
|
||||
{ Name: 'service', Value: metrics.serviceName },
|
||||
...(m.dimensions ? Object.entries(m.dimensions).map(([n, v]) => ({ Name: n, Value: v })) : []),
|
||||
],
|
||||
Value: m.value,
|
||||
Unit: m.unit,
|
||||
}));
|
||||
|
||||
const command = new PutMetricDataCommand({
|
||||
Namespace: NAMESPACE,
|
||||
MetricData: metricData,
|
||||
});
|
||||
|
||||
try {
|
||||
await cw.send(command);
|
||||
} catch (err) {
|
||||
console.warn('[CloudWatch] Batch metric emit failed:', (err as Error).message);
|
||||
}
|
||||
}
|
||||
|
||||
export async function emitLatency(
|
||||
serviceName: string,
|
||||
latencyMs: number,
|
||||
|
||||
8
packages/monitoring/src/datadog-init.ts
Normal file
8
packages/monitoring/src/datadog-init.ts
Normal file
@@ -0,0 +1,8 @@
|
||||
import { getMonitoringConfig } from './config';
|
||||
import { initDatadog } from './datadog';
|
||||
import { initSentry } from './sentry';
|
||||
import { initDatadogLogs } from './datadog-logs';
|
||||
|
||||
initDatadog();
|
||||
initSentry();
|
||||
initDatadogLogs();
|
||||
@@ -24,7 +24,7 @@ export function initDatadogLogs() {
|
||||
service,
|
||||
});
|
||||
|
||||
await fetch(`${logIntakeUrl}/api/v2/logs`, {
|
||||
const response = await fetch(`${logIntakeUrl}/api/v2/logs`, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'DD-API-KEY': process.env.DD_API_KEY!,
|
||||
@@ -32,6 +32,12 @@ export function initDatadogLogs() {
|
||||
},
|
||||
body: payload,
|
||||
});
|
||||
if (!response.ok) {
|
||||
console.warn(
|
||||
`[Datadog Logs] HTTP ${response.status} response from intake API`,
|
||||
await response.text()
|
||||
);
|
||||
}
|
||||
} catch (err) {
|
||||
console.warn('[Datadog Logs] Forward failed:', (err as Error).message);
|
||||
}
|
||||
|
||||
@@ -83,7 +83,7 @@ export function setSentryContext(name: string, data: Record<string, unknown>) {
|
||||
export function getSentryHub() {
|
||||
try {
|
||||
const Sentry = require('@sentry/node');
|
||||
return Sentry.getCurrentHub?.() || Sentry.hub;
|
||||
return Sentry.getCurrentScope?.() || Sentry.getCurrentHub?.() || Sentry.hub;
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
|
||||
File diff suppressed because one or more lines are too long
83
shieldai-workflow.md
Normal file
83
shieldai-workflow.md
Normal file
@@ -0,0 +1,83 @@
|
||||
# ShieldAI Code Review Workflow
|
||||
|
||||
## Current State (as of May 2, 2026)
|
||||
|
||||
### PR Backlog Status
|
||||
- **Open PRs**: 0 (pending commits pushed to master)
|
||||
- **Recently pushed commits**: 1 (FRE-4604); the remaining 6 were pushed previously
|
||||
- **Last review cycle**: FRE-4500, FRE-4499, FRE-4612 (security findings — all done)
|
||||
- **Branch protection**: Configured (see `branch-protection-rules.yaml`)
|
||||
- **PR template**: Configured (`.gitea/pull_request_templates/default.md`)
|
||||
|
||||
### Resolved Bottlenecks
|
||||
1. ✅ PR-based workflow established with PR template
|
||||
2. ✅ Branch protection rules documented and configured
|
||||
3. ✅ Code review checklist integrated into PR template
|
||||
4. ✅ Security review findings integrated (FRE-4499, FRE-4500, FRE-4612 all done)
|
||||
|
||||
## PR Process
|
||||
|
||||
1. **Feature branch creation** from `gt/master`
|
||||
2. **Development commits** using the conventional commit format with issue ID: `type(scope): description (FRE-XXXX)` (see Contribution Guidelines)
|
||||
3. **PR creation** against `gt/master`
|
||||
4. **Required reviews**:
|
||||
- Code Reviewer — all PRs
|
||||
- Security Reviewer — for security-sensitive changes
|
||||
5. **CI checks** pass (lint, typecheck, test)
|
||||
6. **Merge** via squash or rebase
|
||||
|
||||
### Code Review Checklist
|
||||
|
||||
- [ ] Security impact assessment
|
||||
- [ ] Test coverage verification
|
||||
- [ ] Type checking (TypeScript)
|
||||
- [ ] Linting compliance
|
||||
- [ ] Documentation updates
|
||||
- [ ] Breaking changes documented
|
||||
- [ ] Backward compatibility verified
|
||||
|
||||
### Branch Protection Rules
|
||||
|
||||
See `branch-protection-rules.yaml` for the full configuration. Summary:
|
||||
|
||||
- **Protected branch**: `gt/master`
|
||||
- **Required reviews**: 1 approved review before merge
|
||||
- **Required status checks**: lint, typecheck, test
|
||||
- **Enforce admins**: false (admins can bypass during emergencies)
|
||||
- **Allow force pushes**: true (for recovery scenarios)
|
||||
|
||||
## Review Assignment Policy
|
||||
|
||||
| Change Type | Required Reviewers |
|
||||
|-------------|-------------------|
|
||||
| General code | Code Reviewer |
|
||||
| Security-critical | Code Reviewer + Security Reviewer |
|
||||
| API contracts | Code Reviewer + CTO |
|
||||
| Database schema | Code Reviewer + Senior Engineer |
|
||||
|
||||
## Review Pipeline
|
||||
|
||||
```
|
||||
Engineer implements → marks in_review → Security Reviewer reviews (security-sensitive changes only) → Code Reviewer reviews → Done
|
||||
```
|
||||
|
||||
## Metrics to Track
|
||||
|
||||
- PR cycle time (creation to merge)
|
||||
- Review turnaround time
|
||||
- PR size (lines changed)
|
||||
- Review comments per PR
|
||||
- Merge conflict frequency
|
||||
|
||||
## Contribution Guidelines
|
||||
|
||||
1. Always create a feature branch from `gt/master`
|
||||
2. Use conventional commit format: `type(scope): description (FRE-XXXX)`
|
||||
3. Include tests for new functionality
|
||||
4. Update documentation for API changes
|
||||
5. Run lint and typecheck before pushing
|
||||
6. Create PR with filled template before requesting review
|
||||
7. Address all review comments before merge
|
||||
|
||||
---
|
||||
*Updated from FRE-4556 audit, implemented in FRE-4661*
|
||||
Reference in New Issue
Block a user