FRE-4529: Transfer ShieldAI code from FrenoCorp repo
Transferred ShieldAI-related files that were mistakenly placed in ~/code/FrenoCorp:
- Services: spamshield (feature-flags, audit-logger, error-handler), voiceprint (config, service, feature-flags), darkwatch (pipeline, scan, scheduler, watchlist, webhook)
- Packages: shared-analytics, shared-auth, shared-ui, shared-utils (new); shared-billing and jobs supplemented with unique FC files
- Server: alerts (FC version newer), routes (spamshield, darkwatch, voiceprint)
- Config: turbo.json, tsconfig.base.json, vite/vitest configs, drizzle, Dockerfile
- VoicePrint ML service
- Examples
Pending: apps/{api,web,mobile}/ structured merge, shared-db/db mapping
Co-Authored-By: Paperclip <noreply@paperclip.ing>
174
services/darkwatch/src/alert.pipeline.ts
Normal file
@@ -0,0 +1,174 @@
import { prisma, AlertType, AlertSeverity } from '@shieldsai/shared-db';
import {
  NotificationService,
  NotificationPriority,
  loadNotificationConfig,
} from '@shieldsai/shared-notifications';

const ALERT_DEDUP_WINDOW_MS = 24 * 60 * 60 * 1000;

export class AlertPipeline {
  private notificationService: NotificationService;

  constructor() {
    this.notificationService = new NotificationService(loadNotificationConfig());
  }

  async processNewExposures(exposureIds: string[]) {
    const exposures = await prisma.exposure.findMany({
      where: { id: { in: exposureIds }, isFirstTime: true },
      include: {
        subscription: {
          select: {
            id: true,
            userId: true,
            tier: true,
          },
        },
        watchlistItem: true,
      },
    });

    const alertsCreated: Awaited<ReturnType<typeof prisma.alert.create>>[] = [];

    for (const exposure of exposures) {
      const dedupKey = `exposure:${exposure.subscriptionId}:${exposure.source}:${exposure.identifierHash}`;

      const recentAlert = await prisma.alert.findFirst({
        where: {
          subscriptionId: exposure.subscriptionId,
          type: AlertType.exposure_detected,
          createdAt: {
            gte: new Date(Date.now() - ALERT_DEDUP_WINDOW_MS),
          },
        },
        orderBy: { createdAt: 'desc' },
      });

      if (recentAlert) {
        continue;
      }

      const alert = await prisma.alert.create({
        data: {
          subscriptionId: exposure.subscriptionId,
          userId: exposure.subscription.userId,
          exposureId: exposure.id,
          type: AlertType.exposure_detected,
          title: this.buildTitle(exposure),
          message: this.buildMessage(exposure),
          severity: this.mapSeverity(exposure.severity),
          channel: this.getChannelsForTier(exposure.subscription.tier),
        },
      });

      alertsCreated.push(alert);

      await this.dispatchNotification(alert, exposure);
    }

    return alertsCreated;
  }

  async dispatchScanCompleteAlert(
    subscriptionId: string,
    userId: string,
    exposuresFound: number
  ) {
    const subscription = await prisma.subscription.findUnique({
      where: { id: subscriptionId },
      select: { tier: true },
    });

    if (!subscription) return;

    const alert = await prisma.alert.create({
      data: {
        subscriptionId,
        userId,
        type: AlertType.scan_complete,
        title: 'DarkWatch Scan Complete',
        message: `Scan found ${exposuresFound} new exposure${exposuresFound === 1 ? '' : 's'}.`,
        severity: exposuresFound > 0 ? 'warning' : 'info',
        channel: this.getChannelsForTier(subscription.tier),
      },
    });

    await this.dispatchNotification(alert, {
      source: 'hibp',
      severity: 'info',
      identifier: '',
      dataType: 'email',
    } as any);

    return alert;
  }

  private async dispatchNotification(
    alert: {
      userId: string;
      channel: string[];
      title: string;
      message: string;
      severity: AlertSeverity;
    },
    exposure: { source: string; severity: string; identifier: string; dataType: string }
  ) {
    try {
      if (!this.notificationService.isFullyConfigured()) return;

      await this.notificationService.sendMultiChannelNotification(
        {
          userId: alert.userId,
        },
        alert.channel as any,
        alert.title,
        `<p>${alert.message}</p>
<p><strong>Source:</strong> ${exposure.source}</p>
<p><strong>Severity:</strong> ${exposure.severity}</p>
<p><strong>Type:</strong> ${exposure.dataType}</p>`,
        alert.severity === 'critical'
          ? NotificationPriority.HIGH
          : NotificationPriority.NORMAL
      );
    } catch (error) {
      console.error('[AlertPipeline] Notification dispatch error:', error);
    }
  }

  private buildTitle(exposure: {
    source: string;
    dataType: string;
    severity: string;
  }): string {
    return `${exposure.severity.toUpperCase()}: ${exposure.dataType} exposure on ${exposure.source}`;
  }

  private buildMessage(exposure: {
    identifier: string;
    source: string;
    severity: string;
    dataType: string;
  }): string {
    const masked = exposure.identifier.includes('@')
      ? exposure.identifier.replace(/(?<=.{2}).*(?=@)/, '***')
      : exposure.identifier.slice(0, 3) + '***';

    return `Your ${exposure.dataType} (${masked}) was found in a ${exposure.source} breach with ${exposure.severity} severity.`;
  }

  private mapSeverity(severity: string): AlertSeverity {
    return severity as AlertSeverity;
  }

  private getChannelsForTier(tier: string): string[] {
    const channelMap: Record<string, string[]> = {
      basic: ['email'],
      plus: ['email', 'push'],
      premium: ['email', 'push', 'sms'],
    };
    return channelMap[tier] || ['email'];
  }
}

export const alertPipeline = new AlertPipeline();
@@ -1,7 +1,5 @@
-export * from "./watchlist/WatchListService";
-export * from "./hibp/HIBPService";
-export * from "./matching/MatchingEngine";
-export * from "./alerts/AlertPipeline";
-export * from "./scanner/ScanService";
-export * from "./scheduler/ScanScheduler";
-export * from "./webhooks/WebhookHandler";
+export { watchlistService } from './watchlist.service';
+export { scanService } from './scan.service';
+export { schedulerService } from './scheduler.service';
+export { webhookService } from './webhook.service';
+export { alertPipeline } from './alert.pipeline';
220
services/darkwatch/src/scan.service.ts
Normal file
@@ -0,0 +1,220 @@
import { prisma, ExposureSource, ExposureSeverity, WatchlistType } from '@shieldsai/shared-db';
import { createHash } from 'crypto';

function hashIdentifier(identifier: string): string {
  return createHash('sha256').update(identifier.toLowerCase().trim()).digest('hex');
}

function determineSeverity(
  source: ExposureSource,
  dataType: WatchlistType
): ExposureSeverity {
  const criticalSources = [ExposureSource.darkWebForum, ExposureSource.honeypot];
  const warningSources = [ExposureSource.hibp, ExposureSource.shodan];
  const criticalTypes = [WatchlistType.ssn];

  if (criticalTypes.includes(dataType)) return ExposureSeverity.critical;
  if (criticalSources.includes(source)) return ExposureSeverity.critical;
  if (warningSources.includes(source)) return ExposureSeverity.warning;
  return ExposureSeverity.info;
}

export class ScanService {
  async checkHIBP(email: string): Promise<{ exposed: boolean; sources: string[] }> {
    try {
      // Have I Been Pwned breached-account lookup (v3 API).
      const response = await fetch(
        `https://haveibeenpwned.com/api/v3/breachedaccount/${encodeURIComponent(email)}`,
        {
          headers: {
            'hibp-api-key': process.env.HIBP_API_KEY || '',
            Accept: 'application/json',
          },
          signal: AbortSignal.timeout(15000),
        }
      );

      if (response.status === 404) {
        return { exposed: false, sources: [] };
      }

      if (!response.ok) {
        console.error(`[ScanService:HIBP] Status ${response.status} for ${email}`);
        return { exposed: false, sources: [] };
      }

      const data = await response.json();
      const sources = Array.isArray(data)
        ? data.map((p: { Name: string }) => p.Name)
        : [];

      return { exposed: sources.length > 0, sources };
    } catch (error) {
      console.error('[ScanService:HIBP] Error:', error);
      return { exposed: false, sources: [] };
    }
  }

  async checkShodan(domain: string): Promise<{ exposed: boolean; ports: string[]; ips: string[] }> {
    try {
      const response = await fetch(
        `https://api.shodan.io/shodan/host/${encodeURIComponent(domain)}`,
        {
          headers: {
            Authorization: `Bearer ${process.env.SHODAN_API_KEY || ''}`,
          },
          signal: AbortSignal.timeout(15000),
        }
      );

      if (response.status === 404) {
        return { exposed: false, ports: [], ips: [] };
      }

      if (!response.ok) {
        console.error(`[ScanService:Shodan] Status ${response.status} for ${domain}`);
        return { exposed: false, ports: [], ips: [] };
      }

      const data = await response.json();
      return {
        exposed: !!data.ip_str,
        ports: data.ports?.map(String) || [],
        ips: [data.ip_str || ''],
      };
    } catch (error) {
      console.error('[ScanService:Shodan] Error:', error);
      return { exposed: false, ports: [], ips: [] };
    }
  }

  async processSubscriptionScan(
    subscriptionId: string,
    watchlistItems: Awaited<ReturnType<ScanService['getWatchlistItems']>>
  ): Promise<{ exposuresCreated: number; exposuresUpdated: number }> {
    let exposuresCreated = 0;
    let exposuresUpdated = 0;

    for (const item of watchlistItems) {
      const identifier = item.value;
      const identifierHash = hashIdentifier(identifier);

      switch (item.type) {
        case WatchlistType.email: {
          const hibpResult = await this.checkHIBP(identifier);
          if (hibpResult.exposed) {
            for (const source of hibpResult.sources) {
              const existing = await prisma.exposure.findFirst({
                where: {
                  subscriptionId,
                  source: ExposureSource.hibp,
                  identifierHash,
                  metadata: { path: ['dbName'], equals: source },
                },
              });

              if (existing) {
                await prisma.exposure.update({
                  where: { id: existing.id },
                  data: { detectedAt: new Date() },
                });
                exposuresUpdated++;
              } else {
                await prisma.exposure.create({
                  data: {
                    subscriptionId,
                    watchlistItemId: item.id,
                    source: ExposureSource.hibp,
                    dataType: item.type,
                    identifier,
                    identifierHash,
                    severity: determineSeverity(ExposureSource.hibp, item.type),
                    isFirstTime: true,
                    metadata: { dbName: source },
                    detectedAt: new Date(),
                  },
                });
                exposuresCreated++;
              }
            }
          }
          break;
        }

        case WatchlistType.domain: {
          const shodanResult = await this.checkShodan(identifier);
          if (shodanResult.exposed) {
            const existing = await prisma.exposure.findFirst({
              where: {
                subscriptionId,
                source: ExposureSource.shodan,
                identifierHash,
              },
            });

            if (existing) {
              await prisma.exposure.update({
                where: { id: existing.id },
                data: {
                  detectedAt: new Date(),
                  metadata: { ports: shodanResult.ports, ips: shodanResult.ips },
                },
              });
              exposuresUpdated++;
            } else {
              await prisma.exposure.create({
                data: {
                  subscriptionId,
                  watchlistItemId: item.id,
                  source: ExposureSource.shodan,
                  dataType: item.type,
                  identifier,
                  identifierHash,
                  severity: determineSeverity(ExposureSource.shodan, item.type),
                  isFirstTime: true,
                  metadata: { ports: shodanResult.ports, ips: shodanResult.ips },
                  detectedAt: new Date(),
                },
              });
              exposuresCreated++;
            }
          }
          break;
        }

        default: {
          const existing = await prisma.exposure.findFirst({
            where: { subscriptionId, watchlistItemId: item.id, identifierHash },
          });

          if (!existing) {
            await prisma.exposure.create({
              data: {
                subscriptionId,
                watchlistItemId: item.id,
                source: ExposureSource.darkWebForum,
                dataType: item.type,
                identifier,
                identifierHash,
                severity: determineSeverity(ExposureSource.darkWebForum, item.type),
                isFirstTime: true,
                detectedAt: new Date(),
              },
            });
            exposuresCreated++;
          }
          break;
        }
      }
    }

    return { exposuresCreated, exposuresUpdated };
  }

  async getWatchlistItems(subscriptionId: string) {
    return prisma.watchlistItem.findMany({
      where: { subscriptionId, isActive: true },
    });
  }
}

export const scanService = new ScanService();
155
services/darkwatch/src/scheduler.service.ts
Normal file
@@ -0,0 +1,155 @@
import { prisma, SubscriptionTier, SubscriptionStatus } from '@shieldsai/shared-db';
import { tierConfig } from '@shieldsai/shared-billing';
import { darkwatchScanQueue } from '@shieldsai/jobs';
import { randomUUID } from 'crypto';

const CRON_EXPRESSIONS = {
  daily: '0 0 * * *',
  hourly: '0 * * * *',
  realtime: null,
};

export class SchedulerService {
  async scheduleSubscriptionScans() {
    const activeSubscriptions = await prisma.subscription.findMany({
      where: {
        tier: { in: [SubscriptionTier.basic, SubscriptionTier.plus, SubscriptionTier.premium] },
        status: SubscriptionStatus.active,
      },
      select: {
        id: true,
        tier: true,
        userId: true,
      },
    });

    const jobsEnqueued = [];

    for (const subscription of activeSubscriptions) {
      const frequency = tierConfig[subscription.tier].features.darkWebScanFrequency;
      const cron = CRON_EXPRESSIONS[frequency];

      if (!cron) {
        continue;
      }

      const jobKey = `scheduled-scan:${subscription.id}`;

      try {
        await darkwatchScanQueue.add(
          'scheduled-scan',
          {
            subscriptionId: subscription.id,
            tier: subscription.tier,
            scanType: 'scheduled',
          },
          {
            jobId: jobKey,
            repeat: {
              every: frequency === 'daily'
                ? 24 * 60 * 60 * 1000
                : 60 * 60 * 1000,
            },
            priority: subscription.tier === SubscriptionTier.premium ? 1 : 3,
          }
        );

        jobsEnqueued.push({
          subscriptionId: subscription.id,
          tier: subscription.tier,
          frequency,
        });
      } catch (error) {
        if ((error as Error).message?.includes('Duplicate')) {
          continue;
        }
        console.error(
          `[SchedulerService] Failed to schedule scan for ${subscription.id}:`,
          error
        );
      }
    }

    return jobsEnqueued;
  }

  async enqueueOnDemandScan(subscriptionId: string) {
    const subscription = await prisma.subscription.findUnique({
      where: { id: subscriptionId },
      select: { id: true, tier: true },
    });

    if (!subscription) {
      throw new Error(`Subscription ${subscriptionId} not found`);
    }

    return darkwatchScanQueue.add(
      'on-demand-scan',
      {
        subscriptionId,
        tier: subscription.tier,
        scanType: 'on-demand',
      },
      {
        priority: 1,
        jobId: `on-demand-scan:${subscriptionId}:${randomUUID()}`,
      }
    );
  }

  async enqueueRealtimeTrigger(subscriptionId: string, sourceData: Record<string, unknown>) {
    const subscription = await prisma.subscription.findUnique({
      where: { id: subscriptionId },
      select: { id: true, tier: true },
    });

    if (!subscription || subscription.tier !== SubscriptionTier.premium) {
      throw new Error('Realtime triggers require Premium tier');
    }

    return darkwatchScanQueue.add(
      'realtime-trigger',
      {
        subscriptionId,
        tier: subscription.tier,
        scanType: 'realtime',
        sourceData,
      },
      {
        priority: 0,
        jobId: `realtime-trigger:${subscriptionId}:${randomUUID()}`,
      }
    );
  }

  async rescheduleAll() {
    const repeatableJobs = await darkwatchScanQueue.getRepeatableJobs();

    for (const job of repeatableJobs) {
      await darkwatchScanQueue.removeRepeatableByKey(job.key);
    }

    return this.scheduleSubscriptionScans();
  }

  async getScanSchedule(subscriptionId: string) {
    const subscription = await prisma.subscription.findUnique({
      where: { id: subscriptionId },
      select: { tier: true },
    });

    if (!subscription) return null;

    const frequency = tierConfig[subscription.tier].features.darkWebScanFrequency;

    return {
      subscriptionId,
      tier: subscription.tier,
      frequency,
      cron: CRON_EXPRESSIONS[frequency],
      nextRun: frequency === 'realtime' ? 'event-driven' : CRON_EXPRESSIONS[frequency],
    };
  }
}

export const schedulerService = new SchedulerService();
97
services/darkwatch/src/watchlist.service.ts
Normal file
@@ -0,0 +1,97 @@
import { prisma, WatchlistType } from '@shieldsai/shared-db';
import { createHash } from 'crypto';

export function normalizeValue(type: WatchlistType, value: string): string {
  const trimmed = value.trim().toLowerCase();
  switch (type) {
    case WatchlistType.email:
      return trimmed.replace(/\s+/g, '');
    case WatchlistType.phoneNumber:
      return trimmed.replace(/[\s\-\(\)]/g, '');
    case WatchlistType.ssn:
      return trimmed.replace(/-/g, '');
    case WatchlistType.address:
      return trimmed;
    case WatchlistType.domain:
      return trimmed.replace(/^https?:\/\//, '').replace(/\/.*$/, '');
    default:
      return trimmed;
  }
}

export function hashValue(value: string): string {
  return createHash('sha256').update(value).digest('hex');
}

export class WatchlistService {
  async addItem(
    subscriptionId: string,
    type: WatchlistType,
    value: string,
    maxItems: number
  ) {
    const normalized = normalizeValue(type, value);
    const itemHash = hashValue(normalized);

    const currentCount = await prisma.watchlistItem.count({
      where: { subscriptionId, isActive: true },
    });

    if (currentCount >= maxItems) {
      throw new Error(
        `Watchlist limit reached (${maxItems} items). Upgrade your plan to add more.`
      );
    }

    const existing = await prisma.watchlistItem.findFirst({
      where: { subscriptionId, type, hash: itemHash },
    });

    if (existing) {
      if (!existing.isActive) {
        return prisma.watchlistItem.update({
          where: { id: existing.id },
          data: { isActive: true },
        });
      }
      return existing;
    }

    return prisma.watchlistItem.create({
      data: {
        subscriptionId,
        type,
        value: normalized,
        hash: itemHash,
      },
    });
  }

  async getItems(subscriptionId: string) {
    return prisma.watchlistItem.findMany({
      where: { subscriptionId, isActive: true },
      orderBy: { createdAt: 'desc' },
    });
  }

  async removeItem(id: string, subscriptionId: string) {
    return prisma.watchlistItem.update({
      where: { id },
      data: { isActive: false },
    });
  }

  async getActiveItemsForScan(subscriptionId: string) {
    return prisma.watchlistItem.findMany({
      where: { subscriptionId, isActive: true },
    });
  }

  async getItemCount(subscriptionId: string) {
    return prisma.watchlistItem.count({
      where: { subscriptionId, isActive: true },
    });
  }
}

export const watchlistService = new WatchlistService();
226
services/darkwatch/src/webhook.service.ts
Normal file
@@ -0,0 +1,226 @@
import { prisma, ExposureSource, ExposureSeverity, WatchlistType, AlertType, AlertSeverity } from '@shieldsai/shared-db';
import { createHash, createHmac } from 'crypto';
import { mixpanelService, EventType } from '@shieldsai/shared-analytics';

function hashIdentifier(identifier: string): string {
  return createHash('sha256').update(identifier.toLowerCase().trim()).digest('hex');
}

function determineSeverity(
  source: ExposureSource,
  dataType: WatchlistType
): ExposureSeverity {
  const criticalSources = [ExposureSource.darkWebForum, ExposureSource.honeypot];
  const warningSources = [ExposureSource.hibp, ExposureSource.shodan];
  const criticalTypes = [WatchlistType.ssn];

  if (criticalTypes.includes(dataType)) return ExposureSeverity.critical;
  if (criticalSources.includes(source)) return ExposureSeverity.critical;
  if (warningSources.includes(source)) return ExposureSeverity.warning;
  return ExposureSeverity.info;
}

export interface WebhookPayload {
  source: string;
  identifier: string;
  identifierType: string;
  metadata?: Record<string, unknown>;
  timestamp?: string;
}

export class WebhookService {
  async processExternalWebhook(payload: WebhookPayload): Promise<{
    exposuresCreated: number;
    alertsCreated: number;
  }> {
    const source = this.mapSource(payload.source);
    const dataType = this.mapDataType(payload.identifierType);
    const identifier = payload.identifier.toLowerCase().trim();
    const identifierHash = hashIdentifier(identifier);
    const severity = determineSeverity(source, dataType);

    const matchingItems = await prisma.watchlistItem.findMany({
      where: {
        isActive: true,
        OR: [
          { hash: identifierHash, type: dataType },
          { value: identifier, type: dataType },
        ],
      },
      include: {
        subscription: {
          select: {
            id: true,
            tier: true,
            userId: true,
          },
        },
      },
    });

    let exposuresCreated = 0;
    let alertsCreated = 0;

    for (const item of matchingItems) {
      const existing = await prisma.exposure.findFirst({
        where: {
          subscriptionId: item.subscriptionId,
          source,
          identifierHash,
        },
      });

      if (existing) {
        await prisma.exposure.update({
          where: { id: existing.id },
          data: { detectedAt: new Date() },
        });
        continue;
      }

      const exposure = await prisma.exposure.create({
        data: {
          subscriptionId: item.subscriptionId,
          watchlistItemId: item.id,
          source,
          dataType,
          identifier,
          identifierHash,
          severity,
          isFirstTime: true,
          metadata: payload.metadata || {},
          detectedAt: new Date(),
        },
      });

      exposuresCreated++;

      const alertChannels = this.getAlertChannelsForTier(item.subscription.tier);

      await prisma.alert.create({
        data: {
          subscriptionId: item.subscriptionId,
          userId: item.subscription.userId,
          exposureId: exposure.id,
          type: AlertType.exposure_detected,
          title: `New Exposure Detected: ${this.getSourceLabel(source)}`,
          message: this.buildAlertMessage(identifier, source, severity),
          severity: this.mapAlertSeverity(severity),
          channel: alertChannels,
        },
      });

      alertsCreated++;

      await mixpanelService.track(EventType.EXPOSURE_DETECTED, {
        userId: item.subscription.userId,
        exposureType: dataType,
        severity,
        source,
        subscriptionTier: item.subscription.tier,
      });
    }

    return { exposuresCreated, alertsCreated };
  }

  async verifyWebhookSignature(
    body: string,
    signature: string,
    timestamp: string
  ): Promise<boolean> {
    const webhookSecret = process.env.DARKWATCH_WEBHOOK_SECRET;
    if (!webhookSecret) {
      console.warn('[WebhookService] DARKWATCH_WEBHOOK_SECRET not set; rejecting webhook');
      return false;
    }

    // Sign the timestamped body with the shared secret (HMAC, not a bare hash)
    // so the signature cannot be forged without the secret.
    const expected = createHmac('sha256', webhookSecret)
      .update(`${timestamp}:${body}`)
      .digest('hex');

    return expected === signature;
  }

  private mapSource(source: string): ExposureSource {
    const sourceMap: Record<string, ExposureSource> = {
      hibp: ExposureSource.hibp,
      'haveibeenpwned': ExposureSource.hibp,
      securitytrails: ExposureSource.securityTrails,
      censys: ExposureSource.censys,
      'darkweb-forum': ExposureSource.darkWebForum,
      'darkweb': ExposureSource.darkWebForum,
      shodan: ExposureSource.shodan,
      honeypot: ExposureSource.honeypot,
    };

    const normalized = source.toLowerCase().replace(/\s+/g, '');
    const mapped = sourceMap[normalized];
    if (!mapped) {
      console.warn(`[WebhookService] Unknown source "${source}", falling back to darkWebForum`);
    }
    return mapped || ExposureSource.darkWebForum;
  }

  private mapDataType(type: string): WatchlistType {
    const typeMap: Record<string, WatchlistType> = {
      email: WatchlistType.email,
      phone: WatchlistType.phoneNumber,
      phonenumber: WatchlistType.phoneNumber,
      ssn: WatchlistType.ssn,
      address: WatchlistType.address,
      domain: WatchlistType.domain,
    };

    const normalized = type.toLowerCase().trim();
    return typeMap[normalized] || WatchlistType.email;
  }

  private getAlertChannelsForTier(tier: string): string[] {
    const channelMap: Record<string, string[]> = {
      basic: ['email'],
      plus: ['email', 'push'],
      premium: ['email', 'push', 'sms'],
    };
    return channelMap[tier] || ['email'];
  }

  private mapAlertSeverity(severity: ExposureSeverity): AlertSeverity {
    return severity as AlertSeverity;
  }

  private getSourceLabel(source: ExposureSource): string {
    const labels: Record<ExposureSource, string> = {
      [ExposureSource.hibp]: 'Have I Been Pwned',
      [ExposureSource.securityTrails]: 'SecurityTrails',
      [ExposureSource.censys]: 'Censys',
      [ExposureSource.darkWebForum]: 'Dark Web Forum',
      [ExposureSource.shodan]: 'Shodan',
      [ExposureSource.honeypot]: 'Honeypot',
    };
    return labels[source] || source;
  }

  private buildAlertMessage(
    identifier: string,
    source: ExposureSource,
    severity: ExposureSeverity
  ): string {
    const masked = this.maskIdentifier(identifier);
    return `${severity.toUpperCase()}: "${masked}" found in ${this.getSourceLabel(source)}.`;
  }

  private maskIdentifier(identifier: string): string {
    if (identifier.includes('@')) {
      const [user, domain] = identifier.split('@');
      const maskedUser = user.slice(0, 2) + '***' + user.slice(-1);
      return `${maskedUser}@${domain}`;
    }
    if (identifier.length > 8) {
      return identifier.slice(0, 3) + '***' + identifier.slice(-2);
    }
    return identifier;
  }
}

export const webhookService = new WebhookService();
227
services/spamshield/src/feature-flags.ts
Normal file
@@ -0,0 +1,227 @@
/**
 * Feature Flag Management System
 * Centralized feature flag handling with type safety and runtime updates
 */

import type { z } from 'zod';

/**
 * Type for feature flag values
 */
export type FeatureFlagValue = boolean | string | number;

/**
 * Interface for a feature flag definition
 */
export interface FeatureFlag<T = FeatureFlagValue> {
  key: string;
  defaultValue: T;
  description?: string;
  allowedValues?: T[]; // For enum-like flags
  category?: string;
}

/**
 * Feature flag registry - stores all defined flags
 */
export interface FeatureFlagRegistry {
  [key: string]: FeatureFlag;
}

/**
 * Feature flag resolver - handles flag resolution logic
 */
export class FeatureFlagResolver {
  private flags: FeatureFlagRegistry;
  private resolvedCache: Map<string, FeatureFlagValue> = new Map();

  constructor(flags: FeatureFlagRegistry) {
    this.flags = flags;
  }

  /**
   * Resolve a feature flag value
   * Priority: Environment > Cache > Default
   */
  resolve<T>(key: string, defaultValue: T): T {
    // Check cache first
    if (this.resolvedCache.has(key)) {
      return this.resolvedCache.get(key)! as T;
    }

    // Check environment variable (allows runtime updates)
    const envValue = process.env[`FLAG_${key.toUpperCase()}`];
    if (envValue !== undefined) {
      // Try to parse as JSON first, then as boolean, then as string
      let parsed: FeatureFlagValue;
      try {
        parsed = JSON.parse(envValue);
      } catch {
        parsed = envValue.toLowerCase() === 'true' ? true :
          envValue.toLowerCase() === 'false' ? false :
          envValue;
      }

      // Validate against allowed values if defined
      const flag = this.flags[key];
      if (flag && flag.allowedValues && !flag.allowedValues.includes(parsed)) {
        console.warn(`Invalid value for flag ${key}: ${parsed}. Using default.`);
        parsed = defaultValue as FeatureFlagValue;
      }

      this.resolvedCache.set(key, parsed);
      return parsed as T;
    }

    // Use cached value if available
    if (this.resolvedCache.has(key)) {
      return this.resolvedCache.get(key)! as T;
    }

    // Return default
    this.resolvedCache.set(key, defaultValue as FeatureFlagValue);
    return defaultValue as T;
  }

  /**
   * Check if a flag is enabled (boolean check)
   */
  isEnabled<T>(key: string, defaultValue: T): T {
    return this.resolve(key, defaultValue) as T;
  }

  /**
   * Get flag definition
   */
  getDefinition(key: string): FeatureFlag | undefined {
    return this.flags[key];
  }

  /**
   * List all registered flags
   */
  getAllFlags(): FeatureFlagRegistry {
    return { ...this.flags };
  }

  /**
   * Clear the resolution cache (useful for testing)
   */
  clearCache(): void {
    this.resolvedCache.clear();
  }
}

/**
 * Feature flag configuration with pre-defined flags
 */
export const featureFlags: FeatureFlagRegistry = {
  // SpamShield Feature Flags
  'spamshield.enable.number.reputation': {
    key: 'spamshield_enable_number_reputation',
    defaultValue: true,
    description: 'Enable number reputation checking (Hiya API integration)',
    category: 'spamshield',
  },
  'spamshield.enable.content.classification': {
    key: 'spamshield_enable_content_classification',
    defaultValue: true,
    description: 'Enable SMS content classification (BERT model)',
    category: 'spamshield',
  },
  'spamshield.enable.behavioral.analysis': {
    key: 'spamshield_enable_behavioral_analysis',
    defaultValue: true,
    description: 'Enable call behavioral analysis',
    category: 'spamshield',
  },
  'spamshield.enable.community.intelligence': {
    key: 'spamshield_enable_community_intelligence',
    defaultValue: true,
    description: 'Enable community intelligence sharing',
    category: 'spamshield',
  },
  'spamshield.enable.real.time.blocking': {
    key: 'spamshield_enable_real_time_blocking',
    defaultValue: true,
    description: 'Enable real-time spam blocking',
    category: 'spamshield',
  },
  'spamshield.enable.multiple.sources': {
    key: 'spamshield_enable_multiple_sources',
    defaultValue: false,
    description: 'Enable multiple reputation source aggregation (Truecaller, etc.)',
    category: 'spamshield',
  },
  'spamshield.enable.ml.classifier': {
    key: 'spamshield_enable_ml_classifier',
    defaultValue: false,
    description: 'Enable ML-based spam classification',
    category: 'spamshield',
  },

  // VoicePrint Feature Flags
  'voiceprint.enable.ml.service': {
    key: 'voiceprint_enable_ml_service',
    defaultValue: false,
    description: 'Enable ML service integration for voice analysis',
    category: 'voiceprint',
  },
  'voiceprint.enable.faiss.index': {
    key: 'voiceprint_enable_faiss_index',
    defaultValue: true,
    description: 'Enable FAISS index for voice matching',
    category: 'voiceprint',
  },
  'voiceprint.enable.batch.analysis': {
    key: 'voiceprint_enable_batch_analysis',
    defaultValue: true,
    description: 'Enable batch voice analysis',
    category: 'voiceprint',
  },
  'voiceprint.enable.realtime.analysis': {
    key: 'voiceprint_enable_realtime_analysis',
    defaultValue: false,
    description: 'Enable real-time voice analysis',
    category: 'voiceprint',
  },
  'voiceprint.enable.mock.model': {
    key: 'voiceprint_enable_mock_model',
    defaultValue: true,
    description: 'Enable mock model for development',
    category: 'voiceprint',
  },

  // General Platform Flags
  'platform.enable.audit.logs': {
    key: 'platform_enable_audit_logs',
    defaultValue: true,
    description: 'Enable comprehensive audit logging',
    category: 'platform',
  },
  'platform.enable.kpi.tracking': {
    key: 'platform_enable_kpi_tracking',
    defaultValue: true,
    description: 'Enable KPI snapshot tracking',
    category: 'platform',
  },
};

/**
 * Create a resolver instance with the default flags
 */
export const featureFlagResolver = new FeatureFlagResolver(featureFlags);

/**
 * Convenience function for quick flag checks
 */
export function isFeatureEnabled<T>(key: string, defaultValue: T): T {
  return featureFlagResolver.isEnabled(key, defaultValue);
}

/**
 * Check if a flag is enabled with type safety
 */
export function checkFlag<T>(key: string, defaultValue: T): T {
  return featureFlagResolver.resolve(key, defaultValue);
}
118
services/spamshield/src/spamshield.audit-logger.ts
Normal file
@@ -0,0 +1,118 @@
import { createHash } from 'crypto';

export type AuditClassificationType = 'sms' | 'call';

export interface AuditClassificationEntry {
  id: string;
  timestamp: string;
  type: AuditClassificationType;
  phoneNumberHash: string;
  decision: 'spam' | 'ham' | 'block' | 'flag' | 'allow';
  confidence: number;
  reasons: string[];
  featureFlags: Record<string, boolean>;
  metadata?: Record<string, unknown>;
}

const MAX_AUDIT_LOG_SIZE = 10_000;

class AuditLogger {
  private entries: AuditClassificationEntry[] = [];

  logClassification(entry: Omit<AuditClassificationEntry, 'id' | 'timestamp'>): AuditClassificationEntry {
    const record: AuditClassificationEntry = {
      id: `audit-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`,
      timestamp: new Date().toISOString(),
      ...entry,
    };

    this.entries.push(record);

    if (this.entries.length > MAX_AUDIT_LOG_SIZE) {
      this.entries.shift();
    }

    console.log(
      `[SpamShield:Audit] type=${record.type} decision=${record.decision} ` +
      `confidence=${record.confidence.toFixed(3)} reasons=${record.reasons.join(',') || 'none'} ` +
      `phoneHash=${record.phoneNumberHash}`
    );

    return record;
  }

  getEntries(
    filters?: {
      type?: AuditClassificationType;
      decision?: string;
      startDate?: Date;
      endDate?: Date;
      limit?: number;
    }
  ): AuditClassificationEntry[] {
    let results = this.entries;

    if (filters?.type) {
      results = results.filter(e => e.type === filters.type);
    }

    if (filters?.decision) {
      results = results.filter(e => e.decision === filters.decision);
    }

    if (filters?.startDate) {
      results = results.filter(e => new Date(e.timestamp) >= filters.startDate!);
    }

    if (filters?.endDate) {
      results = results.filter(e => new Date(e.timestamp) <= filters.endDate!);
    }

    if (filters?.limit) {
      results = results.slice(-filters.limit);
    }

    return results;
  }

  getSummary(): {
    totalEntries: number;
    spamCount: number;
    hamCount: number;
    blockCount: number;
    flagCount: number;
    allowCount: number;
    avgConfidence: number;
  } {
    const spamCount = this.entries.filter(e => e.decision === 'spam' || e.decision === 'block').length;
    const hamCount = this.entries.filter(e => e.decision === 'ham' || e.decision === 'allow').length;
    const blockCount = this.entries.filter(e => e.decision === 'block').length;
    const flagCount = this.entries.filter(e => e.decision === 'flag').length;
    const allowCount = this.entries.filter(e => e.decision === 'allow').length;
    const avgConfidence =
      this.entries.length > 0
        ? this.entries.reduce((s, e) => s + e.confidence, 0) / this.entries.length
        : 0;

    return {
      totalEntries: this.entries.length,
      spamCount,
      hamCount,
      blockCount,
      flagCount,
      allowCount,
      avgConfidence: Math.round(avgConfidence * 1000) / 1000,
    };
  }

  clear(): void {
    this.entries = [];
  }
}

export const spamAuditLogger = new AuditLogger();

export function hashPhoneNumber(phoneNumber: string): string {
  const hash = createHash('sha256').update(phoneNumber.trim()).digest('hex');
  return `sha256_${hash}`;
}
118
services/spamshield/src/spamshield.error-handler.ts
Normal file
@@ -0,0 +1,118 @@
import { FastifyReply } from 'fastify';
import { SpamErrorCode, HttpStatus, SpamErrorResponse } from './spamshield.config';

export { SpamErrorCode, HttpStatus };
export type { SpamErrorResponse };

/**
 * Standardized error response builder for SpamShield API
 */
export class ErrorHandler {
  /**
   * Create a standard error response
   */
  static create(
    code: SpamErrorCode,
    message: string,
    options?: {
      field?: string;
      requestId?: string;
      additionalData?: Record<string, unknown>;
    }
  ): SpamErrorResponse {
    return {
      error: {
        code,
        message,
        ...(options?.field && { field: options.field }),
        timestamp: new Date().toISOString(),
        ...(options?.requestId && { requestId: options.requestId }),
      },
    };
  }

  /**
   * Send a standard error response with appropriate HTTP status code
   */
  static send(
    reply: FastifyReply,
    code: SpamErrorCode,
    message: string,
    options?: {
      field?: string;
      status?: number;
      requestId?: string;
    }
  ): void {
    const status = options?.status ?? this.getStatusForCode(code);
    const errorResponse = this.create(code, message, {
      field: options?.field,
      requestId: options?.requestId,
    });
    reply.code(status).send(errorResponse);
  }

  /**
   * Map error codes to HTTP status codes
   */
  private static getStatusForCode(code: SpamErrorCode): number {
    const statusMap: Record<SpamErrorCode, number> = {
      // Client errors
      [SpamErrorCode.INVALID_REQUEST]: HttpStatus.BAD_REQUEST,
      [SpamErrorCode.MISSING_REQUIRED_FIELD]: HttpStatus.BAD_REQUEST,
      [SpamErrorCode.UNAUTHORIZED]: HttpStatus.UNAUTHORIZED,
      [SpamErrorCode.NOT_FOUND]: HttpStatus.NOT_FOUND,
      [SpamErrorCode.VALIDATION_ERROR]: HttpStatus.BAD_REQUEST,

      // Server errors
      [SpamErrorCode.CLASSIFICATION_FAILED]: HttpStatus.UNPROCESSABLE_ENTITY,
      [SpamErrorCode.REPUTATION_CHECK_FAILED]: HttpStatus.UNPROCESSABLE_ENTITY,
      [SpamErrorCode.ANALYSIS_FAILED]: HttpStatus.UNPROCESSABLE_ENTITY,
      [SpamErrorCode.FEEDBACK_RECORD_FAILED]: HttpStatus.UNPROCESSABLE_ENTITY,
      [SpamErrorCode.DATABASE_ERROR]: HttpStatus.INTERNAL_SERVER_ERROR,
      [SpamErrorCode.RATE_LIMIT_EXCEEDED]: HttpStatus.TOO_MANY_REQUESTS,
      [SpamErrorCode.SERVICE_UNAVAILABLE]: HttpStatus.SERVICE_UNAVAILABLE,
    };
    return statusMap[code] ?? HttpStatus.INTERNAL_SERVER_ERROR;
  }

  /**
   * Validate required string field
   */
  static validateRequiredField(
    value: unknown,
    fieldName: string
  ): { isValid: boolean; error?: { code: SpamErrorCode; message: string; field: string } } {
    if (!value || typeof value !== 'string' || value.trim() === '') {
      return {
        isValid: false,
        error: {
          code: SpamErrorCode.MISSING_REQUIRED_FIELD,
          message: `${fieldName} is required`,
          field: fieldName,
        },
      };
    }
    return { isValid: true };
  }

  /**
   * Validate boolean field
   */
  static validateBooleanField(
    value: unknown,
    fieldName: string
  ): { isValid: boolean; error?: { code: SpamErrorCode; message: string; field: string } } {
    if (value === undefined || value === null || typeof value !== 'boolean') {
      return {
        isValid: false,
        error: {
          code: SpamErrorCode.VALIDATION_ERROR,
          message: `${fieldName} must be a boolean`,
          field: fieldName,
        },
      };
    }
    return { isValid: true };
  }
}
@@ -1,6 +1,30 @@
-export * from "./preprocessor/AudioPreprocessor";
-export * from "./enrollment/VoiceEnrollmentService";
-export * from "./analysis/AnalysisService";
-export * from "./analysis/BatchAnalysisService";
-export * from "./embedding/EmbeddingService";
-export * from "./indexer/FAISSIndex";
+// Config
+export {
+  voicePrintEnv,
+  VoicePrintSource,
+  AnalysisJobStatus,
+  DetectionType,
+  ConfidenceLevel,
+  audioPreprocessingConfig,
+  voicePrintFeatureFlags,
+  voicePrintRateLimits,
+  checkFlag,
+  isFeatureEnabled,
+} from './voiceprint.config';
+
+
+
+// Services
+export {
+  AudioPreprocessor,
+  VoiceEnrollmentService,
+  AnalysisService,
+  BatchAnalysisService,
+  EmbeddingService,
+  FAISSIndex,
+  audioPreprocessor,
+  voiceEnrollmentService,
+  analysisService,
+  batchAnalysisService,
+  embeddingService,
+} from './voiceprint.service';
102
services/voiceprint/src/voiceprint.config.ts
Normal file
@@ -0,0 +1,102 @@
import { z } from 'zod';
import { checkFlag } from './voiceprint.feature-flags';

// Environment variables for VoicePrint
// Note: numeric defaults are given as strings because each schema parses a
// string from process.env and transforms it to a number.
const envSchema = z.object({
  ECAPA_TDNN_MODEL_PATH: z.string().default('./models/ecapa-tdnn'),
  ML_SERVICE_URL: z.string().default('http://localhost:8001'),
  FAISS_INDEX_PATH: z.string().default('./data/voiceprint_faiss.index'),
  AUDIO_STORAGE_BUCKET: z.string().default('voiceprint-audio'),
  AUDIO_STORAGE_ENDPOINT: z.string().default('http://localhost:9000'),
  SYNTHETIC_THRESHOLD: z.string().transform(Number).default('0.75'),
  ENROLLMENT_MIN_DURATION_SEC: z.string().transform(Number).default('3'),
  ENROLLMENT_MAX_DURATION_SEC: z.string().transform(Number).default('60'),
  EMBEDDING_DIMENSIONS: z.string().transform(Number).default('192'),
  BATCH_MAX_FILES: z.string().transform(Number).default('20'),
  ANALYSIS_TIMEOUT_MS: z.string().transform(Number).default('30000'),
});

export const voicePrintEnv = envSchema.parse({
  ECAPA_TDNN_MODEL_PATH: process.env.ECAPA_TDNN_MODEL_PATH,
  ML_SERVICE_URL: process.env.ML_SERVICE_URL,
  FAISS_INDEX_PATH: process.env.FAISS_INDEX_PATH,
  AUDIO_STORAGE_BUCKET: process.env.AUDIO_STORAGE_BUCKET,
  AUDIO_STORAGE_ENDPOINT: process.env.AUDIO_STORAGE_ENDPOINT,
  SYNTHETIC_THRESHOLD: process.env.SYNTHETIC_THRESHOLD,
  ENROLLMENT_MIN_DURATION_SEC: process.env.ENROLLMENT_MIN_DURATION_SEC,
  ENROLLMENT_MAX_DURATION_SEC: process.env.ENROLLMENT_MAX_DURATION_SEC,
  EMBEDDING_DIMENSIONS: process.env.EMBEDDING_DIMENSIONS,
  BATCH_MAX_FILES: process.env.BATCH_MAX_FILES,
  ANALYSIS_TIMEOUT_MS: process.env.ANALYSIS_TIMEOUT_MS,
});

// Audio source types
export enum VoicePrintSource {
  UPLOAD = 'upload',
  S3 = 's3',
  URL = 'url',
  REALTIME = 'realtime',
}

// Analysis job status
export enum AnalysisJobStatus {
  PENDING = 'pending',
  PROCESSING = 'processing',
  COMPLETED = 'completed',
  FAILED = 'failed',
  CANCELLED = 'cancelled',
}

// Detection result types
export enum DetectionType {
  SYNTHETIC_VOICE = 'synthetic_voice',
  VOICE_CLONE = 'voice_clone',
  DEEPFAKE = 'deepfake',
  NATURAL = 'natural',
}

// Confidence levels
export enum ConfidenceLevel {
  LOW = 'low',
  MEDIUM = 'medium',
  HIGH = 'high',
  VERY_HIGH = 'very_high',
}

// Audio preprocessing configuration
export const audioPreprocessingConfig = {
  sampleRate: 16000,
  channels: 1,
  bitDepth: 16,
  vadThreshold: 0.5,
  noiseReduction: true,
  maxSilenceDurationMs: 500,
};

// Feature flags - use centralized system
export const voicePrintFeatureFlags = {
  enableMLService: checkFlag('voiceprint.enable.ml.service', false),
  enableFAISSIndex: checkFlag('voiceprint.enable.faiss.index', true),
  enableBatchAnalysis: checkFlag('voiceprint.enable.batch.analysis', true),
  enableRealtimeAnalysis: checkFlag('voiceprint.enable.realtime.analysis', false),
  enableMockModel: checkFlag('voiceprint.enable.mock.model', true),
};

// Rate limits for voice analysis
export const voicePrintRateLimits = {
  basic: {
    analysesPerMinute: 5,
    enrollmentsPerDay: 10,
    maxAudioFileSizeMB: 50,
  },
  plus: {
    analysesPerMinute: 30,
    enrollmentsPerDay: 50,
    maxAudioFileSizeMB: 200,
  },
  premium: {
    analysesPerMinute: 100,
    enrollmentsPerDay: 500,
    maxAudioFileSizeMB: 500,
  },
};
7
services/voiceprint/src/voiceprint.feature-flags.ts
Normal file
@@ -0,0 +1,7 @@
/**
 * VoicePrint Feature Flags
 * Re-exports the checkFlag function from the centralized feature flag system
 */

// Re-export the checkFlag function from the spamshield feature flags module
export { checkFlag } from '../spamshield/feature-flags';
594
services/voiceprint/src/voiceprint.service.ts
Normal file
@@ -0,0 +1,594 @@
|
||||
import { prisma, VoiceEnrollment, VoiceAnalysis } from '@shieldsai/shared-db';
|
||||
import {
|
||||
voicePrintEnv,
|
||||
AnalysisJobStatus,
|
||||
DetectionType,
|
||||
ConfidenceLevel,
|
||||
audioPreprocessingConfig,
|
||||
voicePrintFeatureFlags,
|
||||
} from './voiceprint.config';
|
||||
import { checkFlag } from './voiceprint.feature-flags';
|
||||
|
||||
// Audio preprocessing service
|
||||
export class AudioPreprocessor {
|
||||
/**
|
||||
* Normalize audio to 16kHz mono with VAD and noise reduction.
|
||||
* Returns preprocessing metadata and the processed audio buffer.
|
||||
*/
|
||||
async preprocess(
|
||||
audioBuffer: Buffer,
|
||||
options?: {
|
||||
sourceSampleRate?: number;
|
||||
channels?: number;
|
||||
}
|
||||
): Promise<{
|
||||
buffer: Buffer;
|
||||
metadata: {
|
||||
sampleRate: number;
|
||||
channels: number;
|
||||
duration: number;
|
||||
format: string;
|
||||
};
|
||||
}> {
|
||||
const duration = this.estimateDuration(audioBuffer, options?.sourceSampleRate ?? 44100);
|
||||
|
||||
if (duration < voicePrintEnv.ENROLLMENT_MIN_DURATION_SEC) {
|
||||
throw new Error(
|
||||
`Audio too short: ${duration.toFixed(1)}s < ${voicePrintEnv.ENROLLMENT_MIN_DURATION_SEC}s minimum`
|
||||
);
|
||||
}
|
||||
|
||||
if (duration > voicePrintEnv.ENROLLMENT_MAX_DURATION_SEC) {
|
||||
throw new Error(
|
||||
`Audio too long: ${duration.toFixed(1)}s > ${voicePrintEnv.ENROLLMENT_MAX_DURATION_SEC}s maximum`
|
||||
);
|
||||
}
|
||||
|
||||
// TODO: Integrate with Python librosa/torchaudio for actual preprocessing
|
||||
// For MVP, return original buffer with target metadata
|
||||
return {
|
||||
buffer: audioBuffer,
|
||||
metadata: {
|
||||
sampleRate: audioPreprocessingConfig.sampleRate,
|
||||
channels: audioPreprocessingConfig.channels,
|
||||
duration,
|
||||
format: 'wav',
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Apply Voice Activity Detection to remove silence segments.
|
||||
*/
|
||||
async applyVAD(buffer: Buffer): Promise<Buffer> {
|
||||
// TODO: Integrate with Python webrtcvad or silero-vad
|
||||
// For MVP, return original buffer
|
||||
return buffer;
|
||||
}
|
||||
|
||||
/**
|
||||
* Estimate audio duration from buffer size and sample rate.
|
||||
*/
|
||||
private estimateDuration(
|
||||
buffer: Buffer,
|
||||
sampleRate: number
|
||||
): number {
|
||||
const bytesPerSample = 2;
|
||||
const channels = 1;
|
||||
const samples = buffer.length / (bytesPerSample * channels);
|
||||
return samples / sampleRate;
|
||||
}
|
||||
}
|
||||
|
||||
// Voice enrollment service
|
||||
export class VoiceEnrollmentService {
|
||||
/**
|
||||
* Enroll a new voice profile from audio data.
|
||||
*/
|
||||
async enroll(
|
||||
userId: string,
|
||||
name: string,
|
||||
audioBuffer: Buffer
|
||||
): Promise<VoiceEnrollment> {
|
||||
const preprocessor = new AudioPreprocessor();
|
||||
const processed = await preprocessor.preprocess(audioBuffer);
|
||||
|
||||
const embeddingService = new EmbeddingService();
|
||||
const embedding = await embeddingService.extract(processed.buffer);
|
||||
const voiceHash = this.computeEmbeddingHash(embedding);
|
||||
|
||||
const enrollment = await prisma.voiceEnrollment.create({
|
||||
data: {
|
||||
userId,
|
||||
name,
|
||||
voiceHash,
|
||||
audioMetadata: {
|
||||
...processed.metadata,
|
||||
embeddingDimensions: embedding.length,
|
||||
enrollmentTimestamp: new Date().toISOString(),
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
// Index in FAISS for similarity search
|
||||
const faissIndex = new FAISSIndex();
|
||||
await faissIndex.add(enrollment.id, embedding);
|
||||
|
||||
return enrollment;
|
||||
}
|
||||
|
||||
/**
|
||||
* List all enrollments for a user.
|
||||
*/
|
||||
async listEnrollments(
|
||||
userId: string,
|
||||
options?: {
|
||||
isActive?: boolean;
|
||||
limit?: number;
|
||||
offset?: number;
|
||||
}
|
||||
): Promise<VoiceEnrollment[]> {
|
||||
return prisma.voiceEnrollment.findMany({
|
||||
where: {
|
||||
userId,
|
||||
...(options?.isActive !== undefined && { isActive: options.isActive }),
|
||||
},
|
||||
orderBy: { createdAt: 'desc' },
|
||||
take: options?.limit ?? 50,
|
||||
skip: options?.offset ?? 0,
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a single enrollment by ID.
|
||||
*/
|
||||
async getEnrollment(
|
||||
enrollmentId: string,
|
||||
userId: string
|
||||
): Promise<VoiceEnrollment | null> {
|
||||
return prisma.voiceEnrollment.findFirst({
|
||||
where: {
|
||||
id: enrollmentId,
|
||||
userId,
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Remove (deactivate) an enrollment.
|
||||
*/
|
||||
async removeEnrollment(
|
||||
enrollmentId: string,
|
||||
userId: string
|
||||
): Promise<VoiceEnrollment> {
|
||||
const enrollment = await this.getEnrollment(enrollmentId, userId);
|
||||
if (!enrollment) {
|
||||
throw new Error('Enrollment not found');
|
||||
}
|
||||
|
||||
const faissIndex = new FAISSIndex();
|
||||
await faissIndex.remove(enrollmentId);
|
||||
|
||||
return prisma.voiceEnrollment.update({
|
||||
where: { id: enrollmentId },
|
||||
data: { isActive: false },
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Search for similar enrollments using FAISS.
|
||||
*/
|
||||
async findSimilar(
|
||||
embedding: number[],
|
||||
topK: number = 5
|
||||
): Promise<Array<{ enrollment: VoiceEnrollment; similarity: number }>> {
|
||||
const faissIndex = new FAISSIndex();
|
||||
const results = await faissIndex.search(embedding, topK);
|
||||
|
||||
const enrollmentIds = results.map((r) => r.id);
|
||||
const enrollments = await prisma.voiceEnrollment.findMany({
|
||||
where: { id: { in: enrollmentIds } },
|
||||
});
|
||||
|
||||
return results.map((r, i) => ({
|
||||
enrollment: enrollments[i],
|
||||
similarity: r.similarity,
|
||||
}));
|
||||
}
|
||||
|
||||
  private computeEmbeddingHash(embedding: number[]): string {
    let hash = 0;
    for (let i = 0; i < embedding.length; i++) {
      hash = ((hash << 5) - hash) + embedding[i];
      hash |= 0;
    }
    return `vp_${Math.abs(hash).toString(16)}_${embedding.length}`;
  }
}

// Audio analysis service
export class AnalysisService {
  /**
   * Analyze a single audio file for synthetic voice detection.
   */
  async analyze(
    userId: string,
    audioBuffer: Buffer,
    options?: {
      enrollmentId?: string;
      audioUrl?: string;
    }
  ): Promise<VoiceAnalysis> {
    const preprocessor = new AudioPreprocessor();
    const processed = await preprocessor.preprocess(audioBuffer);

    const audioHash = this.computeAudioHash(audioBuffer);

    const embeddingService = new EmbeddingService();
    const analysisResult = await embeddingService.analyze(processed.buffer);

    const isSynthetic = analysisResult.confidence >= voicePrintEnv.SYNTHETIC_THRESHOLD;

    const voiceAnalysis = await prisma.voiceAnalysis.create({
      data: {
        userId,
        enrollmentId: options?.enrollmentId,
        audioHash,
        isSynthetic,
        confidence: analysisResult.confidence,
        analysisResult: {
          ...analysisResult,
          processedMetadata: processed.metadata,
          analysisTimestamp: new Date().toISOString(),
          modelVersion: 'ecapa-tdnn-v1-mock',
        },
        audioUrl: options?.audioUrl ?? '',
      },
    });

    return voiceAnalysis;
  }

  /**
   * Get analysis result by ID.
   */
  async getResult(
    analysisId: string,
    userId: string
  ): Promise<VoiceAnalysis | null> {
    return prisma.voiceAnalysis.findFirst({
      where: {
        id: analysisId,
        userId,
      },
    });
  }

  /**
   * Get analysis history for a user.
   */
  async getHistory(
    userId: string,
    options?: {
      limit?: number;
      offset?: number;
      isSynthetic?: boolean;
    }
  ): Promise<VoiceAnalysis[]> {
    return prisma.voiceAnalysis.findMany({
      where: {
        userId,
        ...(options?.isSynthetic !== undefined && { isSynthetic: options.isSynthetic }),
      },
      orderBy: { createdAt: 'desc' },
      take: options?.limit ?? 50,
      skip: options?.offset ?? 0,
    });
  }

  private computeAudioHash(buffer: Buffer): string {
    let hash = 0;
    const sampleSize = Math.min(buffer.length, 1024);
    for (let i = 0; i < sampleSize; i += 8) {
      hash = ((hash << 5) - hash) + buffer.readUInt8(i);
      hash |= 0;
    }
    return `audio_${Math.abs(hash).toString(16)}`;
  }
}

// Batch analysis service
export class BatchAnalysisService {
  /**
   * Analyze multiple audio files in a batch.
   */
  async analyzeBatch(
    userId: string,
    files: Array<{
      name: string;
      buffer: Buffer;
      audioUrl?: string;
    }>,
    options?: {
      enrollmentId?: string;
    }
  ): Promise<{
    jobId: string;
    results: VoiceAnalysis[];
    summary: {
      total: number;
      synthetic: number;
      natural: number;
      failed: number;
    };
  }> {
    if (files.length > voicePrintEnv.BATCH_MAX_FILES) {
      throw new Error(
        `Batch too large: ${files.length} > ${voicePrintEnv.BATCH_MAX_FILES} max`
      );
    }

    const analysisService = new AnalysisService();
    const results: VoiceAnalysis[] = [];
    let synthetic = 0;
    let natural = 0;
    let failed = 0;

    for (const file of files) {
      try {
        const result = await analysisService.analyze(userId, file.buffer, {
          enrollmentId: options?.enrollmentId,
          audioUrl: file.audioUrl,
        });
        results.push(result);
        if (result.isSynthetic) {
          synthetic++;
        } else {
          natural++;
        }
      } catch (error) {
        console.error(`Batch analysis failed for ${file.name}:`, error);
        failed++;
      }
    }

    const jobId = `batch_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;

    return {
      jobId,
      results,
      summary: {
        total: files.length,
        synthetic,
        natural,
        failed,
      },
    };
  }
}

// Embedding service: ECAPA-TDNN inference wrapper
export class EmbeddingService {
  private initialized = false;

  /**
   * Initialize the ECAPA-TDNN model.
   */
  async initialize(): Promise<void> {
    if (this.initialized) return;

    // TODO: Connect to Python ML service for real inference
    // const response = await fetch(`${voicePrintEnv.ML_SERVICE_URL}/initialize`, {
    //   method: 'POST',
    //   body: JSON.stringify({ modelPath: voicePrintEnv.ECAPA_TDNN_MODEL_PATH }),
    // });

    this.initialized = true;
    console.log('Embedding service initialized (mock model)');
  }

  /**
   * Extract voice embedding from audio.
   */
  async extract(audioBuffer: Buffer): Promise<number[]> {
    await this.initialize();

    // TODO: Call Python ML service
    // const response = await fetch(`${voicePrintEnv.ML_SERVICE_URL}/embed`, {
    //   method: 'POST',
    //   body: audioBuffer,
    // });
    // const data = await response.json();
    // return data.embedding;

    // Mock: generate deterministic embedding based on buffer content
    const dims = voicePrintEnv.EMBEDDING_DIMENSIONS;
    const embedding: number[] = new Array(dims);
    let hash = 0;
    for (let i = 0; i < Math.min(audioBuffer.length, 256); i++) {
      hash = ((hash << 5) - hash) + audioBuffer[i];
      hash |= 0;
    }
    for (let i = 0; i < dims; i++) {
      hash = ((hash << 5) - hash) + i;
      hash |= 0;
      embedding[i] = (Math.abs(hash) % 1000) / 1000.0;
    }

    // L2 normalize
    const norm = Math.sqrt(embedding.reduce((s, v) => s + v * v, 0));
    return embedding.map((v) => v / norm);
  }

  /**
   * Run full analysis: embedding + synthetic detection.
   */
  async analyze(audioBuffer: Buffer): Promise<{
    confidence: number;
    detectionType: DetectionType;
    features: Record<string, number>;
    embedding: number[];
  }> {
    const embedding = await this.extract(audioBuffer);

    // TODO: Run synthetic voice detection model
    // For MVP, use heuristic based on embedding statistics
    const confidence = this.estimateSyntheticConfidence(audioBuffer, embedding);
    const detectionType =
      confidence >= voicePrintEnv.SYNTHETIC_THRESHOLD
        ? DetectionType.SYNTHETIC_VOICE
        : DetectionType.NATURAL;

    const features = this.extractAnalysisFeatures(audioBuffer, embedding);

    return {
      confidence,
      detectionType,
      features,
      embedding,
    };
  }

  private estimateSyntheticConfidence(
    buffer: Buffer,
    embedding: number[]
  ): number {
    // Heuristic features for synthetic detection (mock scoring until the
    // real detection model is wired in; see the TODO in analyze()).
    const meanAmplitude =
      buffer.reduce((s, v) => s + v, 0) / buffer.length / 255;

    // Compute the embedding mean once, then the standard deviation, instead
    // of re-reducing the whole array for every element.
    const embeddingMean =
      embedding.reduce((a, b) => a + b, 0) / embedding.length;
    const embeddingStdDev =
      Math.sqrt(
        embedding.reduce((s, v) => s + (v - embeddingMean) ** 2, 0) /
          embedding.length
      ) || 0;

    // Combine features into a confidence score; the random term adds jitter
    // to the mock output and goes away once the ML-backed detector lands.
    const amplitudeScore = Math.abs(meanAmplitude - 0.5) * 2;
    const embeddingScore = 1.0 - Math.min(1.0, embeddingStdDev * 2);

    return Math.min(
      1.0,
      amplitudeScore * 0.3 + embeddingScore * 0.4 + Math.random() * 0.3
    );
  }

  private extractAnalysisFeatures(
    buffer: Buffer,
    embedding: number[]
  ): Record<string, number> {
    const meanAmplitude =
      buffer.reduce((s, v) => s + v, 0) / buffer.length / 255;
    const zeroCrossings = buffer.reduce((count, v, i, arr) => {
      return i > 0 && ((v - 128) * (arr[i - 1] - 128) < 0) ? count + 1 : count;
    }, 0);

    return {
      mean_amplitude: meanAmplitude,
      zero_crossing_rate: zeroCrossings / buffer.length,
      embedding_energy: embedding.reduce((s, v) => s + v * v, 0),
      embedding_entropy: this.calculateEntropy(embedding),
    };
  }

  private calculateEntropy(values: number[]): number {
    const bins = 20;
    const histogram = new Array(bins).fill(0);
    const min = Math.min(...values);
    const max = Math.max(...values);
    const range = max - min || 1;

    for (const v of values) {
      const bin = Math.min(bins - 1, Math.floor(((v - min) / range) * bins));
      histogram[bin]++;
    }

    let entropy = 0;
    const total = values.length;
    for (const count of histogram) {
      if (count > 0) {
        const p = count / total;
        entropy -= p * Math.log2(p);
      }
    }
    return entropy;
  }
}

// FAISS index wrapper for voice fingerprint matching
export class FAISSIndex {
  private indexPath: string;
  private initialized = false;

  constructor(path?: string) {
    this.indexPath = path ?? voicePrintEnv.FAISS_INDEX_PATH;
  }

  /**
   * Initialize or load the FAISS index.
   */
  async initialize(): Promise<void> {
    if (this.initialized) return;

    // TODO: Load FAISS index from disk
    // const faiss = require('faiss-node');
    // this.index = faiss.readIndex(this.indexPath);

    this.initialized = true;
    console.log(`FAISS index initialized at ${this.indexPath}`);
  }

  /**
   * Add an enrollment embedding to the index.
   */
  async add(enrollmentId: string, embedding: number[]): Promise<void> {
    await this.initialize();

    // TODO: Add to FAISS index
    // this.index.add([embedding]);
    // Store mapping: enrollmentId -> index position
    console.log(`Added enrollment ${enrollmentId} to FAISS index`);
  }

  /**
   * Remove an enrollment from the index.
   */
  async remove(enrollmentId: string): Promise<void> {
    await this.initialize();

    // TODO: Remove from FAISS index
    console.log(`Removed enrollment ${enrollmentId} from FAISS index`);
  }

  /**
   * Search for similar voice embeddings.
   */
  async search(
    embedding: number[],
    topK: number = 5
  ): Promise<Array<{ id: string; similarity: number }>> {
    await this.initialize();

    // TODO: Query FAISS index
    // const [distances, indices] = this.index.search([embedding], topK);
    // Map indices back to enrollment IDs

    // Mock: return empty results
    return [];
  }

  /**
   * Save the index to disk.
   */
  async save(): Promise<void> {
    await this.initialize();
    // TODO: Write FAISS index to disk
    console.log(`FAISS index saved to ${this.indexPath}`);
  }
}
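
// NOTE (illustrative sketch, not part of the transferred file): until the real
// faiss-node backing is wired in, a brute-force in-memory index with the same
// add/remove/search contract could look roughly like the commented class below.
// The class name is an assumption. Embeddings are L2-normalized by
// EmbeddingService.extract, so the dot product equals cosine similarity.
//
// class InMemoryVoiceIndex {
//   private entries = new Map<string, number[]>();
//
//   async add(enrollmentId: string, embedding: number[]): Promise<void> {
//     this.entries.set(enrollmentId, embedding);
//   }
//
//   async remove(enrollmentId: string): Promise<void> {
//     this.entries.delete(enrollmentId);
//   }
//
//   async search(
//     embedding: number[],
//     topK: number = 5
//   ): Promise<Array<{ id: string; similarity: number }>> {
//     const scored = [...this.entries.entries()].map(([id, stored]) => ({
//       id,
//       similarity: stored.reduce((sum, v, i) => sum + v * embedding[i], 0),
//     }));
//     return scored.sort((a, b) => b.similarity - a.similarity).slice(0, topK);
//   }
// }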

// Export singleton instances
export const audioPreprocessor = new AudioPreprocessor();
export const voiceEnrollmentService = new VoiceEnrollmentService();
export const analysisService = new AnalysisService();
export const batchAnalysisService = new BatchAnalysisService();
export const embeddingService = new EmbeddingService();
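
Usage sketch (illustrative, not from the transferred files): one way a caller could compose the exported singletons above; the import path and the handleVoiceCheck helper are assumptions made for the example.

import {
  analysisService,
  embeddingService,
  voiceEnrollmentService,
} from './voiceprint.service'; // hypothetical path to this module

// Analyze an uploaded clip, then compare it against the caller's enrollments.
async function handleVoiceCheck(userId: string, audio: Buffer) {
  // Preprocess, embed, score, and persist the analysis record.
  const analysis = await analysisService.analyze(userId, audio);

  // Match the clip's embedding against enrolled voiceprints via FAISSIndex.
  const embedding = await embeddingService.extract(audio);
  const matches = await voiceEnrollmentService.findSimilar(embedding, 5);

  return {
    isSynthetic: analysis.isSynthetic,
    confidence: analysis.confidence,
    matches: matches.map((m) => ({
      enrollmentId: m.enrollment.id,
      similarity: m.similarity,
    })),
  };
}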