// ShieldAI/packages/extension/src/lib/phishing-detector.ts

import { ThreatType, UrlVerdict, ThreatInfo } from '../types';

/**
 * Offline, heuristic phishing scorer for a single URL.
 *
 * `analyzeUrl` runs a fixed sequence of independent checks. Every check that
 * fires appends one `ThreatInfo` to the result and contributes a fixed number
 * of points; the summed score is then mapped onto a `UrlVerdict`. The class
 * performs no network lookups and keeps no per-call state.
 */
export class PhishingDetector {
  /** TLDs (stored with their leading dot) that add risk when they end the hostname. */
  private readonly knownSuspiciousTlds: Set<string> = new Set([
    '.tk', '.ml', '.ga', '.cf', '.gq', '.xyz', '.top', '.click', '.link', '.work',
  ]);

  /**
   * Brand name -> product/property names associated with that brand.
   * Brand names drive the edit-distance (typosquat) check; product names drive
   * the weaker "mentions a product on an unrelated domain" check.
   */
  private readonly commonBrands: Map<string, string[]> = new Map([
    ['google', ['gmail', 'drive', 'docs', 'maps', 'play', 'chrome', 'youtube']],
    ['apple', ['icloud', 'appstore', 'icloud_content', 'appleid']],
    ['amazon', ['aws', 'amazonaws', 'amazon-adsystem', 'prime-video']],
    ['microsoft', ['office', 'outlook', 'onedrive', 'teams', 'azure', 'windows']],
    ['facebook', ['fb', 'fbcdn', 'instagram', 'whatsapp', 'messenger']],
    ['paypal', ['paypalobjects', 'paypal-web', 'xoom']],
    ['netflix', ['nflximg', 'nflxso', 'nflxvideo', 'nflxext']],
    ['bank', ['chase', 'bofa', 'wellsfargo', 'citi', 'hsbc', 'barclays']],
  ]);

  // Hoisted so the literals are built once instead of on every call.
  private static readonly IPV4_PATTERN = /^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/;
  // Three or more consecutive percent-encoded bytes. No `g` flag: the regex is
  // only used with `test`, which must stay stateless across calls.
  private static readonly ENCODED_RUN_PATTERN = /(%[0-9a-fA-F]{2}){3,}/;
  private static readonly REDIRECT_PARAMS: readonly string[] = [
    'redirect', 'url', 'dest', 'return', 'next', 'target',
  ];
  private static readonly IMPERSONATION_PATTERNS: readonly RegExp[] = [
    /login[-_]?(secure|portal|page|form)/i,
    /account[-_]?(verify|confirm|update)/i,
    /secure[-_]?(signin|auth|login)/i,
    /wallet[-_]?(connect|link|verify)/i,
  ];
  /** Characters that separate "words" inside a hostname. */
  private static readonly HOST_DELIMITERS = '.-_';

  /**
   * Scores a URL against every heuristic and derives a verdict.
   *
   * Verdict thresholds on the summed score: >= 70 PHISHING, >= 40 SUSPICIOUS,
   * >= 20 SPAM, otherwise SAFE. The score is not capped.
   *
   * @param url Absolute URL string to analyze.
   * @returns The verdict, the threats in the order the checks ran, and the
   *   raw score. If `url` cannot be parsed, returns `UrlVerdict.UNKNOWN` with
   *   a single "Malformed URL" threat and a score of 30.
   */
  analyzeUrl(url: string): { verdict: UrlVerdict; threats: ThreatInfo[]; score: number } {
    const parts = this.parseUrl(url);
    if (parts === null) {
      return {
        verdict: UrlVerdict.UNKNOWN,
        threats: [{ type: ThreatType.PHISHING_HEURISTIC, severity: 3, source: 'heuristic', description: 'Malformed URL' }],
        score: 30,
      };
    }

    const { hostname, pathname, search, protocol } = parts;
    const domainParts = hostname.split('.');
    const tld = domainParts[domainParts.length - 1] ?? '';

    const threats: ThreatInfo[] = [];
    let score = 0;
    score += this.checkTld(tld, domainParts, threats);
    score += this.checkEntropy(pathname + search, threats);
    score += this.checkTyposquatting(hostname, threats);
    score += this.checkIpAddress(hostname, threats);
    score += this.checkLongUrl(url, threats);
    score += this.checkSubdomainDepth(domainParts, threats);
    score += this.checkHttpsProtocol(protocol, threats);
    score += this.checkRedirectPatterns(search, threats);
    score += this.checkEncodedChars(url, threats);
    score += this.checkBrandImpersonation(hostname, threats);

    let verdict: UrlVerdict;
    if (score >= 70) verdict = UrlVerdict.PHISHING;
    else if (score >= 40) verdict = UrlVerdict.SUSPICIOUS;
    else if (score >= 20) verdict = UrlVerdict.SPAM;
    else verdict = UrlVerdict.SAFE;

    return { verdict, threats, score };
  }

  /**
   * Parses `url` and extracts the pieces the checks need.
   * Only the parse itself is guarded, so "Malformed URL" is reported for parse
   * failures and nothing else.
   *
   * @returns The lower-cased hostname (without any trailing root dot),
   *   pathname, query string and protocol, or `null` if `url` is not parseable.
   */
  private parseUrl(
    url: string,
  ): { hostname: string; pathname: string; search: string; protocol: string } | null {
    try {
      const parsed = new URL(url);
      return {
        // "evil.tk." and "evil.tk" name the same host; without stripping the
        // root dot the last label would be '' and the TLD check would be skipped.
        hostname: parsed.hostname.toLowerCase().replace(/\.+$/, ''),
        pathname: parsed.pathname,
        search: parsed.search,
        protocol: parsed.protocol,
      };
    } catch {
      return null;
    }
  }

  /**
   * Flags hostnames ending in a listed TLD (25 points) or consisting of a
   * single label (15 points). The TLD rule takes precedence.
   */
  private checkTld(tld: string, parts: string[], threats: ThreatInfo[]): number {
    if (this.knownSuspiciousTlds.has(`.${tld}`)) {
      threats.push({
        type: ThreatType.DOMAIN_AGE,
        severity: 4,
        source: 'heuristic',
        description: `Suspicious TLD: .${tld}`,
      });
      return 25;
    }
    if (parts.length === 1) {
      threats.push({
        type: ThreatType.DOMAIN_AGE,
        severity: 3,
        source: 'heuristic',
        description: 'Single-label domain (possible local or new domain)',
      });
      return 15;
    }
    return 0;
  }

  /**
   * Flags a path+query string of at least 20 characters whose Shannon entropy
   * exceeds 4.5 bits per character (20 points).
   *
   * @param pathname Text to measure; `analyzeUrl` passes pathname + query string.
   */
  private checkEntropy(pathname: string, threats: ThreatInfo[]): number {
    if (!pathname || pathname.length < 20) return 0;
    const entropy = this.calculateEntropy(pathname);
    if (entropy > 4.5) {
      threats.push({
        type: ThreatType.URL_ENTROPY,
        severity: 4,
        source: 'heuristic',
        description: `High URL path entropy (${entropy.toFixed(2)}) suggests obfuscation`,
      });
      return 20;
    }
    return 0;
  }

  /**
   * Detects brand look-alikes in the hostname.
   *
   * 1. If the label directly left of the TLD is itself a listed brand or
   *    product name (e.g. `drive.google.com`, `www.youtube.com`), the host sits
   *    under that name's own domain and nothing is reported.
   * 2. Otherwise, any label within edit distance 1-2 of a brand name is
   *    reported as a typosquat (35 points). All brands are tried before step 3
   *    so a weaker product match can never hide a typosquat.
   * 3. Otherwise, a product name appearing as a delimiter-bounded word in the
   *    hostname is reported (15 points).
   *
   * At most one threat is reported per call.
   *
   * NOTE(review): "label left of the TLD" is only an approximation of the
   * registrable domain; multi-part suffixes such as `.co.uk` are not recognised
   * without a public-suffix list. Short brands (notably "bank") still match
   * many ordinary words at distance 2; consider tightening the threshold.
   */
  private checkTyposquatting(hostname: string, threats: ThreatInfo[]): number {
    const labels = hostname.split('.');
    // Every label except the TLD; a single-label host is its own candidate.
    const nameLabels = labels.length > 1 ? labels.slice(0, -1) : labels;
    const registrable = nameLabels[nameLabels.length - 1] ?? '';

    for (const [brand, products] of this.commonBrands) {
      if (registrable === brand || products.includes(registrable)) return 0;
    }

    for (const brand of this.commonBrands.keys()) {
      for (const label of nameLabels) {
        // A label much shorter than the brand is not a plausible misspelling.
        if (label.length < brand.length - 1) continue;
        const editDist = this.levenshteinDistance(label, brand);
        if (editDist > 0 && editDist <= 2) {
          threats.push({
            type: ThreatType.TYPOSQUAT,
            severity: 5,
            source: 'heuristic',
            description: `Possible typosquat of "${brand}" (edit distance: ${editDist})`,
          });
          return 35;
        }
      }
    }

    for (const [brand, products] of this.commonBrands) {
      for (const sub of products) {
        if (this.containsDelimitedName(hostname, sub)) {
          threats.push({
            type: ThreatType.TYPOSQUAT,
            severity: 3,
            source: 'heuristic',
            description: `Domain contains "${sub}" but not an official ${brand} subdomain`,
          });
          return 15;
        }
      }
    }
    return 0;
  }

  /**
   * Reports whether `name` occurs in `hostname` as a whole word, i.e. bounded
   * on both sides by the start/end of the string or by `.`, `-` or `_`.
   * This keeps short names such as "fb" or "chase" from matching inside
   * unrelated words ("purchase").
   */
  private containsDelimitedName(hostname: string, name: string): boolean {
    if (name.length === 0) return false;
    const isBoundary = (index: number): boolean =>
      index < 0 ||
      index >= hostname.length ||
      PhishingDetector.HOST_DELIMITERS.includes(hostname.charAt(index));

    let at = hostname.indexOf(name);
    while (at !== -1) {
      if (isBoundary(at - 1) && isBoundary(at + name.length)) return true;
      at = hostname.indexOf(name, at + 1);
    }
    return false;
  }

  /** Flags a dotted-quad IPv4 hostname other than 127.0.0.1 (25 points). */
  private checkIpAddress(hostname: string, threats: ThreatInfo[]): number {
    // The pattern can never match "localhost", so only the IPv4 loopback
    // address needs an explicit exemption.
    if (hostname !== '127.0.0.1' && PhishingDetector.IPV4_PATTERN.test(hostname)) {
      threats.push({
        type: ThreatType.PHISHING_HEURISTIC,
        severity: 4,
        source: 'heuristic',
        description: `IP address used as hostname: ${hostname}`,
      });
      return 25;
    }
    return 0;
  }

  /** Flags URLs longer than 200 characters (15 points). */
  private checkLongUrl(url: string, threats: ThreatInfo[]): number {
    if (url.length > 200) {
      threats.push({
        type: ThreatType.PHISHING_HEURISTIC,
        severity: 3,
        source: 'heuristic',
        description: `Unusually long URL (${url.length} chars)`,
      });
      return 15;
    }
    return 0;
  }

  /** Flags hostnames with more than five dot-separated labels (15 points). */
  private checkSubdomainDepth(parts: string[], threats: ThreatInfo[]): number {
    if (parts.length > 5) {
      threats.push({
        type: ThreatType.PHISHING_HEURISTIC,
        severity: 3,
        source: 'heuristic',
        description: `Deep subdomain nesting (${parts.length} levels)`,
      });
      return 15;
    }
    return 0;
  }

  /** Flags plain `http:` URLs (10 points). Other schemes are not scored. */
  private checkHttpsProtocol(protocol: string, threats: ThreatInfo[]): number {
    if (protocol === 'http:') {
      threats.push({
        type: ThreatType.MIXED_CONTENT,
        severity: 2,
        source: 'heuristic',
        description: 'Page loaded over HTTP (not HTTPS)',
      });
      return 10;
    }
    return 0;
  }

  /**
   * Flags query strings that contain at least two distinct redirect-style
   * parameters (15 points). Matching is by `name=` substring, so suffixed
   * names such as `redirect_url=` count via `url=`.
   */
  private checkRedirectPatterns(query: string, threats: ThreatInfo[]): number {
    const count = PhishingDetector.REDIRECT_PARAMS.filter((param) =>
      query.includes(`${param}=`),
    ).length;
    if (count >= 2) {
      threats.push({
        type: ThreatType.REDIRECT_CHAIN,
        severity: 3,
        source: 'heuristic',
        description: `Multiple redirect parameters detected (${count})`,
      });
      return 15;
    }
    return 0;
  }

  /** Flags URLs containing a run of three or more percent-encoded bytes (15 points). */
  private checkEncodedChars(url: string, threats: ThreatInfo[]): number {
    if (PhishingDetector.ENCODED_RUN_PATTERN.test(url)) {
      threats.push({
        type: ThreatType.URL_ENTROPY,
        severity: 3,
        source: 'heuristic',
        description: 'Excessive URL encoding detected',
      });
      return 15;
    }
    return 0;
  }

  /**
   * Flags hostnames containing credential-lure phrases such as "login-secure"
   * or "account-verify" (20 points). At most one threat is reported.
   */
  private checkBrandImpersonation(hostname: string, threats: ThreatInfo[]): number {
    const matched = PhishingDetector.IMPERSONATION_PATTERNS.some((pattern) =>
      pattern.test(hostname),
    );
    if (matched) {
      threats.push({
        type: ThreatType.PHISHING_HEURISTIC,
        severity: 4,
        source: 'heuristic',
        description: `Common phishing pattern detected: ${hostname}`,
      });
      return 20;
    }
    return 0;
  }

  /**
   * Shannon entropy of `str` in bits per character.
   * @returns 0 for an empty string.
   */
  private calculateEntropy(str: string): number {
    const freq = new Map<string, number>();
    // Count characters in the same loop that tallies them, so the denominator
    // uses the same unit (code points) as the frequency table.
    let total = 0;
    for (const char of str) {
      freq.set(char, (freq.get(char) ?? 0) + 1);
      total++;
    }
    let entropy = 0;
    for (const count of freq.values()) {
      const p = count / total;
      entropy -= p * Math.log2(p);
    }
    return entropy;
  }

  /**
   * Levenshtein edit distance between `a` and `b` (unit cost for insertion,
   * deletion and substitution), computed over UTF-16 code units.
   */
  private levenshteinDistance(a: string, b: string): number {
    // Only the previous row of the DP table is needed to compute the next one.
    let previous: number[] = Array.from({ length: a.length + 1 }, (_, j) => j);
    for (let i = 1; i <= b.length; i++) {
      const current: number[] = [i];
      for (let j = 1; j <= a.length; j++) {
        const substitution = previous[j - 1] + (b[i - 1] === a[j - 1] ? 0 : 1);
        current[j] = Math.min(substitution, current[j - 1] + 1, previous[j] + 1);
      }
      previous = current;
    }
    return previous[a.length];
  }
}

/** Module-level `PhishingDetector` instance, constructed once when this module is loaded. */
export const phishingDetector = new PhishingDetector();