# ShieldAI/infra/modules/ecs/main.tf

# Environment label for this deployment. It is passed to containers as
# NODE_ENV, and the literal value "production" selects the larger task counts,
# autoscaling bounds and log retention used elsewhere in this module.
variable "environment" {
  type        = string
  description = "Deployment environment"
}

# Name of the ECS cluster; also used as the prefix for the task families,
# IAM roles, services, target groups, the ALB and the log groups in this module.
variable "cluster_name" {
  type        = string
  description = "ECS cluster name"
}

# VPC in which the load balancer target groups are created.
variable "vpc_id" {
  type        = string
  description = "VPC ID"
}

# Subnets used both for the ECS service network configuration and for the
# application load balancer in this module.
variable "subnet_ids" {
  type        = list(string)
  description = "Private subnet IDs"
}

# Security groups attached to the ECS service ENIs and to the load balancer.
variable "security_group_ids" {
  type        = list(string)
  description = "Security group IDs"
}

# Map of services to deploy, keyed by service name. The key is used in
# resource names, the container name and the image repository name; the value
# supplies the task CPU/memory size and the port the container listens on.
variable "services" {
  type = map(object({
    cpu    = number
    memory = number
    port   = number
  }))
  description = "ECS services to deploy"
}

# Image tag per service. The task definition looks this map up by the same key
# used in var.services, so every service key needs an entry here.
variable "container_images" {
  type        = map(string)
  description = "Container image tags"
}

# ARN of the Secrets Manager secret from which the container secrets are read.
# The task definition appends ":<JSON key>::" to this ARN for each secret.
variable "secrets_arn" {
  type        = string
  description = "Secrets Manager ARN"
}

# ECS cluster that hosts every service defined in this module.
resource "aws_ecs_cluster" "main" {
  name = var.cluster_name

  # Enable CloudWatch Container Insights for the cluster. The provider's block
  # name is the singular `setting`; `settings` is rejected at plan time.
  setting {
    name  = "containerInsights"
    value = "enabled"
  }

  tags = {
    Name = var.cluster_name
  }
}

# Registers FARGATE as the cluster's only capacity provider and makes it the
# default strategy for anything launched without an explicit one.
resource "aws_ecs_cluster_capacity_providers" "main" {
  cluster_name       = aws_ecs_cluster.main.name
  capacity_providers = ["FARGATE"]

  default_capacity_provider_strategy {
    capacity_provider = "FARGATE"
    weight            = 100
    base              = 1
  }
}

# One Fargate task definition per entry in var.services.
#
# Each task runs a single essential container named after the service key,
# pulled from ghcr.io/shieldai/shieldai-<key> at the tag given in
# var.container_images. Sizing and the listening port come from the service's
# entry in var.services (accessed through each.value).
resource "aws_ecs_task_definition" "services" {
  for_each = var.services

  family                   = "${var.cluster_name}-${each.key}"
  requires_compatibilities = ["FARGATE"]
  network_mode             = "awsvpc"
  cpu                      = each.value.cpu
  memory                   = each.value.memory

  execution_role_arn = aws_iam_role.execution[each.key].arn
  task_role_arn      = aws_iam_role.task[each.key].arn

  container_definitions = jsonencode([
    {
      name      = each.key
      image     = "ghcr.io/shieldai/shieldai-${each.key}:${var.container_images[each.key]}"
      cpu       = each.value.cpu
      memory    = each.value.memory
      essential = true

      portMappings = [
        {
          # With awsvpc networking the host port must equal the container port.
          containerPort = each.value.port
          hostPort      = each.value.port
          protocol      = "tcp"
        }
      ]

      environment = [
        {
          name  = "NODE_ENV"
          value = var.environment
        },
        {
          # Environment values must be strings in the container definition.
          name  = "PORT"
          value = tostring(each.value.port)
        }
      ]

      # Each entry selects one JSON key of the secret: "<arn>:<json-key>::".
      secrets = [
        {
          name      = "DATABASE_URL"
          valueFrom = "${var.secrets_arn}:DATABASE_URL::"
        },
        {
          name      = "REDIS_URL"
          valueFrom = "${var.secrets_arn}:REDIS_URL::"
        },
        {
          name      = "HIBP_API_KEY"
          valueFrom = "${var.secrets_arn}:HIBP_API_KEY::"
        },
        {
          name      = "RESEND_API_KEY"
          valueFrom = "${var.secrets_arn}:RESEND_API_KEY::"
        }
      ]

      logConfiguration = {
        logDriver = "awslogs"
        options = {
          # Reference the managed log group (same name as before) so Terraform
          # creates it before this task definition can be used.
          "awslogs-group" = aws_cloudwatch_log_group.services[each.key].name
          # NOTE(review): region is hard-coded; confirm this module is only
          # ever applied in us-east-1.
          "awslogs-region"        = "us-east-1"
          "awslogs-stream-prefix" = each.key
        }
      }

      # Container-level health probe; requires `wget` inside the image.
      healthCheck = {
        command     = ["CMD-SHELL", "wget -q --spider http://localhost:${each.value.port}/health || exit 1"]
        interval    = 30
        timeout     = 5
        retries     = 3
        startPeriod = 60
      }
    }
  ])

  tags = {
    Name = "${var.cluster_name}-${each.key}"
  }
}

# Per-service ECS task execution role, assumed by the ECS agent to pull the
# image, write logs and resolve the container `secrets` before the task starts.
resource "aws_iam_role" "execution" {
  for_each = var.services

  name = "${var.cluster_name}-${each.key}-execution"

  assume_role_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Action = "sts:AssumeRole"
        Effect = "Allow"
        Principal = {
          Service = "ecs-tasks.amazonaws.com"
        }
      }
    ]
  })

  managed_policy_arns = [
    "arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy"
  ]

  # Secrets referenced in the container definition are fetched with the
  # execution role, and the managed policy above does not grant Secrets
  # Manager access, so allow reading exactly the secret this module uses.
  # NOTE(review): if the secret is encrypted with a customer-managed KMS key,
  # kms:Decrypt on that key is also required.
  inline_policy {
    name = "read-service-secrets"
    policy = jsonencode({
      Version = "2012-10-17"
      Statement = [
        {
          Effect   = "Allow"
          Action   = ["secretsmanager:GetSecretValue"]
          Resource = var.secrets_arn
        }
      ]
    })
  }
}

# Per-service ECS task role: the identity the application code itself uses
# when calling AWS APIs at runtime.
resource "aws_iam_role" "task" {
  for_each = var.services

  name = "${var.cluster_name}-${each.key}-task"

  assume_role_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Action = "sts:AssumeRole"
        Effect = "Allow"
        Principal = {
          Service = "ecs-tasks.amazonaws.com"
        }
      }
    ]
  })

  # Read access to the module's secret only. This replaces the previous
  # attachment of "arn:aws:iam::aws:policy/SecretsManagerReadOnly", which does
  # not appear to be a published AWS managed policy (attachment would fail)
  # and would in any case have covered every secret in the account.
  inline_policy {
    name = "secrets-read"
    policy = jsonencode({
      Version = "2012-10-17"
      Statement = [
        {
          Effect = "Allow"
          Action = [
            "secretsmanager:GetSecretValue",
            "secretsmanager:DescribeSecret"
          ]
          Resource = var.secrets_arn
        }
      ]
    })
  }

  # Read-only ElastiCache discovery calls; these Describe* actions are granted
  # on all resources.
  inline_policy {
    name = "elasticache-access"
    policy = jsonencode({
      Version = "2012-10-17"
      Statement = [
        {
          Effect = "Allow"
          Action = [
            "elasticache:DescribeCacheClusters",
            "elasticache:DescribeCacheSubnetGroups"
          ]
          Resource = "*"
        }
      ]
    })
  }
}

# One Fargate service per entry in var.services, registered with that
# service's ALB target group.
resource "aws_ecs_service" "services" {
  for_each = var.services

  name            = "${var.cluster_name}-${each.key}"
  cluster         = aws_ecs_cluster.main.id
  task_definition = aws_ecs_task_definition.services[each.key].arn
  launch_type     = "FARGATE"

  # Initial task count only; the running count is owned by Application Auto
  # Scaling afterwards (see lifecycle below).
  desired_count = var.environment == "production" ? 3 : 1

  network_configuration {
    subnets          = var.subnet_ids
    security_groups  = var.security_group_ids
    assign_public_ip = false
  }

  load_balancer {
    target_group_arn = aws_lb_target_group.services[each.key].arn
    container_name   = each.key
    container_port   = each.value.port
  }

  # Scaling bounds are not an argument of this resource; they are defined on
  # aws_appautoscaling_target.services.

  tags = {
    Name    = "${var.cluster_name}-${each.key}"
    Service = each.key
  }

  lifecycle {
    # Prevent Terraform from resetting the count that autoscaling has chosen.
    ignore_changes = [desired_count]
  }

  # The target group must be attached to a listener before the service can
  # register tasks with it.
  depends_on = [
    aws_lb_listener.services
  ]
}

# Application load balancer shared by all services in this module.
# NOTE(review): the ALB is internet-facing (internal = false) but is placed in
# var.subnet_ids, which that variable describes as private subnets, and it
# reuses the tasks' security groups — confirm both are intended.
resource "aws_lb" "main" {
  name               = "${var.cluster_name}-alb"
  load_balancer_type = "application"
  internal           = false
  subnets            = var.subnet_ids
  security_groups    = var.security_group_ids

  tags = {
    Name = "${var.cluster_name}-alb"
  }
}

# One HTTP target group per service, health-checked on /health.
resource "aws_lb_target_group" "services" {
  for_each = var.services

  name     = "${var.cluster_name}-${each.key}-tg"
  port     = each.value.port
  protocol = "HTTP"
  vpc_id   = var.vpc_id

  # Fargate tasks use awsvpc networking and are registered by ENI IP address;
  # the default target type ("instance") cannot be used with them.
  target_type = "ip"

  health_check {
    enabled             = true
    healthy_threshold   = 3
    interval            = 30
    matcher             = "200"
    path                = "/health"
    port                = "traffic-port"
    protocol            = "HTTP"
    timeout             = 5
    unhealthy_threshold = 3
  }

  # Load-balancer-generated cookie pins a client to one task for 24 hours.
  stickiness {
    type            = "lb_cookie"
    cookie_duration = 86400
  }
}

# One HTTP listener per service, forwarding to that service's target group.
#
# A load balancer allows only one listener per port, so each service listens
# on its own port from var.services rather than all of them on port 80 (which
# fails as soon as a second service is defined).
# NOTE(review): this exposes each service on its container port; the ALB
# security groups must allow those ports, and services must not share a port.
resource "aws_lb_listener" "services" {
  for_each = var.services

  load_balancer_arn = aws_lb.main.arn
  port              = each.value.port
  protocol          = "HTTP"

  default_action {
    type             = "forward"
    target_group_arn = aws_lb_target_group.services[each.key].arn
  }
}

# Registers each ECS service's desired count as a scalable target.
# Bounds are 2-10 tasks when var.environment is "production", otherwise 1-3.
resource "aws_appautoscaling_target" "services" {
  for_each = var.services

  service_namespace  = "ecs"
  scalable_dimension = "ecs:service:DesiredCount"
  resource_id        = "service/${aws_ecs_cluster.main.name}/${aws_ecs_service.services[each.key].name}"

  max_capacity = var.environment == "production" ? 10 : 3
  min_capacity = var.environment == "production" ? 2 : 1
}

# Target-tracking policy that keeps each service's average CPU near 70%.
resource "aws_appautoscaling_policy" "cpu" {
  for_each = var.services

  name = "${var.cluster_name}-${each.key}-cpu-scaling"

  # Target tracking must be selected explicitly; the provider default is
  # "StepScaling", which does not accept the configuration block below.
  policy_type = "TargetTrackingScaling"

  # Take the identifiers from the scalable target so the policy is created
  # only after the target has been registered.
  service_namespace  = aws_appautoscaling_target.services[each.key].service_namespace
  resource_id        = aws_appautoscaling_target.services[each.key].resource_id
  scalable_dimension = aws_appautoscaling_target.services[each.key].scalable_dimension

  target_tracking_scaling_policy_configuration {
    target_value       = 70.0
    scale_in_cooldown  = 60
    scale_out_cooldown = 30

    # Per-service average CPU. The previous custom metric was keyed only by
    # ClusterName, so every service would have scaled on cluster-wide CPU.
    predefined_metric_specification {
      predefined_metric_type = "ECSServiceAverageCPUUtilization"
    }
  }
}

# CloudWatch log group per service, named to match the awslogs-group used by
# the task definitions. Logs are kept 30 days when var.environment is
# "production" and 7 days otherwise.
resource "aws_cloudwatch_log_group" "services" {
  for_each = var.services

  retention_in_days = var.environment == "production" ? 30 : 7
  name              = "/ecs/${var.cluster_name}-${each.key}"

  tags = {
    Name = "${var.cluster_name}-${each.key}-logs"
  }
}

# Exposes the ARN of the ECS cluster created by this module.
output "cluster_arn" {
  value       = aws_ecs_cluster.main.arn
  description = "ECS cluster ARN"
}

# Exposes the DNS name of the module's application load balancer.
output "alb_dns_name" {
  value       = aws_lb.main.dns_name
  description = "ALB DNS name"
}