# ECS Fargate service stack.
#
# Creates an ECS cluster, one task definition / service / IAM role pair per
# entry in var.services, an internet-facing ALB with a DNS-validated ACM
# certificate, target-tracking CPU autoscaling, and KMS-encrypted CloudWatch
# log groups.

# -----------------------------------------------------------------------------
# Inputs
# -----------------------------------------------------------------------------

variable "environment" {
  description = "Deployment environment"
  type        = string
}

variable "cluster_name" {
  description = "ECS cluster name"
  type        = string
}

variable "vpc_id" {
  description = "VPC ID"
  type        = string
}

variable "subnet_ids" {
  description = "Private subnet IDs for ECS tasks"
  type        = list(string)
}

variable "public_subnet_ids" {
  description = "Public subnet IDs for ALB"
  type        = list(string)
}

variable "security_group_ids" {
  description = "Security group IDs"
  type        = list(string)
}

variable "alb_security_group_id" {
  description = "ALB security group ID"
  type        = string
}

variable "services" {
  description = "ECS services to deploy"
  type = map(object({
    cpu    = number
    memory = number
    port   = number
  }))

  # The HTTPS listener forwards unmatched requests to the "api" target group,
  # so fail early with a clear message when that entry is missing.
  validation {
    condition     = contains(keys(var.services), "api")
    error_message = "The services map must contain an \"api\" entry; it is the default target of the HTTPS listener."
  }
}

variable "container_images" {
  description = "Container image tags"
  type        = map(string)
}

variable "secrets_arn" {
  description = "Secrets Manager ARN"
  type        = string
}

variable "cache_cluster_arn" {
  description = "ElastiCache replication group ARN"
  type        = string
}

variable "domain_name" {
  description = "Route53 hosted zone domain for ACM cert validation"
  type        = string
  default     = "shieldai.app"
}

variable "aws_region" {
  description = "AWS region for the AWS_REGION container variable, awslogs and the logs KMS key policy. Must match the provider region."
  type        = string
  default     = "us-east-1"
}

locals {
  is_production = var.environment == "production"

  # Autoscaling bounds, shared by every service.
  min_capacity = local.is_production ? 2 : 1
  max_capacity = local.is_production ? 10 : 3
}

data "aws_caller_identity" "current" {}

data "aws_partition" "current" {}

# -----------------------------------------------------------------------------
# ECS cluster
# -----------------------------------------------------------------------------

resource "aws_ecs_cluster" "main" {
  name = var.cluster_name

  # The provider block is named `setting` (singular).
  setting {
    name  = "containerInsights"
    value = "enabled"
  }

  tags = {
    Name = var.cluster_name
  }
}

resource "aws_ecs_cluster_capacity_providers" "main" {
  cluster_name       = aws_ecs_cluster.main.name
  capacity_providers = ["FARGATE"]

  default_capacity_provider_strategy {
    base              = 1
    weight            = 100
    capacity_provider = "FARGATE"
  }
}

# -----------------------------------------------------------------------------
# Task definitions
# -----------------------------------------------------------------------------

resource "aws_ecs_task_definition" "services" {
  for_each = var.services

  family                   = "${var.cluster_name}-${each.key}"
  network_mode             = "awsvpc"
  requires_compatibilities = ["FARGATE"]
  # Per-service settings live under each.value; `each` itself only exposes
  # `key` and `value`.
  cpu                = each.value.cpu
  memory             = each.value.memory
  execution_role_arn = aws_iam_role.execution[each.key].arn
  task_role_arn      = aws_iam_role.task[each.key].arn

  container_definitions = jsonencode([
    {
      name      = each.key
      image     = "ghcr.io/shieldai/shieldai-${each.key}:${var.container_images[each.key]}"
      cpu       = each.value.cpu
      memory    = each.value.memory
      essential = true

      portMappings = [
        {
          containerPort = each.value.port
          hostPort      = each.value.port
          protocol      = "tcp"
        }
      ]

      environment = [
        { name = "NODE_ENV", value = var.environment },
        { name = "PORT", value = tostring(each.value.port) },
        { name = "DD_ENV", value = var.environment },
        { name = "DD_SERVICE", value = "${var.cluster_name}-${each.key}" },
        { name = "DD_VERSION", value = var.container_images[each.key] },
        { name = "DD_TRACE_ENABLED", value = "true" },
        { name = "DD_LOGS_INJECTION", value = "true" },
        { name = "DD_AGENT_HOST", value = "localhost" },
        { name = "DD_AGENT_PORT", value = "8126" },
        { name = "SENTRY_ENVIRONMENT", value = var.environment },
        { name = "SENTRY_RELEASE", value = var.container_images[each.key] },
        { name = "AWS_REGION", value = var.aws_region },
        { name = "DD_SITE", value = "datadoghq.com" }
      ]

      # Each entry pulls one JSON key out of the shared secret.
      secrets = [
        { name = "DATABASE_URL", valueFrom = "${var.secrets_arn}:DATABASE_URL::" },
        { name = "REDIS_URL", valueFrom = "${var.secrets_arn}:REDIS_URL::" },
        { name = "HIBP_API_KEY", valueFrom = "${var.secrets_arn}:HIBP_API_KEY::" },
        { name = "RESEND_API_KEY", valueFrom = "${var.secrets_arn}:RESEND_API_KEY::" },
        { name = "SENTRY_DSN", valueFrom = "${var.secrets_arn}:SENTRY_DSN::" },
        { name = "DD_API_KEY", valueFrom = "${var.secrets_arn}:DD_API_KEY::" }
      ]

      logConfiguration = {
        logDriver = "awslogs"
        options = {
          # Referencing the resource (rather than rebuilding the name) makes
          # Terraform create the log group before the task definition.
          "awslogs-group"         = aws_cloudwatch_log_group.services[each.key].name
          "awslogs-region"        = var.aws_region
          "awslogs-stream-prefix" = each.key
        }
      }

      # NOTE(review): this check needs `curl` inside the image - confirm.
      healthCheck = {
        command     = ["CMD-SHELL", "curl -f http://localhost:${each.value.port}/health || exit 1"]
        interval    = 30
        timeout     = 5
        retries     = 3
        startPeriod = 60
      }
    }
  ])

  tags = {
    Name = "${var.cluster_name}-${each.key}"
  }
}

# -----------------------------------------------------------------------------
# IAM
# -----------------------------------------------------------------------------

# Execution role: used by the ECS agent to pull the image, write logs and
# resolve the container `secrets`.
resource "aws_iam_role" "execution" {
  for_each = var.services

  name = "${var.cluster_name}-${each.key}-execution"

  assume_role_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Action = "sts:AssumeRole"
        Effect = "Allow"
        Principal = {
          Service = "ecs-tasks.amazonaws.com"
        }
      }
    ]
  })
}

resource "aws_iam_role_policy_attachment" "execution_managed" {
  for_each = var.services

  role       = aws_iam_role.execution[each.key].name
  policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy"
}

# The managed execution policy does not cover Secrets Manager, and ECS reads
# the container `secrets` with the execution role (not the task role).
# NOTE(review): if the secret uses a customer-managed KMS key, kms:Decrypt on
# that key is also required - confirm.
resource "aws_iam_role_policy" "execution_secrets" {
  for_each = var.services

  name = "secrets-manager-read"
  role = aws_iam_role.execution[each.key].id

  policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Effect   = "Allow"
        Action   = ["secretsmanager:GetSecretValue"]
        Resource = var.secrets_arn
      }
    ]
  })
}

# Task role: assumed by the application code running in the container.
resource "aws_iam_role" "task" {
  for_each = var.services

  name = "${var.cluster_name}-${each.key}-task"

  assume_role_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Action = "sts:AssumeRole"
        Effect = "Allow"
        Principal = {
          Service = "ecs-tasks.amazonaws.com"
        }
      }
    ]
  })
}

resource "aws_iam_role_policy" "task_secrets" {
  for_each = var.services

  name = "secrets-manager-access"
  role = aws_iam_role.task[each.key].id

  policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Effect = "Allow"
        Action = [
          "secretsmanager:GetSecretValue",
          "secretsmanager:DescribeSecret"
        ]
        Resource = var.secrets_arn
      }
    ]
  })
}

resource "aws_iam_role_policy" "task_elasticache" {
  for_each = var.services

  name = "elasticache-access"
  role = aws_iam_role.task[each.key].id

  policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Effect = "Allow"
        Action = [
          "elasticache:DescribeCacheClusters",
          "elasticache:DescribeCacheSubnetGroups"
        ]
        Resource = var.cache_cluster_arn
      }
    ]
  })
}

# -----------------------------------------------------------------------------
# ECS services
# -----------------------------------------------------------------------------

resource "aws_ecs_service" "services" {
  for_each = var.services

  name            = "${var.cluster_name}-${each.key}"
  cluster         = aws_ecs_cluster.main.id
  task_definition = aws_ecs_task_definition.services[each.key].arn
  desired_count   = local.is_production ? 3 : 1
  launch_type     = "FARGATE"

  network_configuration {
    subnets          = var.subnet_ids
    security_groups  = var.security_group_ids
    assign_public_ip = false
  }

  load_balancer {
    target_group_arn = aws_lb_target_group.services[each.key].arn
    container_name   = each.key
    container_port   = each.value.port
  }

  # desired_count is only the initial size; Application Auto Scaling owns it
  # afterwards, so later applies must not reset it.
  lifecycle {
    ignore_changes = [desired_count]
  }

  tags = {
    Name    = "${var.cluster_name}-${each.key}"
    Service = each.key
  }

  # A target group must be attached to the ALB before ECS accepts it: the
  # default one is attached by the listener, the others by listener rules.
  # The execution-role policies must exist before the first task starts.
  depends_on = [
    aws_lb_listener.https,
    aws_lb_listener_rule.services,
    aws_iam_role_policy.execution_secrets,
    aws_iam_role_policy_attachment.execution_managed,
  ]
}

# -----------------------------------------------------------------------------
# Load balancer and certificate
# -----------------------------------------------------------------------------

resource "aws_lb" "main" {
  name               = "${var.cluster_name}-alb"
  internal           = false
  load_balancer_type = "application"
  security_groups    = [var.alb_security_group_id]
  subnets            = var.public_subnet_ids

  tags = {
    Name = "${var.cluster_name}-alb"
  }
}

resource "aws_acm_certificate" "main" {
  # Built from var.domain_name so the certificate always lives in the hosted
  # zone that is used to validate it.
  domain_name       = "${var.cluster_name}.${var.environment}.${var.domain_name}"
  validation_method = "DNS"

  tags = {
    Name = "${var.cluster_name}-cert"
  }

  # Lets a replacement certificate be issued before the in-use one is removed.
  lifecycle {
    create_before_destroy = true
  }
}

data "aws_route53_zone" "main" {
  name = var.domain_name
}

resource "aws_route53_record" "acm_validation" {
  # Keyed by domain name, which is known at plan time. Filtering on the record
  # fields would make the key set unknown until apply.
  for_each = {
    for dvo in aws_acm_certificate.main.domain_validation_options : dvo.domain_name => dvo
  }

  allow_overwrite = true
  zone_id         = data.aws_route53_zone.main.zone_id
  name            = each.value.resource_record_name
  type            = each.value.resource_record_type
  ttl             = 60
  records         = [each.value.resource_record_value]
}

resource "aws_acm_certificate_validation" "main" {
  certificate_arn = aws_acm_certificate.main.arn

  # The records use for_each (a map), so a `for` expression is needed; the
  # `[*]` splat only works on lists.
  validation_record_fqdns = [for record in aws_route53_record.acm_validation : record.fqdn]
}

resource "aws_lb_target_group" "services" {
  for_each = var.services

  name     = "${var.cluster_name}-${each.key}-tg"
  port     = each.value.port
  protocol = "HTTP"
  vpc_id   = var.vpc_id
  # awsvpc tasks register by ENI IP address; the default "instance" target
  # type is rejected for them.
  target_type = "ip"

  health_check {
    enabled             = true
    healthy_threshold   = 3
    interval            = 30
    matcher             = "200"
    path                = "/health"
    port                = "traffic-port"
    protocol            = "HTTP"
    timeout             = 5
    unhealthy_threshold = 3
  }

  stickiness {
    type            = "lb_cookie"
    cookie_duration = 86400
  }
}

resource "aws_lb_listener" "https" {
  load_balancer_arn = aws_lb.main.arn
  port              = 443
  protocol          = "HTTPS"
  ssl_policy        = "ELBSecurityPolicy-TLS13-1-2-2021-06"
  # Taken from the validation resource so the listener waits for issuance.
  certificate_arn = aws_acm_certificate_validation.main.certificate_arn

  default_action {
    type             = "forward"
    target_group_arn = aws_lb_target_group.services["api"].arn
  }
}

# Path-based routing for every service except "api", which is the default.
resource "aws_lb_listener_rule" "services" {
  for_each = { for k, v in var.services : k => v if k != "api" }

  listener_arn = aws_lb_listener.https.arn

  action {
    type             = "forward"
    target_group_arn = aws_lb_target_group.services[each.key].arn
  }

  condition {
    path_pattern {
      values = ["/${each.key}/*", "/${each.key}"]
    }
  }
}

resource "aws_lb_listener" "http_redirect" {
  load_balancer_arn = aws_lb.main.arn
  port              = 80
  protocol          = "HTTP"

  default_action {
    type = "redirect"

    redirect {
      port        = "443"
      protocol    = "HTTPS"
      status_code = "HTTP_301"
    }
  }
}

# -----------------------------------------------------------------------------
# Autoscaling
# -----------------------------------------------------------------------------

resource "aws_appautoscaling_target" "services" {
  for_each = var.services

  service_namespace  = "ecs"
  resource_id        = "service/${aws_ecs_cluster.main.name}/${aws_ecs_service.services[each.key].name}"
  scalable_dimension = "ecs:service:DesiredCount"
  min_capacity       = local.min_capacity
  max_capacity       = local.max_capacity
}

resource "aws_appautoscaling_policy" "cpu" {
  for_each = var.services

  name = "${var.cluster_name}-${each.key}-cpu-scaling"
  # Must be set explicitly: the default policy type is StepScaling.
  policy_type = "TargetTrackingScaling"

  # Read from the scalable target so the policy is created after it.
  service_namespace  = aws_appautoscaling_target.services[each.key].service_namespace
  resource_id        = aws_appautoscaling_target.services[each.key].resource_id
  scalable_dimension = aws_appautoscaling_target.services[each.key].scalable_dimension

  target_tracking_scaling_policy_configuration {
    target_value       = 70.0
    scale_in_cooldown  = 60
    scale_out_cooldown = 30

    # Per-service average CPU; a metric with only the ClusterName dimension
    # would track the whole cluster instead.
    predefined_metric_specification {
      predefined_metric_type = "ECSServiceAverageCPUUtilization"
    }
  }
}

# -----------------------------------------------------------------------------
# Logging
# -----------------------------------------------------------------------------

# Key policy for the logs key: keeps account-level administration and lets
# CloudWatch Logs use the key for this cluster's log groups only.
data "aws_iam_policy_document" "logs_kms" {
  statement {
    sid       = "EnableAccountAdministration"
    actions   = ["kms:*"]
    resources = ["*"]

    principals {
      type        = "AWS"
      identifiers = ["arn:${data.aws_partition.current.partition}:iam::${data.aws_caller_identity.current.account_id}:root"]
    }
  }

  statement {
    sid = "AllowCloudWatchLogs"
    actions = [
      "kms:Encrypt*",
      "kms:Decrypt*",
      "kms:ReEncrypt*",
      "kms:GenerateDataKey*",
      "kms:Describe*",
    ]
    resources = ["*"]

    principals {
      type        = "Service"
      identifiers = ["logs.${var.aws_region}.amazonaws.com"]
    }

    condition {
      test     = "ArnLike"
      variable = "kms:EncryptionContext:aws:logs:arn"
      values = [
        "arn:${data.aws_partition.current.partition}:logs:${var.aws_region}:${data.aws_caller_identity.current.account_id}:log-group:/ecs/${var.cluster_name}-*"
      ]
    }
  }
}

resource "aws_kms_key" "logs" {
  description             = "${var.cluster_name} logs encryption key"
  deletion_window_in_days = 7
  enable_key_rotation     = true
  policy                  = data.aws_iam_policy_document.logs_kms.json

  tags = {
    Name = "${var.cluster_name}-logs-kms"
  }
}

resource "aws_cloudwatch_log_group" "services" {
  for_each = var.services

  name              = "/ecs/${var.cluster_name}-${each.key}"
  retention_in_days = local.is_production ? 30 : 7
  kms_key_id        = aws_kms_key.logs.arn

  tags = {
    Name = "${var.cluster_name}-${each.key}-logs"
  }
}

# -----------------------------------------------------------------------------
# Outputs
# -----------------------------------------------------------------------------

output "cluster_arn" {
  description = "ECS cluster ARN"
  value       = aws_ecs_cluster.main.arn
}

output "alb_dns_name" {
  description = "ALB DNS name"
  value       = aws_lb.main.dns_name
}

output "kms_key_arn" {
  description = "KMS key ARN for log encryption"
  value       = aws_kms_key.logs.arn
}