Add Terraform AWS infrastructure and enhanced CI/CD pipeline (FRE-4574)
- Terraform modules: VPC, ECS Fargate, RDS PostgreSQL, ElastiCache Redis, S3, Secrets Manager, CloudWatch - Multi-environment support: staging and production configs - ECS auto-scaling: CPU-based scaling with configurable min/max - CI/CD: pnpm caching, Docker Buildx, Trivy security scanning, Terraform plan on PR - Deploy: ECS service updates with automatic rollback on health check failure - Backup: automated RDS snapshots, S3 versioning, ElastiCache snapshots - Monitoring: CloudWatch dashboards, CPU/memory/5xx alarms - Rollback script for manual service rollback - Infrastructure documentation with architecture overview
This commit is contained in:
355
infra/modules/ecs/main.tf
Normal file
355
infra/modules/ecs/main.tf
Normal file
@@ -0,0 +1,355 @@
|
||||
variable "environment" {
|
||||
description = "Deployment environment"
|
||||
type = string
|
||||
}
|
||||
|
||||
variable "cluster_name" {
|
||||
description = "ECS cluster name"
|
||||
type = string
|
||||
}
|
||||
|
||||
variable "vpc_id" {
|
||||
description = "VPC ID"
|
||||
type = string
|
||||
}
|
||||
|
||||
variable "subnet_ids" {
|
||||
description = "Private subnet IDs"
|
||||
type = list(string)
|
||||
}
|
||||
|
||||
variable "security_group_ids" {
|
||||
description = "Security group IDs"
|
||||
type = list(string)
|
||||
}
|
||||
|
||||
variable "services" {
|
||||
description = "ECS services to deploy"
|
||||
type = map(object({
|
||||
cpu = number
|
||||
memory = number
|
||||
port = number
|
||||
}))
|
||||
}
|
||||
|
||||
variable "container_images" {
|
||||
description = "Container image tags"
|
||||
type = map(string)
|
||||
}
|
||||
|
||||
variable "secrets_arn" {
|
||||
description = "Secrets Manager ARN"
|
||||
type = string
|
||||
}
|
||||
|
||||
resource "aws_ecs_cluster" "main" {
|
||||
name = var.cluster_name
|
||||
|
||||
settings {
|
||||
name = "containerInsights"
|
||||
value = "enabled"
|
||||
}
|
||||
|
||||
tags = {
|
||||
Name = var.cluster_name
|
||||
}
|
||||
}
|
||||
|
||||
resource "aws_ecs_cluster_capacity_providers" "main" {
|
||||
cluster_name = aws_ecs_cluster.main.name
|
||||
|
||||
capacity_providers = ["FARGATE"]
|
||||
|
||||
default_capacity_provider_strategy {
|
||||
base = 1
|
||||
weight = 100
|
||||
capacity_provider = "FARGATE"
|
||||
}
|
||||
}
|
||||
|
||||
resource "aws_ecs_task_definition" "services" {
|
||||
for_each = var.services
|
||||
|
||||
family = "${var.cluster_name}-${each.key}"
|
||||
|
||||
container_definitions = jsonencode([
|
||||
{
|
||||
name = each.key
|
||||
image = "ghcr.io/shieldai/shieldai-${each.key}:${var.container_images[each.key]}"
|
||||
cpu = each.cpu
|
||||
memory = each.memory
|
||||
essential = true
|
||||
|
||||
portMappings = [
|
||||
{
|
||||
containerPort = each.port
|
||||
hostPort = each.port
|
||||
protocol = "tcp"
|
||||
}
|
||||
]
|
||||
|
||||
environment = [
|
||||
{
|
||||
name = "NODE_ENV"
|
||||
value = var.environment
|
||||
},
|
||||
{
|
||||
name = "PORT"
|
||||
value = tostring(each.port)
|
||||
}
|
||||
]
|
||||
|
||||
secrets = [
|
||||
{
|
||||
name = "DATABASE_URL"
|
||||
valueFrom = "${var.secrets_arn}:DATABASE_URL::"
|
||||
},
|
||||
{
|
||||
name = "REDIS_URL"
|
||||
valueFrom = "${var.secrets_arn}:REDIS_URL::"
|
||||
},
|
||||
{
|
||||
name = "HIBP_API_KEY"
|
||||
valueFrom = "${var.secrets_arn}:HIBP_API_KEY::"
|
||||
},
|
||||
{
|
||||
name = "RESEND_API_KEY"
|
||||
valueFrom = "${var.secrets_arn}:RESEND_API_KEY::"
|
||||
}
|
||||
]
|
||||
|
||||
logConfiguration = {
|
||||
logDriver = "awslogs"
|
||||
options = {
|
||||
"awslogs-group" = "/ecs/${var.cluster_name}-${each.key}"
|
||||
"awslogs-region" = "us-east-1"
|
||||
"awslogs-stream-prefix" = each.key
|
||||
}
|
||||
}
|
||||
|
||||
healthCheck = {
|
||||
command = ["CMD-SHELL", "wget -q --spider http://localhost:${each.port}/health || exit 1"]
|
||||
interval = 30
|
||||
timeout = 5
|
||||
retries = 3
|
||||
startPeriod = 60
|
||||
}
|
||||
}
|
||||
])
|
||||
|
||||
network_mode = "awsvpc"
|
||||
memory = each.memory
|
||||
cpu = each.cpu
|
||||
requires_compatibilities = ["FARGATE"]
|
||||
|
||||
execution_role_arn = aws_iam_role.execution[each.key].arn
|
||||
task_role_arn = aws_iam_role.task[each.key].arn
|
||||
|
||||
tags = {
|
||||
Name = "${var.cluster_name}-${each.key}"
|
||||
}
|
||||
}
|
||||
|
||||
resource "aws_iam_role" "execution" {
|
||||
for_each = var.services
|
||||
|
||||
name = "${var.cluster_name}-${each.key}-execution"
|
||||
|
||||
assume_role_policy = jsonencode({
|
||||
Version = "2012-10-17"
|
||||
Statement = [
|
||||
{
|
||||
Action = "sts:AssumeRole"
|
||||
Effect = "Allow"
|
||||
Principal = {
|
||||
Service = "ecs-tasks.amazonaws.com"
|
||||
}
|
||||
}
|
||||
]
|
||||
})
|
||||
|
||||
managed_policy_arns = [
|
||||
"arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy"
|
||||
]
|
||||
}
|
||||
|
||||
resource "aws_iam_role" "task" {
|
||||
for_each = var.services
|
||||
|
||||
name = "${var.cluster_name}-${each.key}-task"
|
||||
|
||||
assume_role_policy = jsonencode({
|
||||
Version = "2012-10-17"
|
||||
Statement = [
|
||||
{
|
||||
Action = "sts:AssumeRole"
|
||||
Effect = "Allow"
|
||||
Principal = {
|
||||
Service = "ecs-tasks.amazonaws.com"
|
||||
}
|
||||
}
|
||||
]
|
||||
})
|
||||
|
||||
managed_policy_arns = [
|
||||
"arn:aws:iam::aws:policy/SecretsManagerReadOnly"
|
||||
]
|
||||
|
||||
inline_policy {
|
||||
name = "elasticache-access"
|
||||
policy = jsonencode({
|
||||
Version = "2012-10-17"
|
||||
Statement = [
|
||||
{
|
||||
Effect = "Allow"
|
||||
Action = [
|
||||
"elasticache:DescribeCacheClusters",
|
||||
"elasticache:DescribeCacheSubnetGroups"
|
||||
]
|
||||
Resource = "*"
|
||||
}
|
||||
]
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
resource "aws_ecs_service" "services" {
|
||||
for_each = var.services
|
||||
|
||||
name = "${var.cluster_name}-${each.key}"
|
||||
cluster = aws_ecs_cluster.main.id
|
||||
task_definition = aws_ecs_task_definition.services[each.key].arn
|
||||
desired_count = var.environment == "production" ? 3 : 1
|
||||
|
||||
launch_desired_count = "FARGATE"
|
||||
|
||||
network_configuration {
|
||||
subnets = var.subnet_ids
|
||||
security_groups = var.security_group_ids
|
||||
assign_public_ip = false
|
||||
}
|
||||
|
||||
load_balancer {
|
||||
target_group_arn = aws_lb_target_group.services[each.key].arn
|
||||
container_name = each.key
|
||||
container_port = each.port
|
||||
}
|
||||
|
||||
auto_scaling {
|
||||
max_capacity = var.environment == "production" ? 10 : 3
|
||||
min_capacity = var.environment == "production" ? 2 : 1
|
||||
}
|
||||
|
||||
tags = {
|
||||
Name = "${var.cluster_name}-${each.key}"
|
||||
Service = each.key
|
||||
}
|
||||
|
||||
depends_on = [
|
||||
aws_lb_listener.services
|
||||
]
|
||||
}
|
||||
|
||||
resource "aws_lb" "main" {
|
||||
name = "${var.cluster_name}-alb"
|
||||
internal = false
|
||||
load_balancer_type = "application"
|
||||
security_groups = var.security_group_ids
|
||||
subnets = var.subnet_ids
|
||||
|
||||
tags = {
|
||||
Name = "${var.cluster_name}-alb"
|
||||
}
|
||||
}
|
||||
|
||||
resource "aws_lb_target_group" "services" {
|
||||
for_each = var.services
|
||||
|
||||
name = "${var.cluster_name}-${each.key}-tg"
|
||||
port = each.port
|
||||
protocol = "HTTP"
|
||||
vpc_id = var.vpc_id
|
||||
|
||||
health_check {
|
||||
enabled = true
|
||||
healthy_threshold = 3
|
||||
interval = 30
|
||||
matcher = "200"
|
||||
path = "/health"
|
||||
port = "traffic-port"
|
||||
protocol = "HTTP"
|
||||
timeout = 5
|
||||
unhealthy_threshold = 3
|
||||
}
|
||||
|
||||
stickiness {
|
||||
type = "lb_cookie"
|
||||
cookie_duration = 86400
|
||||
}
|
||||
}
|
||||
|
||||
resource "aws_lb_listener" "services" {
|
||||
for_each = var.services
|
||||
|
||||
load_balancer_arn = aws_lb.main.arn
|
||||
port = 80
|
||||
protocol = "HTTP"
|
||||
|
||||
default_action {
|
||||
type = "forward"
|
||||
target_group_arn = aws_lb_target_group.services[each.key].arn
|
||||
}
|
||||
}
|
||||
|
||||
resource "aws_appautoscaling_target" "services" {
|
||||
for_each = var.services
|
||||
|
||||
service_namespace = "ecs"
|
||||
resource_id = "service/${aws_ecs_cluster.main.name}/${aws_ecs_service.services[each.key].name}"
|
||||
scalable_dimension = "ecs:service:DesiredCount"
|
||||
min_capacity = var.environment == "production" ? 2 : 1
|
||||
max_capacity = var.environment == "production" ? 10 : 3
|
||||
}
|
||||
|
||||
resource "aws_appautoscaling_policy" "cpu" {
|
||||
for_each = var.services
|
||||
|
||||
name = "${var.cluster_name}-${each.key}-cpu-scaling"
|
||||
service_namespace = "ecs"
|
||||
resource_id = "service/${aws_ecs_cluster.main.name}/${aws_ecs_service.services[each.key].name}"
|
||||
scalable_dimension = "ecs:service:DesiredCount"
|
||||
|
||||
target_tracking_scaling_policy_configuration {
|
||||
target_value = 70.0
|
||||
scale_in_cooldown = 60
|
||||
scale_out_cooldown = 30
|
||||
|
||||
customized_metric_specification {
|
||||
metric_name = "CPUUtilization"
|
||||
namespace = "AWS/ECS"
|
||||
statistic = "Average"
|
||||
dimensions = [{ name = "ClusterName", value = aws_ecs_cluster.main.name }]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
resource "aws_cloudwatch_log_group" "services" {
|
||||
for_each = var.services
|
||||
|
||||
name = "/ecs/${var.cluster_name}-${each.key}"
|
||||
retention_in_days = var.environment == "production" ? 30 : 7
|
||||
|
||||
tags = {
|
||||
Name = "${var.cluster_name}-${each.key}-logs"
|
||||
}
|
||||
}
|
||||
|
||||
output "cluster_arn" {
|
||||
description = "ECS cluster ARN"
|
||||
value = aws_ecs_cluster.main.arn
|
||||
}
|
||||
|
||||
output "alb_dns_name" {
|
||||
description = "ALB DNS name"
|
||||
value = aws_lb.main.dns_name
|
||||
}
|
||||
Reference in New Issue
Block a user