Add Terraform AWS infrastructure and enhanced CI/CD pipeline (FRE-4574)
- Terraform modules: VPC, ECS Fargate, RDS PostgreSQL, ElastiCache Redis, S3, Secrets Manager, CloudWatch - Multi-environment support: staging and production configs - ECS auto-scaling: CPU-based scaling with configurable min/max - CI/CD: pnpm caching, Docker Buildx, Trivy security scanning, Terraform plan on PR - Deploy: ECS service updates with automatic rollback on health check failure - Backup: automated RDS snapshots, S3 versioning, ElastiCache snapshots - Monitoring: CloudWatch dashboards, CPU/memory/5xx alarms - Rollback script for manual service rollback - Infrastructure documentation with architecture overview
This commit is contained in:
183
infra/modules/cloudwatch/main.tf
Normal file
183
infra/modules/cloudwatch/main.tf
Normal file
@@ -0,0 +1,183 @@
|
||||
variable "environment" {
|
||||
description = "Deployment environment"
|
||||
type = string
|
||||
}
|
||||
|
||||
variable "cluster_name" {
|
||||
description = "ECS cluster name"
|
||||
type = string
|
||||
}
|
||||
|
||||
variable "project_name" {
|
||||
description = "Project name"
|
||||
type = string
|
||||
}
|
||||
|
||||
variable "rds_identifier" {
|
||||
description = "RDS instance identifier"
|
||||
type = string
|
||||
}
|
||||
|
||||
variable "cache_endpoint" {
|
||||
description = "ElastiCache endpoint"
|
||||
type = string
|
||||
}
|
||||
|
||||
resource "aws_cloudwatch_dashboard" "main" {
|
||||
dashboard_name = "${var.project_name}-${var.environment}-dashboard"
|
||||
|
||||
dashboard_body = jsonencode({
|
||||
widgets = [
|
||||
{
|
||||
type = "metric"
|
||||
properties = {
|
||||
title = "ECS CPU Utilization"
|
||||
metrics = [
|
||||
["AWS/ECS", "CPUUtilization", "ClusterName", var.cluster_name]
|
||||
]
|
||||
view = "timeSeries"
|
||||
stacked = false
|
||||
region = "us-east-1"
|
||||
period = 300
|
||||
}
|
||||
},
|
||||
{
|
||||
type = "metric"
|
||||
properties = {
|
||||
title = "ECS Memory Utilization"
|
||||
metrics = [
|
||||
["AWS/ECS", "MemoryUtilization", "ClusterName", var.cluster_name]
|
||||
]
|
||||
view = "timeSeries"
|
||||
stacked = false
|
||||
region = "us-east-1"
|
||||
period = 300
|
||||
}
|
||||
},
|
||||
{
|
||||
type = "metric"
|
||||
properties = {
|
||||
title = "RDS CPU Utilization"
|
||||
metrics = [
|
||||
["AWS/RDS", "CPUUtilization", "DBInstanceIdentifier", var.rds_identifier]
|
||||
]
|
||||
view = "timeSeries"
|
||||
stacked = false
|
||||
region = "us-east-1"
|
||||
period = 300
|
||||
}
|
||||
},
|
||||
{
|
||||
type = "metric"
|
||||
properties = {
|
||||
title = "ALB Request Count"
|
||||
metrics = [
|
||||
["AWS/ApplicationELB", "RequestCount", "LoadBalancer", "${var.cluster_name}-alb"]
|
||||
]
|
||||
view = "timeSeries"
|
||||
stacked = false
|
||||
region = "us-east-1"
|
||||
period = 60
|
||||
}
|
||||
},
|
||||
{
|
||||
type = "metric"
|
||||
properties = {
|
||||
title = "ALB 5xx Errors"
|
||||
metrics = [
|
||||
["AWS/ApplicationELB", "HTTPCode_Elb_5XX_Count", "LoadBalancer", "${var.cluster_name}-alb"]
|
||||
]
|
||||
view = "timeSeries"
|
||||
stacked = false
|
||||
region = "us-east-1"
|
||||
period = 60
|
||||
}
|
||||
}
|
||||
]
|
||||
})
|
||||
}
|
||||
|
||||
resource "aws_cloudwatch_metric_alarm" "ecs_cpu_high" {
|
||||
alarm_name = "${var.project_name}-${var.environment}-ecs-cpu-high"
|
||||
comparison_operator = "GreaterThanThreshold"
|
||||
evaluation_periods = 2
|
||||
metric_name = "CPUUtilization"
|
||||
namespace = "AWS/ECS"
|
||||
period = 300
|
||||
statistic = "Average"
|
||||
threshold = 80
|
||||
alarm_description = "ECS CPU utilization above 80%"
|
||||
|
||||
dimensions = {
|
||||
ClusterName = var.cluster_name
|
||||
}
|
||||
}
|
||||
|
||||
resource "aws_cloudwatch_metric_alarm" "ecs_memory_high" {
|
||||
alarm_name = "${var.project_name}-${var.environment}-ecs-memory-high"
|
||||
comparison_operator = "GreaterThanThreshold"
|
||||
evaluation_periods = 2
|
||||
metric_name = "MemoryUtilization"
|
||||
namespace = "AWS/ECS"
|
||||
period = 300
|
||||
statistic = "Average"
|
||||
threshold = 85
|
||||
alarm_description = "ECS memory utilization above 85%"
|
||||
|
||||
dimensions = {
|
||||
ClusterName = var.cluster_name
|
||||
}
|
||||
}
|
||||
|
||||
resource "aws_cloudwatch_metric_alarm" "alb_5xx" {
|
||||
alarm_name = "${var.project_name}-${var.environment}-alb-5xx"
|
||||
comparison_operator = "GreaterThanThreshold"
|
||||
evaluation_periods = 3
|
||||
metric_name = "HTTPCode_Elb_5XX_Count"
|
||||
namespace = "AWS/ApplicationELB"
|
||||
period = 60
|
||||
statistic = "Sum"
|
||||
threshold = 10
|
||||
alarm_description = "ALB 5xx errors above 10 per minute"
|
||||
|
||||
dimensions = {
|
||||
LoadBalancer = "${var.cluster_name}-alb"
|
||||
}
|
||||
}
|
||||
|
||||
resource "aws_cloudwatch_metric_alarm" "rds_cpu_high" {
|
||||
alarm_name = "${var.project_name}-${var.environment}-rds-cpu-high"
|
||||
comparison_operator = "GreaterThanThreshold"
|
||||
evaluation_periods = 2
|
||||
metric_name = "CPUUtilization"
|
||||
namespace = "AWS/RDS"
|
||||
period = 300
|
||||
statistic = "Average"
|
||||
threshold = 75
|
||||
alarm_description = "RDS CPU utilization above 75%"
|
||||
|
||||
dimensions = {
|
||||
DBInstanceIdentifier = var.rds_identifier
|
||||
}
|
||||
}
|
||||
|
||||
resource "aws_cloudwatch_metric_alarm" "rds_free_storage" {
|
||||
alarm_name = "${var.project_name}-${var.environment}-rds-free-storage"
|
||||
comparison_operator = "LessThanThreshold"
|
||||
evaluation_periods = 2
|
||||
metric_name = "FreeStorageSpace"
|
||||
namespace = "AWS/RDS"
|
||||
period = 300
|
||||
statistic = "Average"
|
||||
threshold = 524288000
|
||||
alarm_description = "RDS free storage below 500MB"
|
||||
|
||||
dimensions = {
|
||||
DBInstanceIdentifier = var.rds_identifier
|
||||
}
|
||||
}
|
||||
|
||||
output "dashboard_url" {
|
||||
description = "CloudWatch dashboard URL"
|
||||
value = "https://us-east-1.console.aws.amazon.com/cloudwatch/home#dashboards/dashboard/${var.project_name}-${var.environment}-dashboard"
|
||||
}
|
||||
Reference in New Issue
Block a user