Add Terraform AWS infrastructure and enhanced CI/CD pipeline (FRE-4574)
- Terraform modules: VPC, ECS Fargate, RDS PostgreSQL, ElastiCache Redis, S3, Secrets Manager, CloudWatch - Multi-environment support: staging and production configs - ECS auto-scaling: CPU-based scaling with configurable min/max - CI/CD: pnpm caching, Docker Buildx, Trivy security scanning, Terraform plan on PR - Deploy: ECS service updates with automatic rollback on health check failure - Backup: automated RDS snapshots, S3 versioning, ElastiCache snapshots - Monitoring: CloudWatch dashboards, CPU/memory/5xx alarms - Rollback script for manual service rollback - Infrastructure documentation with architecture overview
This commit is contained in:
78
.github/workflows/ci.yml
vendored
78
.github/workflows/ci.yml
vendored
@@ -24,11 +24,14 @@ jobs:
|
|||||||
uses: actions/setup-node@v4
|
uses: actions/setup-node@v4
|
||||||
with:
|
with:
|
||||||
node-version: ${{ env.NODE_VERSION }}
|
node-version: ${{ env.NODE_VERSION }}
|
||||||
cache: "npm"
|
cache: "pnpm"
|
||||||
|
- uses: pnpm/action-setup@v4
|
||||||
|
with:
|
||||||
|
version: ${{ env.PNPM_VERSION }}
|
||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
run: npm ci
|
run: pnpm install --frozen-lockfile
|
||||||
- name: Run linter
|
- name: Run linter
|
||||||
run: npm run lint
|
run: pnpm lint
|
||||||
|
|
||||||
typecheck:
|
typecheck:
|
||||||
name: Type Check
|
name: Type Check
|
||||||
@@ -39,11 +42,14 @@ jobs:
|
|||||||
uses: actions/setup-node@v4
|
uses: actions/setup-node@v4
|
||||||
with:
|
with:
|
||||||
node-version: ${{ env.NODE_VERSION }}
|
node-version: ${{ env.NODE_VERSION }}
|
||||||
cache: "npm"
|
cache: "pnpm"
|
||||||
|
- uses: pnpm/action-setup@v4
|
||||||
|
with:
|
||||||
|
version: ${{ env.PNPM_VERSION }}
|
||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
run: npm ci
|
run: pnpm install --frozen-lockfile
|
||||||
- name: Build all packages
|
- name: Build all packages
|
||||||
run: npm run build
|
run: pnpm build
|
||||||
|
|
||||||
test:
|
test:
|
||||||
name: Test Suite
|
name: Test Suite
|
||||||
@@ -77,15 +83,14 @@ jobs:
|
|||||||
uses: actions/setup-node@v4
|
uses: actions/setup-node@v4
|
||||||
with:
|
with:
|
||||||
node-version: ${{ env.NODE_VERSION }}
|
node-version: ${{ env.NODE_VERSION }}
|
||||||
cache: "npm"
|
cache: "pnpm"
|
||||||
|
- uses: pnpm/action-setup@v4
|
||||||
|
with:
|
||||||
|
version: ${{ env.PNPM_VERSION }}
|
||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
run: npm ci
|
run: pnpm install --frozen-lockfile
|
||||||
- name: Generate Prisma client
|
|
||||||
run: npx prisma generate --schema=packages/db/prisma/schema.prisma
|
|
||||||
env:
|
|
||||||
DATABASE_URL: "postgresql://shieldai:shieldai_dev@localhost:5432/shieldai"
|
|
||||||
- name: Run tests with coverage
|
- name: Run tests with coverage
|
||||||
run: npm run test:coverage
|
run: pnpm test:coverage
|
||||||
env:
|
env:
|
||||||
DATABASE_URL: "postgresql://shieldai:shieldai_dev@localhost:5432/shieldai"
|
DATABASE_URL: "postgresql://shieldai:shieldai_dev@localhost:5432/shieldai"
|
||||||
REDIS_URL: "redis://localhost:6379"
|
REDIS_URL: "redis://localhost:6379"
|
||||||
@@ -100,8 +105,9 @@ jobs:
|
|||||||
docker-build:
|
docker-build:
|
||||||
name: Docker Build
|
name: Docker Build
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
needs: [lint, typecheck]
|
needs: [lint, typecheck, test]
|
||||||
strategy:
|
strategy:
|
||||||
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
include:
|
include:
|
||||||
- name: api
|
- name: api
|
||||||
@@ -118,6 +124,8 @@ jobs:
|
|||||||
dockerfile: services/voiceprint/Dockerfile
|
dockerfile: services/voiceprint/Dockerfile
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v4
|
||||||
|
- name: Docker Buildx
|
||||||
|
uses: docker/setup-buildx-action@v3
|
||||||
- name: Build Docker image
|
- name: Build Docker image
|
||||||
uses: docker/build-push-action@v5
|
uses: docker/build-push-action@v5
|
||||||
with:
|
with:
|
||||||
@@ -127,3 +135,45 @@ jobs:
|
|||||||
tags: shieldai-${{ matrix.name }}:${{ github.sha }}
|
tags: shieldai-${{ matrix.name }}:${{ github.sha }}
|
||||||
cache-from: type=gha
|
cache-from: type=gha
|
||||||
cache-to: type=gha,mode=max
|
cache-to: type=gha,mode=max
|
||||||
|
|
||||||
|
security-scan:
|
||||||
|
name: Security Scan
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
needs: [lint]
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
- name: Run npm audit
|
||||||
|
run: pnpm audit --prod
|
||||||
|
continue-on-error: true
|
||||||
|
- name: Trivy filesystem scan
|
||||||
|
uses: aquasecurity/trivy-action@master
|
||||||
|
with:
|
||||||
|
scan-type: fs
|
||||||
|
scan-ref: "."
|
||||||
|
format: table
|
||||||
|
exit-code: 1
|
||||||
|
ignore-unfixed: true
|
||||||
|
severity: CRITICAL,HIGH
|
||||||
|
|
||||||
|
terraform-plan:
|
||||||
|
name: Terraform Plan
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
needs: [lint]
|
||||||
|
if: github.event_name == 'pull_request'
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
- name: Terraform Format
|
||||||
|
working-directory: infra
|
||||||
|
run: terraform fmt -check -diff
|
||||||
|
- name: Terraform Init
|
||||||
|
working-directory: infra
|
||||||
|
run: terraform init
|
||||||
|
- name: Terraform Validate
|
||||||
|
working-directory: infra
|
||||||
|
run: terraform validate
|
||||||
|
- name: Terraform Plan
|
||||||
|
working-directory: infra
|
||||||
|
run: terraform plan -var-file=environments/staging/terraform.tfvars.example -no-color
|
||||||
|
env:
|
||||||
|
TF_VAR_hibp_api_key: ${{ secrets.HIBP_API_KEY }}
|
||||||
|
TF_VAR_resend_api_key: ${{ secrets.RESEND_API_KEY }}
|
||||||
|
|||||||
192
.github/workflows/deploy.yml
vendored
192
.github/workflows/deploy.yml
vendored
@@ -12,6 +12,7 @@ concurrency:
|
|||||||
|
|
||||||
env:
|
env:
|
||||||
NODE_VERSION: "20"
|
NODE_VERSION: "20"
|
||||||
|
PNPM_VERSION: "9"
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
detect-environment:
|
detect-environment:
|
||||||
@@ -19,6 +20,7 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
outputs:
|
outputs:
|
||||||
environment: ${{ steps.detect.outputs.environment }}
|
environment: ${{ steps.detect.outputs.environment }}
|
||||||
|
tag: ${{ steps.tag.outputs.tag }}
|
||||||
steps:
|
steps:
|
||||||
- name: Detect deployment target
|
- name: Detect deployment target
|
||||||
id: detect
|
id: detect
|
||||||
@@ -28,13 +30,59 @@ jobs:
|
|||||||
else
|
else
|
||||||
echo "environment=staging" >> $GITHUB_OUTPUT
|
echo "environment=staging" >> $GITHUB_OUTPUT
|
||||||
fi
|
fi
|
||||||
|
- name: Calculate tag
|
||||||
|
id: tag
|
||||||
|
run: |
|
||||||
|
if [ "${{ needs.detect-environment.outputs.environment }}" = "production" ]; then
|
||||||
|
echo "tag=${{ github.event.release.tag_name }}" >> $GITHUB_OUTPUT
|
||||||
|
else
|
||||||
|
echo "tag=${{ github.sha }}" >> $GITHUB_OUTPUT
|
||||||
|
fi
|
||||||
|
|
||||||
|
terraform-apply:
|
||||||
|
name: Terraform Apply
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
needs: detect-environment
|
||||||
|
environment: ${{ needs.detect-environment.outputs.environment }}
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
- name: Setup Terraform
|
||||||
|
uses: hashicorp/setup-terraform@v3
|
||||||
|
with:
|
||||||
|
terraform_version: "~> 1.5"
|
||||||
|
- name: Terraform Init
|
||||||
|
working-directory: infra/environments/${{ needs.detect-environment.outputs.environment }}
|
||||||
|
run: terraform init -backend-config="bucket=shieldai-${{ needs.detect-environment.outputs.environment }}-terraform-state"
|
||||||
|
- name: Terraform Plan
|
||||||
|
id: plan
|
||||||
|
working-directory: infra/environments/${{ needs.detect-environment.outputs.environment }}
|
||||||
|
run: |
|
||||||
|
terraform plan \
|
||||||
|
-var="hibp_api_key=${{ secrets.HIBP_API_KEY }}" \
|
||||||
|
-var="resend_api_key=${{ secrets.RESEND_API_KEY }}" \
|
||||||
|
-var="sentry_dsn=${{ secrets.SENTRY_DSN }}" \
|
||||||
|
-var="datadog_api_key=${{ secrets.DATADOG_API_KEY }}" \
|
||||||
|
-no-color | tee /tmp/terraform-plan.out
|
||||||
|
- name: Terraform Apply
|
||||||
|
working-directory: infra/environments/${{ needs.detect-environment.outputs.environment }}
|
||||||
|
run: |
|
||||||
|
terraform apply -auto-approve \
|
||||||
|
-var="hibp_api_key=${{ secrets.HIBP_API_KEY }}" \
|
||||||
|
-var="resend_api_key=${{ secrets.RESEND_API_KEY }}" \
|
||||||
|
-var="sentry_dsn=${{ secrets.SENTRY_DSN }}" \
|
||||||
|
-var="datadog_api_key=${{ secrets.DATADOG_API_KEY }}"
|
||||||
|
env:
|
||||||
|
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
|
||||||
|
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
|
||||||
|
AWS_DEFAULT_REGION: us-east-1
|
||||||
|
|
||||||
build-and-push:
|
build-and-push:
|
||||||
name: Build and Push Docker Images
|
name: Build and Push Docker Images
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
needs: detect-environment
|
needs: [detect-environment]
|
||||||
environment: ${{ needs.detect-environment.outputs.environment }}
|
environment: ${{ needs.detect-environment.outputs.environment }}
|
||||||
strategy:
|
strategy:
|
||||||
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
include:
|
include:
|
||||||
- name: api
|
- name: api
|
||||||
@@ -47,6 +95,8 @@ jobs:
|
|||||||
dockerfile: services/voiceprint/Dockerfile
|
dockerfile: services/voiceprint/Dockerfile
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v4
|
||||||
|
- name: Docker Buildx
|
||||||
|
uses: docker/setup-buildx-action@v3
|
||||||
- name: Login to Container Registry
|
- name: Login to Container Registry
|
||||||
uses: docker/login-action@v3
|
uses: docker/login-action@v3
|
||||||
with:
|
with:
|
||||||
@@ -55,47 +105,127 @@ jobs:
|
|||||||
password: ${{ secrets.GITHUB_TOKEN }}
|
password: ${{ secrets.GITHUB_TOKEN }}
|
||||||
- name: Calculate image tag
|
- name: Calculate image tag
|
||||||
id: tag
|
id: tag
|
||||||
run: |
|
run: echo "tag=${{ needs.detect-environment.outputs.tag }}" >> $GITHUB_OUTPUT
|
||||||
if [ "${{ needs.detect-environment.outputs.environment }}" = "production" ]; then
|
|
||||||
echo "tag=${{ github.event.release.tag_name }}" >> $GITHUB_OUTPUT
|
|
||||||
else
|
|
||||||
echo "tag=staging-${{ github.sha }}" >> $GITHUB_OUTPUT
|
|
||||||
fi
|
|
||||||
- name: Build and push ${{ matrix.name }}
|
- name: Build and push ${{ matrix.name }}
|
||||||
uses: docker/build-push-action@v5
|
uses: docker/build-push-action@v5
|
||||||
with:
|
with:
|
||||||
context: .
|
context: .
|
||||||
file: ${{ matrix.dockerfile }}
|
file: ${{ matrix.dockerfile }}
|
||||||
push: true
|
push: true
|
||||||
tags: ghcr.io/${{ github.repository_owner }}/shieldai-${{ matrix.name }}:${{ steps.tag.outputs.tag }}
|
tags: |
|
||||||
|
ghcr.io/${{ github.repository_owner }}/shieldai-${{ matrix.name }}:${{ steps.tag.outputs.tag }}
|
||||||
|
ghcr.io/${{ github.repository_owner }}/shieldai-${{ matrix.name }}:latest
|
||||||
cache-from: type=gha
|
cache-from: type=gha
|
||||||
cache-to: type=gha,mode=max
|
cache-to: type=gha,mode=max
|
||||||
|
|
||||||
deploy:
|
deploy-ecs:
|
||||||
name: Deploy to ${{ needs.detect-environment.outputs.environment }}
|
name: Deploy to ECS
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
needs: [detect-environment, build-and-push]
|
needs: [detect-environment, terraform-apply, build-and-push]
|
||||||
environment: ${{ needs.detect-environment.outputs.environment }}
|
environment: ${{ needs.detect-environment.outputs.environment }}
|
||||||
|
strategy:
|
||||||
|
fail-fast: false
|
||||||
|
matrix:
|
||||||
|
service: [api, darkwatch, spamshield, voiceprint]
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v4
|
||||||
- name: Calculate deployment tag
|
- name: Configure AWS
|
||||||
id: tag
|
uses: aws-actions/configure-aws-credentials@v4
|
||||||
run: |
|
|
||||||
if [ "${{ needs.detect-environment.outputs.environment }}" = "production" ]; then
|
|
||||||
echo "tag=${{ github.event.release.tag_name }}" >> $GITHUB_OUTPUT
|
|
||||||
else
|
|
||||||
echo "tag=staging-${{ github.sha }}" >> $GITHUB_OUTPUT
|
|
||||||
fi
|
|
||||||
- name: Deploy via Docker Compose
|
|
||||||
uses: appleboy/ssh-action@v1
|
|
||||||
with:
|
with:
|
||||||
host: ${{ secrets.DEPLOY_HOST }}
|
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
|
||||||
username: ${{ secrets.DEPLOY_USER }}
|
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
|
||||||
key: ${{ secrets.DEPLOY_SSH_KEY }}
|
aws-region: us-east-1
|
||||||
script: |
|
- name: Update ECS Service
|
||||||
cd /opt/shieldai
|
run: |
|
||||||
export DOCKER_TAG="${{ steps.tag.outputs.tag }}"
|
IMAGE="ghcr.io/${{ github.repository_owner }}/shieldai-${{ matrix.service }}:${{ needs.detect-environment.outputs.tag }}"
|
||||||
export ENVIRONMENT="${{ needs.detect-environment.outputs.environment }}"
|
CLUSTER="shieldai-${{ needs.detect-environment.outputs.environment }}"
|
||||||
docker compose pull
|
SERVICE="${{ matrix.service }}"
|
||||||
docker compose up -d
|
|
||||||
docker image prune -f
|
TASK_DEF=$(aws ecs describe-task-definition \
|
||||||
|
--task-definition "${CLUSTER}-${SERVICE}" \
|
||||||
|
--query 'taskDefinition' --output json)
|
||||||
|
|
||||||
|
NEW_TASK_DEF=$(echo "$TASK_DEF" | jq \
|
||||||
|
--arg image "$IMAGE" \
|
||||||
|
'.containerDefinitions[0].image = $image')
|
||||||
|
|
||||||
|
NEW_TASK_DEF_ARN=$(echo "$NEW_TASK_DEF" | \
|
||||||
|
aws ecs register-task-definition \
|
||||||
|
--family "${CLUSTER}-${SERVICE}" \
|
||||||
|
--cli-input-json - \
|
||||||
|
--query 'taskDefinition.taskDefinitionArn' --output text)
|
||||||
|
|
||||||
|
aws ecs update-service \
|
||||||
|
--cluster "$CLUSTER" \
|
||||||
|
--service "${CLUSTER}-${SERVICE}" \
|
||||||
|
--task-definition "$NEW_TASK_DEF_ARN" \
|
||||||
|
--force-new-deployment
|
||||||
|
|
||||||
|
echo "Deployed $IMAGE to $SERVICE"
|
||||||
|
|
||||||
|
health-check:
|
||||||
|
name: Post-Deploy Health Check
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
needs: [detect-environment, deploy-ecs]
|
||||||
|
environment: ${{ needs.detect-environment.outputs.environment }}
|
||||||
|
steps:
|
||||||
|
- name: Wait for deployment
|
||||||
|
run: sleep 30
|
||||||
|
- name: Health Check
|
||||||
|
uses: jasongd/retry-action@v2
|
||||||
|
with:
|
||||||
|
timeout-minutes: 5
|
||||||
|
retry-minutes: 10
|
||||||
|
command: |
|
||||||
|
ALB_DNS=$(aws ecs describe-services \
|
||||||
|
--cluster "shieldai-${{ needs.detect-environment.outputs.environment }}" \
|
||||||
|
--services "shieldai-${{ needs.detect-environment.outputs.environment }}-api" \
|
||||||
|
--query 'services[0].loadBalancers[0].targetGroupArn' --output text)
|
||||||
|
|
||||||
|
for service in api darkwatch spamshield voiceprint; do
|
||||||
|
PORT=$(case $service in
|
||||||
|
api) echo 3000;;
|
||||||
|
darkwatch) echo 3001;;
|
||||||
|
spamshield) echo 3002;;
|
||||||
|
voiceprint) echo 3003;;
|
||||||
|
esac)
|
||||||
|
|
||||||
|
HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" \
|
||||||
|
"https://shieldai-${{ needs.detect-environment.outputs.environment }}-alb.us-east-1.elb.amazonaws.com/health" || true)
|
||||||
|
|
||||||
|
if [ "$HTTP_CODE" = "200" ]; then
|
||||||
|
echo "Health check passed: $service"
|
||||||
|
else
|
||||||
|
echo "Health check failed: $service (HTTP $HTTP_CODE)"
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
|
||||||
|
rollback:
|
||||||
|
name: Rollback on Failure
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
needs: [detect-environment, deploy-ecs, health-check]
|
||||||
|
environment: ${{ needs.detect-environment.outputs.environment }}
|
||||||
|
if: failure() && needs.health-check.result == 'failure'
|
||||||
|
strategy:
|
||||||
|
fail-fast: false
|
||||||
|
matrix:
|
||||||
|
service: [api, darkwatch, spamshield, voiceprint]
|
||||||
|
steps:
|
||||||
|
- name: Configure AWS
|
||||||
|
uses: aws-actions/configure-aws-credentials@v4
|
||||||
|
with:
|
||||||
|
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
|
||||||
|
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
|
||||||
|
aws-region: us-east-1
|
||||||
|
- name: Rollback ECS Service
|
||||||
|
run: |
|
||||||
|
CLUSTER="shieldai-${{ needs.detect-environment.outputs.environment }}"
|
||||||
|
SERVICE="${{ matrix.service }}"
|
||||||
|
|
||||||
|
aws ecs update-service \
|
||||||
|
--cluster "$CLUSTER" \
|
||||||
|
--service "${CLUSTER}-${SERVICE}" \
|
||||||
|
--rollback \
|
||||||
|
--no-cli-auto-prompt
|
||||||
|
|
||||||
|
echo "Rolled back $SERVICE"
|
||||||
|
|||||||
9
infra/.gitignore
vendored
Normal file
9
infra/.gitignore
vendored
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
.terraform/
|
||||||
|
*.tfstate
|
||||||
|
*.tfstate.backup
|
||||||
|
*.tfvars
|
||||||
|
.terraform.lock.hcl
|
||||||
|
override.tf
|
||||||
|
override.tf.json
|
||||||
|
*_override.tf
|
||||||
|
*_override.tf.json
|
||||||
114
infra/README.md
Normal file
114
infra/README.md
Normal file
@@ -0,0 +1,114 @@
|
|||||||
|
/infra/
|
||||||
|
├── main.tf # Root module: VPC, ECS, RDS, ElastiCache, S3, Secrets, CloudWatch
|
||||||
|
├── variables.tf # Input variables with validation
|
||||||
|
├── outputs.tf # Output values (endpoints, ARNs, URLs)
|
||||||
|
├── modules/
|
||||||
|
│ ├── vpc/main.tf # VPC, subnets, IGW, NAT GW, security groups
|
||||||
|
│ ├── ecs/main.tf # ECS cluster, task definitions, services, ALB, auto-scaling
|
||||||
|
│ ├── rds/main.tf # RDS PostgreSQL with automated backups
|
||||||
|
│ ├── elasticache/main.tf # ElastiCache Redis with replication
|
||||||
|
│ ├── s3/main.tf # S3 buckets: state, artifacts, logs
|
||||||
|
│ ├── secrets/main.tf # AWS Secrets Manager
|
||||||
|
│ └── cloudwatch/main.tf # Dashboards, alarms, notifications
|
||||||
|
├── environments/
|
||||||
|
│ ├── staging/main.tf # Staging environment config
|
||||||
|
│ └── production/main.tf # Production environment config
|
||||||
|
└── scripts/
|
||||||
|
└── rollback.sh # Manual rollback script
|
||||||
|
|
||||||
|
## Quick Start
|
||||||
|
|
||||||
|
### Prerequisites
|
||||||
|
- Terraform >= 1.5.0
|
||||||
|
- AWS CLI configured with appropriate credentials
|
||||||
|
- AWS account with ECS, RDS, ElastiCache permissions
|
||||||
|
|
||||||
|
### Initialize
|
||||||
|
```bash
|
||||||
|
cd infra/environments/staging
|
||||||
|
terraform init
|
||||||
|
terraform plan -var-file=terraform.tfvars.example
|
||||||
|
terraform apply -var-file=terraform.tfvars.example
|
||||||
|
```
|
||||||
|
|
||||||
|
### Deploy via CI/CD
|
||||||
|
- Push to `main` → deploys to staging
|
||||||
|
- Create a release → deploys to production
|
||||||
|
- Health check failure → automatic rollback
|
||||||
|
|
||||||
|
## Architecture
|
||||||
|
|
||||||
|
### Networking
|
||||||
|
- VPC with public/private subnets across multiple AZs
|
||||||
|
- NAT Gateway for outbound traffic from private subnets
|
||||||
|
- Security groups: ECS → RDS (5432), ECS → ElastiCache (6379)
|
||||||
|
|
||||||
|
### Compute
|
||||||
|
- ECS Fargate for serverless container orchestration
|
||||||
|
- Application Load Balancer with health checks
|
||||||
|
- Auto-scaling: CPU-based scaling (70% target)
|
||||||
|
- Production: 3 replicas per service, min 2, max 10
|
||||||
|
|
||||||
|
### Data
|
||||||
|
- RDS PostgreSQL 16.2 with Multi-AZ (production)
|
||||||
|
- Automated daily backups, 7-14 day retention
|
||||||
|
- ElastiCache Redis 7.0 with replication
|
||||||
|
- S3 with versioning and lifecycle policies
|
||||||
|
|
||||||
|
### Secrets
|
||||||
|
- AWS Secrets Manager for all credentials
|
||||||
|
- ECS task execution role with SecretsManagerReadOnly
|
||||||
|
- DB credentials auto-rotated via RDS integration
|
||||||
|
|
||||||
|
### Monitoring
|
||||||
|
- CloudWatch dashboards: CPU, memory, ALB metrics
|
||||||
|
- Alarms: CPU >80%, memory >85%, 5xx >10/min, RDS storage <500MB
|
||||||
|
- Container Insights enabled for ECS
|
||||||
|
- Logs: 30-day retention (production), 7-day (staging)
|
||||||
|
|
||||||
|
### Backup Strategy
|
||||||
|
- RDS: automated snapshots every 24h, 7-14 day retention
|
||||||
|
- RDS: Multi-AZ for automatic failover (production)
|
||||||
|
- ElastiCache: daily snapshots, 1-7 day retention
|
||||||
|
- S3: versioning enabled, non-current versions expire after 30 days
|
||||||
|
- Terraform state: S3 with versioning + DynamoDB locking
|
||||||
|
|
||||||
|
## Rollback
|
||||||
|
|
||||||
|
### Automatic (CI/CD)
|
||||||
|
The deploy workflow triggers automatic rollback when health checks fail:
|
||||||
|
```
|
||||||
|
deploy-ecs → health-check (failure) → rollback
|
||||||
|
```
|
||||||
|
|
||||||
|
### Manual
|
||||||
|
```bash
|
||||||
|
# Rollback specific service
|
||||||
|
cd infra/scripts
|
||||||
|
./rollback.sh staging api
|
||||||
|
|
||||||
|
# Rollback all services
|
||||||
|
./rollback.sh staging all
|
||||||
|
```
|
||||||
|
|
||||||
|
### Database Migration Rollback
|
||||||
|
```bash
|
||||||
|
# Run previous migration
|
||||||
|
DATABASE_URL=$(aws secretsmanager get-secret-value \
|
||||||
|
--secret-id shieldai-staging-db-password \
|
||||||
|
--query 'SecretString' --output json | jq -r '.host')
|
||||||
|
|
||||||
|
npx prisma migrate resolve --applied <migration_name>
|
||||||
|
npx prisma migrate deploy
|
||||||
|
```
|
||||||
|
|
||||||
|
## GitHub Secrets Required
|
||||||
|
| Secret | Description |
|
||||||
|
|--------|-------------|
|
||||||
|
| AWS_ACCESS_KEY_ID | IAM user with ECS, RDS, ElastiCache permissions |
|
||||||
|
| AWS_SECRET_ACCESS_KEY | IAM secret key |
|
||||||
|
| HIBP_API_KEY | Have I Been Pwned API key |
|
||||||
|
| RESEND_API_KEY | Resend email API key |
|
||||||
|
| SENTRY_DSN | Sentry error tracking DSN |
|
||||||
|
| DATADOG_API_KEY | Datadog monitoring API key |
|
||||||
|
| GITHUB_TOKEN | Auto-provided, needs write:packages scope |
|
||||||
57
infra/environments/production/main.tf
Normal file
57
infra/environments/production/main.tf
Normal file
@@ -0,0 +1,57 @@
|
|||||||
|
terraform {
|
||||||
|
backend "s3" {
|
||||||
|
bucket = "shieldai-production-terraform-state"
|
||||||
|
key = "production/terraform.tfstate"
|
||||||
|
region = "us-east-1"
|
||||||
|
encrypt = true
|
||||||
|
dynamodb_table = "shieldai-terraform-locks"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
module "shieldai" {
|
||||||
|
source = "../.."
|
||||||
|
|
||||||
|
environment = "production"
|
||||||
|
aws_region = "us-east-1"
|
||||||
|
project_name = "shieldai"
|
||||||
|
vpc_cidr = "10.1.0.0/16"
|
||||||
|
az_count = 3
|
||||||
|
|
||||||
|
db_instance_class = "db.r6g.large"
|
||||||
|
db_multi_az = true
|
||||||
|
db_backup_retention = 14
|
||||||
|
|
||||||
|
elasticache_node_type = "cache.r6g.large"
|
||||||
|
elasticache_num_nodes = 3
|
||||||
|
|
||||||
|
secrets = {
|
||||||
|
HIBP_API_KEY = var.hibp_api_key
|
||||||
|
RESEND_API_KEY = var.resend_api_key
|
||||||
|
SENTRY_DSN = var.sentry_dsn
|
||||||
|
DATADOG_API_KEY = var.datadog_api_key
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "hibp_api_key" {
|
||||||
|
description = "Have I Been Pwned API key"
|
||||||
|
type = string
|
||||||
|
sensitive = true
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "resend_api_key" {
|
||||||
|
description = "Resend API key"
|
||||||
|
type = string
|
||||||
|
sensitive = true
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "sentry_dsn" {
|
||||||
|
description = "Sentry DSN"
|
||||||
|
type = string
|
||||||
|
sensitive = true
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "datadog_api_key" {
|
||||||
|
description = "Datadog API key"
|
||||||
|
type = string
|
||||||
|
sensitive = true
|
||||||
|
}
|
||||||
4
infra/environments/production/terraform.tfvars.example
Normal file
4
infra/environments/production/terraform.tfvars.example
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
hibp_api_key = "YOUR_HIBP_API_KEY"
|
||||||
|
resend_api_key = "YOUR_RESEND_API_KEY"
|
||||||
|
sentry_dsn = "YOUR_SENTRY_DSN"
|
||||||
|
datadog_api_key = "YOUR_DATADOG_API_KEY"
|
||||||
57
infra/environments/staging/main.tf
Normal file
57
infra/environments/staging/main.tf
Normal file
@@ -0,0 +1,57 @@
|
|||||||
|
terraform {
|
||||||
|
backend "s3" {
|
||||||
|
bucket = "shieldai-staging-terraform-state"
|
||||||
|
key = "staging/terraform.tfstate"
|
||||||
|
region = "us-east-1"
|
||||||
|
encrypt = true
|
||||||
|
dynamodb_table = "shieldai-terraform-locks"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
module "shieldai" {
|
||||||
|
source = "../.."
|
||||||
|
|
||||||
|
environment = "staging"
|
||||||
|
aws_region = "us-east-1"
|
||||||
|
project_name = "shieldai"
|
||||||
|
vpc_cidr = "10.0.0.0/16"
|
||||||
|
az_count = 2
|
||||||
|
|
||||||
|
db_instance_class = "db.t3.medium"
|
||||||
|
db_multi_az = false
|
||||||
|
db_backup_retention = 3
|
||||||
|
|
||||||
|
elasticache_node_type = "cache.t3.small"
|
||||||
|
elasticache_num_nodes = 1
|
||||||
|
|
||||||
|
secrets = {
|
||||||
|
HIBP_API_KEY = var.hibp_api_key
|
||||||
|
RESEND_API_KEY = var.resend_api_key
|
||||||
|
SENTRY_DSN = var.sentry_dsn
|
||||||
|
DATADOG_API_KEY = var.datadog_api_key
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "hibp_api_key" {
|
||||||
|
description = "Have I Been Pwned API key"
|
||||||
|
type = string
|
||||||
|
sensitive = true
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "resend_api_key" {
|
||||||
|
description = "Resend API key"
|
||||||
|
type = string
|
||||||
|
sensitive = true
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "sentry_dsn" {
|
||||||
|
description = "Sentry DSN"
|
||||||
|
type = string
|
||||||
|
sensitive = true
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "datadog_api_key" {
|
||||||
|
description = "Datadog API key"
|
||||||
|
type = string
|
||||||
|
sensitive = true
|
||||||
|
}
|
||||||
4
infra/environments/staging/terraform.tfvars.example
Normal file
4
infra/environments/staging/terraform.tfvars.example
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
hibp_api_key = "YOUR_HIBP_API_KEY"
|
||||||
|
resend_api_key = "YOUR_RESEND_API_KEY"
|
||||||
|
sentry_dsn = "YOUR_SENTRY_DSN"
|
||||||
|
datadog_api_key = "YOUR_DATADOG_API_KEY"
|
||||||
107
infra/main.tf
Normal file
107
infra/main.tf
Normal file
@@ -0,0 +1,107 @@
|
|||||||
|
terraform {
|
||||||
|
required_version = ">= 1.5.0"
|
||||||
|
|
||||||
|
required_providers {
|
||||||
|
aws = {
|
||||||
|
source = "hashicorp/aws"
|
||||||
|
version = "~> 5.30"
|
||||||
|
}
|
||||||
|
github = {
|
||||||
|
source = "integrations/github"
|
||||||
|
version = "~> 6.0"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
backend "s3" {
|
||||||
|
bucket = "shieldai-terraform-state"
|
||||||
|
key = "global/terraform.tfstate"
|
||||||
|
region = "us-east-1"
|
||||||
|
encrypt = true
|
||||||
|
dynamodb_table = "shieldai-terraform-locks"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
provider "aws" {
|
||||||
|
region = var.aws_region
|
||||||
|
|
||||||
|
default_tags {
|
||||||
|
tags = {
|
||||||
|
Project = "ShieldAI"
|
||||||
|
ManagedBy = "terraform"
|
||||||
|
Environment = var.environment
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
module "vpc" {
|
||||||
|
source = "./modules/vpc"
|
||||||
|
|
||||||
|
environment = var.environment
|
||||||
|
vpc_cidr = var.vpc_cidr
|
||||||
|
az_count = var.az_count
|
||||||
|
project_name = var.project_name
|
||||||
|
}
|
||||||
|
|
||||||
|
module "ecs" {
|
||||||
|
source = "./modules/ecs"
|
||||||
|
|
||||||
|
environment = var.environment
|
||||||
|
cluster_name = "${var.project_name}-${var.environment}"
|
||||||
|
vpc_id = module.vpc.vpc_id
|
||||||
|
subnet_ids = module.vpc.private_subnet_ids
|
||||||
|
security_group_ids = [module.vpc.ecs_security_group_id]
|
||||||
|
services = var.services
|
||||||
|
container_images = var.container_images
|
||||||
|
secrets_arn = module.secrets.secrets_manager_arn
|
||||||
|
}
|
||||||
|
|
||||||
|
module "rds" {
|
||||||
|
source = "./modules/rds"
|
||||||
|
|
||||||
|
environment = var.environment
|
||||||
|
vpc_id = module.vpc.vpc_id
|
||||||
|
subnet_ids = module.vpc.private_subnet_ids
|
||||||
|
security_group_id = module.vpc.rds_security_group_id
|
||||||
|
db_name = var.db_name
|
||||||
|
db_instance_class = var.db_instance_class
|
||||||
|
multi_az = var.db_multi_az
|
||||||
|
backup_retention = var.db_backup_retention
|
||||||
|
project_name = var.project_name
|
||||||
|
}
|
||||||
|
|
||||||
|
module "elasticache" {
|
||||||
|
source = "./modules/elasticache"
|
||||||
|
|
||||||
|
environment = var.environment
|
||||||
|
vpc_id = module.vpc.vpc_id
|
||||||
|
subnet_ids = module.vpc.private_subnet_ids
|
||||||
|
security_group_id = module.vpc.elasticache_security_group_id
|
||||||
|
node_type = var.elasticache_node_type
|
||||||
|
num_nodes = var.elasticache_num_nodes
|
||||||
|
project_name = var.project_name
|
||||||
|
}
|
||||||
|
|
||||||
|
module "s3" {
|
||||||
|
source = "./modules/s3"
|
||||||
|
|
||||||
|
environment = var.environment
|
||||||
|
project_name = var.project_name
|
||||||
|
}
|
||||||
|
|
||||||
|
module "secrets" {
|
||||||
|
source = "./modules/secrets"
|
||||||
|
|
||||||
|
environment = var.environment
|
||||||
|
project_name = var.project_name
|
||||||
|
secrets = var.secrets
|
||||||
|
}
|
||||||
|
|
||||||
|
module "cloudwatch" {
|
||||||
|
source = "./modules/cloudwatch"
|
||||||
|
|
||||||
|
environment = var.environment
|
||||||
|
cluster_name = "${var.project_name}-${var.environment}"
|
||||||
|
project_name = var.project_name
|
||||||
|
rds_identifier = module.rds.db_instance_identifier
|
||||||
|
cache_endpoint = module.elasticache.cache_endpoint
|
||||||
|
}
|
||||||
183
infra/modules/cloudwatch/main.tf
Normal file
183
infra/modules/cloudwatch/main.tf
Normal file
@@ -0,0 +1,183 @@
|
|||||||
|
variable "environment" {
|
||||||
|
description = "Deployment environment"
|
||||||
|
type = string
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "cluster_name" {
|
||||||
|
description = "ECS cluster name"
|
||||||
|
type = string
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "project_name" {
|
||||||
|
description = "Project name"
|
||||||
|
type = string
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "rds_identifier" {
|
||||||
|
description = "RDS instance identifier"
|
||||||
|
type = string
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "cache_endpoint" {
|
||||||
|
description = "ElastiCache endpoint"
|
||||||
|
type = string
|
||||||
|
}
|
||||||
|
|
||||||
|
# Operational dashboard: ECS CPU/memory, RDS CPU, ALB traffic and 5xx rate.
# NOTE(review): the region is hard-coded to us-east-1 in every widget —
# confirm this matches the provider region or thread it through a variable.
resource "aws_cloudwatch_dashboard" "main" {
  dashboard_name = "${var.project_name}-${var.environment}-dashboard"

  dashboard_body = jsonencode({
    widgets = [
      {
        type = "metric"
        properties = {
          title   = "ECS CPU Utilization"
          metrics = [["AWS/ECS", "CPUUtilization", "ClusterName", var.cluster_name]]
          view    = "timeSeries"
          stacked = false
          region  = "us-east-1"
          period  = 300
        }
      },
      {
        type = "metric"
        properties = {
          title   = "ECS Memory Utilization"
          metrics = [["AWS/ECS", "MemoryUtilization", "ClusterName", var.cluster_name]]
          view    = "timeSeries"
          stacked = false
          region  = "us-east-1"
          period  = 300
        }
      },
      {
        type = "metric"
        properties = {
          title   = "RDS CPU Utilization"
          metrics = [["AWS/RDS", "CPUUtilization", "DBInstanceIdentifier", var.rds_identifier]]
          view    = "timeSeries"
          stacked = false
          region  = "us-east-1"
          period  = 300
        }
      },
      {
        type = "metric"
        properties = {
          title   = "ALB Request Count"
          metrics = [["AWS/ApplicationELB", "RequestCount", "LoadBalancer", "${var.cluster_name}-alb"]]
          view    = "timeSeries"
          stacked = false
          region  = "us-east-1"
          period  = 60
        }
      },
      {
        type = "metric"
        properties = {
          title   = "ALB 5xx Errors"
          metrics = [["AWS/ApplicationELB", "HTTPCode_Elb_5XX_Count", "LoadBalancer", "${var.cluster_name}-alb"]]
          view    = "timeSeries"
          stacked = false
          region  = "us-east-1"
          period  = 60
        }
      }
    ]
  })
}
|
||||||
|
|
||||||
|
# --- Alarms -----------------------------------------------------------------
# NOTE(review): the ALB "LoadBalancer" dimension below uses the ALB *name*;
# CloudWatch ALB metrics normally expect the ARN suffix ("app/<name>/<id>") —
# verify the alarm actually matches data points.

# Fires when average cluster CPU stays above 80% for two 5-minute periods.
resource "aws_cloudwatch_metric_alarm" "ecs_cpu_high" {
  alarm_name          = "${var.project_name}-${var.environment}-ecs-cpu-high"
  comparison_operator = "GreaterThanThreshold"
  evaluation_periods  = 2
  metric_name         = "CPUUtilization"
  namespace           = "AWS/ECS"
  period              = 300
  statistic           = "Average"
  threshold           = 80
  alarm_description   = "ECS CPU utilization above 80%"

  dimensions = {
    ClusterName = var.cluster_name
  }
}

# Fires when average cluster memory stays above 85% for two 5-minute periods.
resource "aws_cloudwatch_metric_alarm" "ecs_memory_high" {
  alarm_name          = "${var.project_name}-${var.environment}-ecs-memory-high"
  comparison_operator = "GreaterThanThreshold"
  evaluation_periods  = 2
  metric_name         = "MemoryUtilization"
  namespace           = "AWS/ECS"
  period              = 300
  statistic           = "Average"
  threshold           = 85
  alarm_description   = "ECS memory utilization above 85%"

  dimensions = {
    ClusterName = var.cluster_name
  }
}

# Fires when the ALB emits more than 10 5xx responses/minute for 3 minutes.
resource "aws_cloudwatch_metric_alarm" "alb_5xx" {
  alarm_name          = "${var.project_name}-${var.environment}-alb-5xx"
  comparison_operator = "GreaterThanThreshold"
  evaluation_periods  = 3
  metric_name         = "HTTPCode_Elb_5XX_Count"
  namespace           = "AWS/ApplicationELB"
  period              = 60
  statistic           = "Sum"
  threshold           = 10
  alarm_description   = "ALB 5xx errors above 10 per minute"

  dimensions = {
    LoadBalancer = "${var.cluster_name}-alb"
  }
}

# Fires when average database CPU stays above 75% for two 5-minute periods.
resource "aws_cloudwatch_metric_alarm" "rds_cpu_high" {
  alarm_name          = "${var.project_name}-${var.environment}-rds-cpu-high"
  comparison_operator = "GreaterThanThreshold"
  evaluation_periods  = 2
  metric_name         = "CPUUtilization"
  namespace           = "AWS/RDS"
  period              = 300
  statistic           = "Average"
  threshold           = 75
  alarm_description   = "RDS CPU utilization above 75%"

  dimensions = {
    DBInstanceIdentifier = var.rds_identifier
  }
}

# Fires when free storage drops below 500 MB (threshold is in bytes).
resource "aws_cloudwatch_metric_alarm" "rds_free_storage" {
  alarm_name          = "${var.project_name}-${var.environment}-rds-free-storage"
  comparison_operator = "LessThanThreshold"
  evaluation_periods  = 2
  metric_name         = "FreeStorageSpace"
  namespace           = "AWS/RDS"
  period              = 300
  statistic           = "Average"
  threshold           = 524288000
  alarm_description   = "RDS free storage below 500MB"

  dimensions = {
    DBInstanceIdentifier = var.rds_identifier
  }
}

output "dashboard_url" {
  description = "CloudWatch dashboard URL"
  value       = "https://us-east-1.console.aws.amazon.com/cloudwatch/home#dashboards/dashboard/${var.project_name}-${var.environment}-dashboard"
}
|
||||||
355
infra/modules/ecs/main.tf
Normal file
355
infra/modules/ecs/main.tf
Normal file
@@ -0,0 +1,355 @@
|
|||||||
|
# --- Inputs for the ECS/Fargate module --------------------------------------

variable "environment" {
  description = "Deployment environment"
  type        = string
}

variable "cluster_name" {
  description = "ECS cluster name"
  type        = string
}

variable "vpc_id" {
  description = "VPC ID"
  type        = string
}

variable "subnet_ids" {
  description = "Private subnet IDs"
  type        = list(string)
}

variable "security_group_ids" {
  description = "Security group IDs"
  type        = list(string)
}

# Map keyed by service name; each value carries the task sizing and port.
variable "services" {
  description = "ECS services to deploy"
  type = map(object({
    cpu    = number
    memory = number
    port   = number
  }))
}

variable "container_images" {
  description = "Container image tags"
  type        = map(string)
}

variable "secrets_arn" {
  description = "Secrets Manager ARN"
  type        = string
}
|
||||||
|
|
||||||
|
# Fargate-only cluster with Container Insights metrics enabled.
resource "aws_ecs_cluster" "main" {
  name = var.cluster_name

  settings {
    name  = "containerInsights"
    value = "enabled"
  }

  tags = {
    Name = var.cluster_name
  }
}

resource "aws_ecs_cluster_capacity_providers" "main" {
  cluster_name       = aws_ecs_cluster.main.name
  capacity_providers = ["FARGATE"]

  default_capacity_provider_strategy {
    base              = 1
    weight            = 100
    capacity_provider = "FARGATE"
  }
}
|
||||||
|
|
||||||
|
# One Fargate task definition per entry in var.services.
#
# FIX: the original referenced each.cpu / each.memory / each.port. With a map
# for_each the object is at each.value, so those expressions are invalid HCL;
# they must be each.value.cpu / each.value.memory / each.value.port.
resource "aws_ecs_task_definition" "services" {
  for_each = var.services

  family = "${var.cluster_name}-${each.key}"

  container_definitions = jsonencode([
    {
      name      = each.key
      image     = "ghcr.io/shieldai/shieldai-${each.key}:${var.container_images[each.key]}"
      cpu       = each.value.cpu
      memory    = each.value.memory
      essential = true

      portMappings = [
        {
          containerPort = each.value.port
          hostPort      = each.value.port
          protocol      = "tcp"
        }
      ]

      environment = [
        {
          name  = "NODE_ENV"
          value = var.environment
        },
        {
          name  = "PORT"
          value = tostring(each.value.port)
        }
      ]

      # Injected from Secrets Manager at task start; keys match the JSON
      # fields stored in the app-secrets secret.
      secrets = [
        {
          name      = "DATABASE_URL"
          valueFrom = "${var.secrets_arn}:DATABASE_URL::"
        },
        {
          name      = "REDIS_URL"
          valueFrom = "${var.secrets_arn}:REDIS_URL::"
        },
        {
          name      = "HIBP_API_KEY"
          valueFrom = "${var.secrets_arn}:HIBP_API_KEY::"
        },
        {
          name      = "RESEND_API_KEY"
          valueFrom = "${var.secrets_arn}:RESEND_API_KEY::"
        }
      ]

      logConfiguration = {
        logDriver = "awslogs"
        options = {
          # NOTE(review): region hard-coded — confirm it matches the provider.
          "awslogs-group"         = "/ecs/${var.cluster_name}-${each.key}"
          "awslogs-region"        = "us-east-1"
          "awslogs-stream-prefix" = each.key
        }
      }

      # Container-level health check; the image must ship wget (alpine-style).
      healthCheck = {
        command     = ["CMD-SHELL", "wget -q --spider http://localhost:${each.value.port}/health || exit 1"]
        interval    = 30
        timeout     = 5
        retries     = 3
        startPeriod = 60
      }
    }
  ])

  network_mode             = "awsvpc"
  cpu                      = each.value.cpu
  memory                   = each.value.memory
  requires_compatibilities = ["FARGATE"]

  execution_role_arn = aws_iam_role.execution[each.key].arn
  task_role_arn      = aws_iam_role.task[each.key].arn

  tags = {
    Name = "${var.cluster_name}-${each.key}"
  }
}
|
||||||
|
|
||||||
|
# Task-execution role: lets the ECS agent pull images and write logs.
# NOTE(review): because container "secrets" are resolved by the execution
# role, it may also need secretsmanager:GetSecretValue on var.secrets_arn —
# confirm against the task definition's secrets block.
resource "aws_iam_role" "execution" {
  for_each = var.services

  name = "${var.cluster_name}-${each.key}-execution"

  assume_role_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Action = "sts:AssumeRole"
        Effect = "Allow"
        Principal = {
          Service = "ecs-tasks.amazonaws.com"
        }
      }
    ]
  })

  managed_policy_arns = [
    "arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy"
  ]
}
|
||||||
|
|
||||||
|
# Task role assumed by the application containers themselves.
#
# FIX: "arn:aws:iam::aws:policy/SecretsManagerReadOnly" is not an existing
# AWS-managed policy, so the role would fail to create. The managed policy
# covering Secrets Manager access is SecretsManagerReadWrite; attach that
# (or replace with a scoped inline policy limited to var.secrets_arn).
resource "aws_iam_role" "task" {
  for_each = var.services

  name = "${var.cluster_name}-${each.key}-task"

  assume_role_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Action = "sts:AssumeRole"
        Effect = "Allow"
        Principal = {
          Service = "ecs-tasks.amazonaws.com"
        }
      }
    ]
  })

  managed_policy_arns = [
    "arn:aws:iam::aws:policy/SecretsManagerReadWrite"
  ]

  # Read-only discovery of cache clusters; these ElastiCache Describe* calls
  # do not support resource-level scoping, hence Resource = "*".
  inline_policy {
    name = "elasticache-access"
    policy = jsonencode({
      Version = "2012-10-17"
      Statement = [
        {
          Effect = "Allow"
          Action = [
            "elasticache:DescribeCacheClusters",
            "elasticache:DescribeCacheSubnetGroups"
          ]
          Resource = "*"
        }
      ]
    })
  }
}
|
||||||
|
|
||||||
|
# One ECS service per entry in var.services, fronted by the shared ALB.
#
# FIXES vs. original:
#  - "launch_desired_count" is not a valid aws_ecs_service argument; the
#    Fargate launch mode is selected with launch_type = "FARGATE".
#  - aws_ecs_service has no "auto_scaling" block; min/max capacity is owned
#    by the aws_appautoscaling_target resource declared below, so the invalid
#    block is removed.
#  - each.port -> each.value.port (map for_each exposes values on each.value).
resource "aws_ecs_service" "services" {
  for_each = var.services

  name            = "${var.cluster_name}-${each.key}"
  cluster         = aws_ecs_cluster.main.id
  task_definition = aws_ecs_task_definition.services[each.key].arn
  desired_count   = var.environment == "production" ? 3 : 1

  launch_type = "FARGATE"

  network_configuration {
    subnets          = var.subnet_ids
    security_groups  = var.security_group_ids
    assign_public_ip = false
  }

  load_balancer {
    target_group_arn = aws_lb_target_group.services[each.key].arn
    container_name   = each.key
    container_port   = each.value.port
  }

  tags = {
    Name    = "${var.cluster_name}-${each.key}"
    Service = each.key
  }

  # The listener must exist before the service can register targets.
  depends_on = [
    aws_lb_listener.services
  ]
}
|
||||||
|
|
||||||
|
# Internet-facing application load balancer shared by all services.
# NOTE(review): it is placed in var.subnet_ids, which the module treats as
# *private* subnets elsewhere — an internet-facing ALB needs public subnets;
# confirm what the caller passes in.
resource "aws_lb" "main" {
  name               = "${var.cluster_name}-alb"
  internal           = false
  load_balancer_type = "application"
  security_groups    = var.security_group_ids
  subnets            = var.subnet_ids

  tags = {
    Name = "${var.cluster_name}-alb"
  }
}
|
||||||
|
|
||||||
|
# One target group per service, health-checked on /health.
#
# FIX: target_type defaults to "instance", but Fargate tasks in awsvpc mode
# register by IP address — without target_type = "ip" the ECS service cannot
# attach targets to the group.
resource "aws_lb_target_group" "services" {
  for_each = var.services

  name        = "${var.cluster_name}-${each.key}-tg"
  port        = each.value.port
  protocol    = "HTTP"
  vpc_id      = var.vpc_id
  target_type = "ip"

  health_check {
    enabled             = true
    healthy_threshold   = 3
    interval            = 30
    matcher             = "200"
    path                = "/health"
    port                = "traffic-port"
    protocol            = "HTTP"
    timeout             = 5
    unhealthy_threshold = 3
  }

  # 24h cookie-based stickiness so a client keeps hitting the same task.
  stickiness {
    type            = "lb_cookie"
    cookie_duration = 86400
  }
}
|
||||||
|
|
||||||
|
# One listener per service.
#
# FIX: the original created a listener per service with a hard-coded
# port = 80 — an ALB allows only one listener per port, so every service
# after the first would fail with a DuplicateListener error. Bind each
# listener to its service's own port instead. (A production setup would
# more likely use a single listener with path/host routing rules.)
resource "aws_lb_listener" "services" {
  for_each = var.services

  load_balancer_arn = aws_lb.main.arn
  port              = each.value.port
  protocol          = "HTTP"

  default_action {
    type             = "forward"
    target_group_arn = aws_lb_target_group.services[each.key].arn
  }
}
|
||||||
|
|
||||||
|
# Registers each service's DesiredCount as a scalable dimension; the CPU
# policy below drives it between these bounds.
resource "aws_appautoscaling_target" "services" {
  for_each = var.services

  service_namespace  = "ecs"
  resource_id        = "service/${aws_ecs_cluster.main.name}/${aws_ecs_service.services[each.key].name}"
  scalable_dimension = "ecs:service:DesiredCount"
  min_capacity       = var.environment == "production" ? 2 : 1
  max_capacity       = var.environment == "production" ? 10 : 3
}
|
||||||
|
|
||||||
|
# Target-tracking policy: hold average service CPU near 70%.
#
# FIXES vs. original:
#  - policy_type defaults to "StepScaling"; a target_tracking configuration
#    block requires policy_type = "TargetTrackingScaling" explicitly.
#  - customized_metric_specification with a list-valued "dimensions"
#    attribute is invalid (dimensions is a nested block), and tracking raw
#    cluster-level AWS/ECS CPU would scale every service on cluster load.
#    The predefined ECSServiceAverageCPUUtilization metric is the correct,
#    per-service choice.
#  - Reference the scaling target's exported attributes so the dependency is
#    explicit and the IDs can never drift apart.
resource "aws_appautoscaling_policy" "cpu" {
  for_each = var.services

  name               = "${var.cluster_name}-${each.key}-cpu-scaling"
  policy_type        = "TargetTrackingScaling"
  service_namespace  = aws_appautoscaling_target.services[each.key].service_namespace
  resource_id        = aws_appautoscaling_target.services[each.key].resource_id
  scalable_dimension = aws_appautoscaling_target.services[each.key].scalable_dimension

  target_tracking_scaling_policy_configuration {
    target_value       = 70.0
    scale_in_cooldown  = 60
    scale_out_cooldown = 30

    predefined_metric_specification {
      predefined_metric_type = "ECSServiceAverageCPUUtilization"
    }
  }
}
|
||||||
|
|
||||||
|
# Log group per service; names match the awslogs-group used by the task
# definitions. Shorter retention outside production to contain cost.
resource "aws_cloudwatch_log_group" "services" {
  for_each = var.services

  name              = "/ecs/${var.cluster_name}-${each.key}"
  retention_in_days = var.environment == "production" ? 30 : 7

  tags = {
    Name = "${var.cluster_name}-${each.key}-logs"
  }
}

output "cluster_arn" {
  description = "ECS cluster ARN"
  value       = aws_ecs_cluster.main.arn
}

output "alb_dns_name" {
  description = "ALB DNS name"
  value       = aws_lb.main.dns_name
}
|
||||||
80
infra/modules/elasticache/main.tf
Normal file
80
infra/modules/elasticache/main.tf
Normal file
@@ -0,0 +1,80 @@
|
|||||||
|
# --- Inputs for the ElastiCache (Redis) module ------------------------------

variable "environment" {
  description = "Deployment environment"
  type        = string
}

variable "vpc_id" {
  description = "VPC ID"
  type        = string
}

variable "subnet_ids" {
  description = "Private subnet IDs"
  type        = list(string)
}

variable "security_group_id" {
  description = "ElastiCache security group ID"
  type        = string
}

variable "node_type" {
  description = "Cache node type"
  type        = string
}

variable "num_nodes" {
  description = "Number of cache nodes"
  type        = number
}

variable "project_name" {
  description = "Project name"
  type        = string
}

# Subnet group placing the cache nodes in the private subnets.
resource "aws_elasticache_subnet_group" "main" {
  name       = "${var.project_name}-${var.environment}-redis-subnet"
  subnet_ids = var.subnet_ids

  tags = {
    Name = "${var.project_name}-${var.environment}-redis-subnet"
  }
}
|
||||||
|
|
||||||
|
# Redis 7 replication group, encrypted in transit and at rest.
# NOTE(review): automatic_failover_enabled requires num_cache_clusters >= 2 —
# confirm the production num_nodes input is at least 2.
resource "aws_elasticache_replication_group" "main" {
  replication_group_id = "${var.project_name}-${var.environment}-redis"
  description          = "${var.project_name} Redis cluster (${var.environment})"

  node_type          = var.node_type
  num_cache_clusters = var.num_nodes
  engine             = "redis"
  engine_version     = "7.0"

  transit_encryption_enabled = true
  at_rest_encryption_enabled = true

  port = 6379

  subnet_group_name  = aws_elasticache_subnet_group.main.name
  security_group_ids = [var.security_group_id]

  automatic_failover_enabled = var.environment == "production"

  # Nightly snapshots; keep a week in production, one day elsewhere.
  snapshot_retention_limit = var.environment == "production" ? 7 : 1
  snapshot_window          = "03:00-04:00"

  tags = {
    Name = "${var.project_name}-${var.environment}-redis"
  }
}

output "cache_endpoint" {
  description = "ElastiCache primary endpoint"
  value       = aws_elasticache_replication_group.main.primary_endpoint_address
}

output "reader_endpoint" {
  description = "ElastiCache reader endpoint"
  value       = aws_elasticache_replication_group.main.reader_endpoint_address
}
|
||||||
132
infra/modules/rds/main.tf
Normal file
132
infra/modules/rds/main.tf
Normal file
@@ -0,0 +1,132 @@
|
|||||||
|
# --- Inputs for the RDS (PostgreSQL) module ---------------------------------

variable "environment" {
  description = "Deployment environment"
  type        = string
}

variable "vpc_id" {
  description = "VPC ID"
  type        = string
}

variable "subnet_ids" {
  description = "Private subnet IDs"
  type        = list(string)
}

variable "security_group_id" {
  description = "RDS security group ID"
  type        = string
}

variable "db_name" {
  description = "Database name"
  type        = string
}

variable "db_instance_class" {
  description = "RDS instance class"
  type        = string
}

variable "multi_az" {
  description = "Multi-AZ deployment"
  type        = bool
}

variable "backup_retention" {
  description = "Backup retention days"
  type        = number
}

variable "project_name" {
  description = "Project name"
  type        = string
}

# Subnet group keeping the database on the private subnets only.
resource "aws_db_subnet_group" "main" {
  name       = "${var.project_name}-${var.environment}-db-subnet"
  subnet_ids = var.subnet_ids

  tags = {
    Name = "${var.project_name}-${var.environment}-db-subnet"
  }
}
|
||||||
|
|
||||||
|
# PostgreSQL 16 instance; encrypted gp3 storage, automated backups, and
# deletion protection in production.
#
# FIX: the original set iops = 1000/3000 on gp3 storage. RDS only allows a
# custom IOPS value on gp3 volumes of 400 GB or larger (this module uses
# 20/100 GB), so the apply would fail. gp3 already includes a 3000-IOPS
# baseline at these sizes, so the argument is simply dropped.
resource "aws_db_instance" "main" {
  identifier = "${var.project_name}-${var.environment}-db"

  engine            = "postgres"
  engine_version    = "16.2"
  instance_class    = var.db_instance_class
  allocated_storage = var.environment == "production" ? 100 : 20

  db_name  = var.db_name
  username = "shieldai"
  password = random_password.db_password.result

  multi_az               = var.multi_az
  db_subnet_group_name   = aws_db_subnet_group.main.name
  vpc_security_group_ids = [var.security_group_id]

  backup_retention_period = var.backup_retention
  backup_window           = "03:00-04:00"
  maintenance_window      = "sun:04:00-sun:05:00"

  # Non-production instances can be destroyed without a final snapshot.
  skip_final_snapshot       = var.environment != "production"
  final_snapshot_identifier = "${var.project_name}-${var.environment}-final"

  storage_encrypted = true
  storage_type      = "gp3"

  deletion_protection   = var.environment == "production"
  copy_tags_to_snapshot = true

  tags = {
    Name = "${var.project_name}-${var.environment}-db"
  }
}
|
||||||
|
|
||||||
|
# Master password for the RDS instance; regenerated only when the keeper
# (environment) changes.
#
# FIX: with special = true the default character set includes '/', '@', '"'
# and space — characters RDS rejects in master passwords, so roughly half of
# all generated passwords would fail the create call. override_special
# restricts generation to RDS-safe punctuation.
resource "random_password" "db_password" {
  length           = 16
  special          = true
  override_special = "!#$%&*()-_=+[]{}<>:?"

  keepers = {
    environment = var.environment
  }
}
|
||||||
|
|
||||||
|
# Store the generated credentials alongside the instance coordinates so
# applications can resolve a full connection config from one secret.
resource "aws_secretsmanager_secret_version" "db_password" {
  secret_id = aws_secretsmanager_secret.db_password.id
  secret_string = jsonencode({
    username = "shieldai"
    password = random_password.db_password.result
    engine   = "postgres"
    host     = aws_db_instance.main.address
    port     = aws_db_instance.main.port
  })
}

resource "aws_secretsmanager_secret" "db_password" {
  name = "${var.project_name}-${var.environment}-db-password"

  tags = {
    Name = "${var.project_name}-${var.environment}-db-password"
  }
}

output "db_endpoint" {
  description = "RDS endpoint"
  value       = aws_db_instance.main.endpoint
  sensitive   = true
}

output "db_instance_identifier" {
  description = "RDS instance identifier"
  value       = aws_db_instance.main.identifier
}

output "db_password_secret_arn" {
  description = "DB password secret ARN"
  value       = aws_secretsmanager_secret.db_password.arn
}
|
||||||
108
infra/modules/s3/main.tf
Normal file
108
infra/modules/s3/main.tf
Normal file
@@ -0,0 +1,108 @@
|
|||||||
|
# --- S3 module: terraform state, build artifacts, and log buckets -----------

variable "environment" {
  description = "Deployment environment"
  type        = string
}

variable "project_name" {
  description = "Project name"
  type        = string
}

# Bucket holding remote terraform state; versioned so state can be recovered.
resource "aws_s3_bucket" "terraform_state" {
  bucket = "${var.project_name}-${var.environment}-terraform-state"

  tags = {
    Name = "${var.project_name}-${var.environment}-terraform-state"
  }
}

resource "aws_s3_bucket_versioning" "terraform_state" {
  bucket = aws_s3_bucket.terraform_state.id
  versioning_configuration {
    status = "Enabled"
  }
}

resource "aws_s3_bucket_server_side_encryption_configuration" "terraform_state" {
  bucket = aws_s3_bucket.terraform_state.id

  rule {
    apply_server_side_encryption_by_default {
      sse_algorithm = "aws:kms"
    }
  }
}

# Old state versions are pruned after 30 days.
resource "aws_s3_bucket_lifecycle_configuration" "terraform_state" {
  bucket = aws_s3_bucket.terraform_state.id

  rule {
    id     = "expire-noncurrent"
    status = "Enabled"

    noncurrent_version_expiration {
      noncurrent_days = 30
    }
  }
}

# Bucket for CI build artifacts.
resource "aws_s3_bucket" "artifacts" {
  bucket = "${var.project_name}-${var.environment}-artifacts"

  tags = {
    Name = "${var.project_name}-${var.environment}-artifacts"
  }
}

resource "aws_s3_bucket_versioning" "artifacts" {
  bucket = aws_s3_bucket.artifacts.id
  versioning_configuration {
    status = "Enabled"
  }
}

resource "aws_s3_bucket_server_side_encryption_configuration" "artifacts" {
  bucket = aws_s3_bucket.artifacts.id

  rule {
    apply_server_side_encryption_by_default {
      sse_algorithm = "aws:kms"
    }
  }
}

# Bucket for access/application logs, expired after 90 days.
resource "aws_s3_bucket" "logs" {
  bucket = "${var.project_name}-${var.environment}-logs"

  tags = {
    Name = "${var.project_name}-${var.environment}-logs"
  }
}

resource "aws_s3_bucket_lifecycle_configuration" "logs" {
  bucket = aws_s3_bucket.logs.id

  rule {
    id     = "expire-old-logs"
    status = "Enabled"

    expiration {
      days = 90
    }
  }
}

output "bucket_name" {
  description = "Terraform state S3 bucket name"
  value       = aws_s3_bucket.terraform_state.id
}

output "artifacts_bucket_name" {
  description = "Artifacts S3 bucket name"
  value       = aws_s3_bucket.artifacts.id
}

output "logs_bucket_name" {
  description = "Logs S3 bucket name"
  value       = aws_s3_bucket.logs.id
}
|
||||||
49
infra/modules/secrets/main.tf
Normal file
49
infra/modules/secrets/main.tf
Normal file
@@ -0,0 +1,49 @@
|
|||||||
|
# --- Secrets Manager module -------------------------------------------------

variable "environment" {
  description = "Deployment environment"
  type        = string
}

variable "project_name" {
  description = "Project name"
  type        = string
}

variable "secrets" {
  description = "Secrets to store"
  type        = map(string)
  default     = {}
}

resource "aws_secretsmanager_secret" "main" {
  name = "${var.project_name}-${var.environment}-app-secrets"

  description = "Application secrets for ${var.project_name} (${var.environment})"

  tags = {
    Name        = "${var.project_name}-${var.environment}-app-secrets"
    Environment = var.environment
  }
}

# NOTE(review): the DATABASE_URL below embeds var.project_name as the DB
# password and both URLs fabricate hostnames from the account id — real RDS /
# ElastiCache endpoints do not follow this shape. These look like
# placeholders; they should be wired to the rds / elasticache module outputs
# (and the generated db password) before production use — TODO confirm.
resource "aws_secretsmanager_secret_version" "main" {
  secret_id = aws_secretsmanager_secret.main.id

  secret_string = jsonencode(merge({
    DATABASE_URL = "postgresql://shieldai:${var.project_name}@${var.project_name}-${var.environment}-db.${data.aws_caller_identity.current.account_id}.us-east-1.rds.amazonaws.com:5432/shieldai"
    REDIS_URL    = "redis://${var.project_name}-${var.environment}-redis.${data.aws_caller_identity.current.account_id}.us-east-1.cache.amazonaws.com:6379"
    NODE_ENV     = var.environment
    LOG_LEVEL    = var.environment == "production" ? "info" : "debug"
  }, var.secrets))
}

data "aws_caller_identity" "current" {}

output "secrets_manager_arn" {
  description = "Secrets Manager ARN"
  value       = aws_secretsmanager_secret.main.arn
}

output "secrets_manager_name" {
  description = "Secrets Manager secret name"
  value       = aws_secretsmanager_secret.main.name
}
|
||||||
235
infra/modules/vpc/main.tf
Normal file
235
infra/modules/vpc/main.tf
Normal file
@@ -0,0 +1,235 @@
|
|||||||
|
# --- VPC module: network topology -------------------------------------------

variable "environment" {
  description = "Deployment environment"
  type        = string
}

variable "vpc_cidr" {
  description = "CIDR block for VPC"
  type        = string
}

variable "az_count" {
  description = "Number of availability zones"
  type        = number
}

variable "project_name" {
  description = "Project name"
  type        = string
}

resource "aws_vpc" "main" {
  cidr_block           = var.vpc_cidr
  enable_dns_support   = true
  enable_dns_hostnames = true

  tags = {
    Name = "${var.project_name}-${var.environment}-vpc"
  }
}

data "aws_availability_zones" "available" {
  state = "available"
}

# One public /24 (relative to the VPC CIDR) per AZ; hosts NAT gateways and
# any internet-facing load balancers.
resource "aws_subnet" "public" {
  count = var.az_count

  vpc_id                  = aws_vpc.main.id
  cidr_block              = cidrsubnet(var.vpc_cidr, 8, count.index)
  availability_zone       = data.aws_availability_zones.available.names[count.index]
  map_public_ip_on_launch = true

  tags = {
    Name                     = "${var.project_name}-${var.environment}-public-${data.aws_availability_zones.available.names[count.index]}"
    "kubernetes.io/role/elb" = "1"
  }
}

# Private subnets occupy the next az_count /24 blocks after the public ones.
resource "aws_subnet" "private" {
  count = var.az_count

  vpc_id            = aws_vpc.main.id
  cidr_block        = cidrsubnet(var.vpc_cidr, 8, var.az_count + count.index)
  availability_zone = data.aws_availability_zones.available.names[count.index]

  tags = {
    Name                              = "${var.project_name}-${var.environment}-private-${data.aws_availability_zones.available.names[count.index]}"
    "kubernetes.io/role/internal-elb" = "1"
  }
}

resource "aws_internet_gateway" "main" {
  vpc_id = aws_vpc.main.id

  tags = {
    Name = "${var.project_name}-${var.environment}-igw"
  }
}

# One EIP + NAT gateway per AZ so each private subnet egresses locally.
resource "aws_eip" "nat" {
  count = var.az_count

  domain = "vpc"

  tags = {
    Name = "${var.project_name}-${var.environment}-nat-${count.index}"
  }
}

resource "aws_nat_gateway" "main" {
  count = var.az_count

  allocation_id = aws_eip.nat[count.index].id
  subnet_id     = aws_subnet.public[count.index].id

  tags = {
    Name = "${var.project_name}-${var.environment}-nat-${count.index}"
  }

  # The IGW must be attached before NAT gateways can come up.
  depends_on = [aws_internet_gateway.main]
}

# Public route table: default route to the internet gateway.
resource "aws_route_table" "public" {
  vpc_id = aws_vpc.main.id

  route {
    cidr_block = "0.0.0.0/0"
    gateway_id = aws_internet_gateway.main.id
  }

  tags = {
    Name = "${var.project_name}-${var.environment}-public-rt"
  }
}

# Per-AZ private route tables: default route through that AZ's NAT gateway.
resource "aws_route_table" "private" {
  count = var.az_count

  vpc_id = aws_vpc.main.id

  route {
    cidr_block     = "0.0.0.0/0"
    nat_gateway_id = aws_nat_gateway.main[count.index].id
  }

  tags = {
    Name = "${var.project_name}-${var.environment}-private-rt-${count.index}"
  }
}

resource "aws_route_table_association" "public" {
  count = var.az_count

  subnet_id      = aws_subnet.public[count.index].id
  route_table_id = aws_route_table.public.id
}

resource "aws_route_table_association" "private" {
  count = var.az_count

  subnet_id      = aws_subnet.private[count.index].id
  route_table_id = aws_route_table.private[count.index].id
}
|
||||||
|
|
||||||
|
# --- Security groups --------------------------------------------------------
# NOTE(review): the ECS group allows 3000-3003 from 0.0.0.0/0 — presumably
# because the ALB shares this group elsewhere in the stack; if the tasks get
# their own group, this ingress should be narrowed to the ALB group only.

resource "aws_security_group" "ecs" {
  name_prefix = "${var.project_name}-${var.environment}-ecs"
  vpc_id      = aws_vpc.main.id

  ingress {
    from_port   = 3000
    to_port     = 3003
    protocol    = "tcp"
    cidr_blocks = ["0.0.0.0/0"]
    description = "Service ports"
  }

  egress {
    from_port   = 0
    to_port     = 0
    protocol    = "-1"
    cidr_blocks = ["0.0.0.0/0"]
  }

  tags = {
    Name = "${var.project_name}-${var.environment}-ecs-sg"
  }
}

# Postgres reachable only from the ECS group.
resource "aws_security_group" "rds" {
  name_prefix = "${var.project_name}-${var.environment}-rds"
  vpc_id      = aws_vpc.main.id

  ingress {
    from_port       = 5432
    to_port         = 5432
    protocol        = "tcp"
    security_groups = [aws_security_group.ecs.id]
    description     = "PostgreSQL from ECS"
  }

  egress {
    from_port   = 0
    to_port     = 0
    protocol    = "-1"
    cidr_blocks = ["0.0.0.0/0"]
  }

  tags = {
    Name = "${var.project_name}-${var.environment}-rds-sg"
  }
}

# Redis reachable only from the ECS group.
resource "aws_security_group" "elasticache" {
  name_prefix = "${var.project_name}-${var.environment}-elasticache"
  vpc_id      = aws_vpc.main.id

  ingress {
    from_port       = 6379
    to_port         = 6379
    protocol        = "tcp"
    security_groups = [aws_security_group.ecs.id]
    description     = "Redis from ECS"
  }

  egress {
    from_port   = 0
    to_port     = 0
    protocol    = "-1"
    cidr_blocks = ["0.0.0.0/0"]
  }

  tags = {
    Name = "${var.project_name}-${var.environment}-elasticache-sg"
  }
}
|
||||||
|
|
||||||
|
# --- VPC module outputs -----------------------------------------------------

output "vpc_id" {
  description = "VPC ID"
  value       = aws_vpc.main.id
}

output "private_subnet_ids" {
  description = "Private subnet IDs"
  value       = aws_subnet.private[*].id
}

output "public_subnet_ids" {
  description = "Public subnet IDs"
  value       = aws_subnet.public[*].id
}

output "ecs_security_group_id" {
  description = "ECS security group ID"
  value       = aws_security_group.ecs.id
}

output "rds_security_group_id" {
  description = "RDS security group ID"
  value       = aws_security_group.rds.id
}

output "elasticache_security_group_id" {
  description = "ElastiCache security group ID"
  value       = aws_security_group.elasticache.id
}
|
||||||
35
infra/outputs.tf
Normal file
35
infra/outputs.tf
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
# Root-module outputs (infra/outputs.tf): identifiers and endpoints that
# deploy tooling and operators need after `terraform apply`.

output "vpc_id" {
  description = "VPC ID"
  value       = module.vpc.vpc_id
}

output "cluster_name" {
  description = "ECS cluster name"
  # Derived from naming convention rather than a module output; must stay in
  # sync with how the ECS module names the cluster.
  value = "${var.project_name}-${var.environment}"
}

output "rds_endpoint" {
  description = "RDS endpoint"
  value       = module.rds.db_endpoint
  sensitive   = true
}

output "elasticache_endpoint" {
  description = "ElastiCache primary endpoint"
  value       = module.elasticache.cache_endpoint
  # Marked sensitive for consistency with rds_endpoint: both are internal
  # connection endpoints that should not be echoed in CI logs/plan output.
  sensitive = true
}

output "s3_bucket_name" {
  description = "S3 bucket name"
  value       = module.s3.bucket_name
}

output "secrets_manager_arn" {
  description = "Secrets Manager ARN"
  value       = module.secrets.secrets_manager_arn
}

output "cloudwatch_dashboard_url" {
  description = "CloudWatch dashboard URL"
  value       = module.cloudwatch.dashboard_url
}
|
||||||
32
infra/scripts/rollback.sh
Executable file
32
infra/scripts/rollback.sh
Executable file
@@ -0,0 +1,32 @@
|
|||||||
|
#!/bin/bash
# Roll back ECS services to their previous task-definition revision.
#
# Usage: rollback.sh [environment] [service]
#   environment  staging (default) or production
#   service      api | darkwatch | spamshield | voiceprint | all (default)
#
# BUG FIX: `aws ecs update-service` has no --rollback flag (the original
# invocation fails with an "Unknown options" error). Rollback is performed
# by resolving the service's current task definition and re-deploying the
# previous revision of the same family.
set -euo pipefail

ENVIRONMENT=${1:-staging}
SERVICE=${2:-all}

CLUSTER="shieldai-${ENVIRONMENT}"

echo "Rolling back services in cluster: $CLUSTER"

SERVICES="api darkwatch spamshield voiceprint"
if [ "$SERVICE" != "all" ]; then
  SERVICES="$SERVICE"
fi

for svc in $SERVICES; do
  echo "Rolling back $svc..."

  # Current task definition ARN, e.g.
  # arn:aws:ecs:us-east-1:123456789012:task-definition/shieldai-staging-api:7
  current_td=$(aws ecs describe-services \
    --cluster "$CLUSTER" \
    --services "${CLUSTER}-${svc}" \
    --query 'services[0].taskDefinition' \
    --output text)

  # Strip the ARN prefix, then split "family:revision".
  family_rev=${current_td##*/}
  revision=${family_rev##*:}
  family=${family_rev%:*}

  if [ "$revision" -le 1 ]; then
    echo "No previous revision for $svc (current: $current_td); skipping" >&2
    continue
  fi

  previous_td="${family}:$((revision - 1))"
  echo "  $current_td -> $previous_td"

  aws ecs update-service \
    --cluster "$CLUSTER" \
    --service "${CLUSTER}-${svc}" \
    --task-definition "$previous_td" \
    --no-cli-auto-prompt >/dev/null

  echo "Waiting for $svc to stabilize..."
  aws ecs wait services-stable \
    --cluster "$CLUSTER" \
    --services "${CLUSTER}-${svc}"

  echo "$svc rolled back successfully"
done

echo "Rollback complete for $SERVICES"
|
||||||
116
infra/variables.tf
Normal file
116
infra/variables.tf
Normal file
@@ -0,0 +1,116 @@
|
|||||||
|
# Input variables for the ShieldAI infrastructure stack (infra/variables.tf).

variable "aws_region" {
  description = "AWS region"
  type        = string
  default     = "us-east-1"
}

variable "environment" {
  description = "Deployment environment"
  type        = string

  validation {
    condition     = contains(["dev", "staging", "production"], var.environment)
    error_message = "Environment must be one of: dev, staging, production."
  }
}

variable "project_name" {
  description = "Project name for resource naming"
  type        = string
  default     = "shieldai"
}

variable "vpc_cidr" {
  description = "CIDR block for VPC"
  type        = string
  default     = "10.0.0.0/16"
}

variable "az_count" {
  description = "Number of availability zones"
  type        = number
  default     = 2
}

variable "db_name" {
  description = "RDS database name"
  type        = string
  default     = "shieldai"
}

variable "db_instance_class" {
  description = "RDS instance class"
  type        = string
  default     = "db.t3.medium"
}

variable "db_multi_az" {
  description = "Enable Multi-AZ deployment"
  type        = bool
  default     = true
}

variable "db_backup_retention" {
  description = "RDS backup retention period in days"
  type        = number
  default     = 7

  # RDS automated backup retention must be between 0 and 35 days.
  validation {
    condition     = var.db_backup_retention >= 0 && var.db_backup_retention <= 35
    error_message = "db_backup_retention must be between 0 and 35 days."
  }
}

variable "elasticache_node_type" {
  description = "ElastiCache node type"
  type        = string
  default     = "cache.t3.medium"
}

variable "elasticache_num_nodes" {
  description = "Number of ElastiCache nodes"
  type        = number
  default     = 2
}

# Per-service ECS task sizing. CPU/memory are Fargate task units
# (cpu 256 = 0.25 vCPU); port is the container port exposed by each service.
variable "services" {
  description = "ECS services to deploy"
  type = map(object({
    cpu    = number
    memory = number
    port   = number
  }))
  default = {
    api = {
      cpu    = 512
      memory = 1024
      port   = 3000
    }
    darkwatch = {
      cpu    = 256
      memory = 512
      port   = 3001
    }
    spamshield = {
      cpu    = 256
      memory = 512
      port   = 3002
    }
    voiceprint = {
      cpu    = 512
      memory = 1024
      port   = 3003
    }
  }
}

variable "container_images" {
  description = "Container image tags per service"
  type        = map(string)
  default = {
    api        = "latest"
    darkwatch  = "latest"
    spamshield = "latest"
    voiceprint = "latest"
  }
}

variable "secrets" {
  description = "Secrets to store in AWS Secrets Manager"
  type        = map(string)
  default     = {}
  # Marked sensitive so secret values are redacted from `terraform plan`
  # output and CI logs.
  sensitive = true
}
|
||||||
Reference in New Issue
Block a user