Add Terraform AWS infrastructure and enhanced CI/CD pipeline (FRE-4574)
- Terraform modules: VPC, ECS Fargate, RDS PostgreSQL, ElastiCache Redis, S3, Secrets Manager, CloudWatch - Multi-environment support: staging and production configs - ECS auto-scaling: CPU-based scaling with configurable min/max - CI/CD: pnpm caching, Docker Buildx, Trivy security scanning, Terraform plan on PR - Deploy: ECS service updates with automatic rollback on health check failure - Backup: automated RDS snapshots, S3 versioning, ElastiCache snapshots - Monitoring: CloudWatch dashboards, CPU/memory/5xx alarms - Rollback script for manual service rollback - Infrastructure documentation with architecture overview
This commit is contained in:
78
.github/workflows/ci.yml
vendored
78
.github/workflows/ci.yml
vendored
@@ -24,11 +24,14 @@ jobs:
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: ${{ env.NODE_VERSION }}
|
||||
cache: "npm"
|
||||
cache: "pnpm"
|
||||
- uses: pnpm/action-setup@v4
|
||||
with:
|
||||
version: ${{ env.PNPM_VERSION }}
|
||||
- name: Install dependencies
|
||||
run: npm ci
|
||||
run: pnpm install --frozen-lockfile
|
||||
- name: Run linter
|
||||
run: npm run lint
|
||||
run: pnpm lint
|
||||
|
||||
typecheck:
|
||||
name: Type Check
|
||||
@@ -39,11 +42,14 @@ jobs:
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: ${{ env.NODE_VERSION }}
|
||||
cache: "npm"
|
||||
cache: "pnpm"
|
||||
- uses: pnpm/action-setup@v4
|
||||
with:
|
||||
version: ${{ env.PNPM_VERSION }}
|
||||
- name: Install dependencies
|
||||
run: npm ci
|
||||
run: pnpm install --frozen-lockfile
|
||||
- name: Build all packages
|
||||
run: npm run build
|
||||
run: pnpm build
|
||||
|
||||
test:
|
||||
name: Test Suite
|
||||
@@ -77,15 +83,14 @@ jobs:
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: ${{ env.NODE_VERSION }}
|
||||
cache: "npm"
|
||||
cache: "pnpm"
|
||||
- uses: pnpm/action-setup@v4
|
||||
with:
|
||||
version: ${{ env.PNPM_VERSION }}
|
||||
- name: Install dependencies
|
||||
run: npm ci
|
||||
- name: Generate Prisma client
|
||||
run: npx prisma generate --schema=packages/db/prisma/schema.prisma
|
||||
env:
|
||||
DATABASE_URL: "postgresql://shieldai:shieldai_dev@localhost:5432/shieldai"
|
||||
run: pnpm install --frozen-lockfile
|
||||
- name: Run tests with coverage
|
||||
run: npm run test:coverage
|
||||
run: pnpm test:coverage
|
||||
env:
|
||||
DATABASE_URL: "postgresql://shieldai:shieldai_dev@localhost:5432/shieldai"
|
||||
REDIS_URL: "redis://localhost:6379"
|
||||
@@ -100,8 +105,9 @@ jobs:
|
||||
docker-build:
|
||||
name: Docker Build
|
||||
runs-on: ubuntu-latest
|
||||
needs: [lint, typecheck]
|
||||
needs: [lint, typecheck, test]
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
include:
|
||||
- name: api
|
||||
@@ -118,6 +124,8 @@ jobs:
|
||||
dockerfile: services/voiceprint/Dockerfile
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
- name: Build Docker image
|
||||
uses: docker/build-push-action@v5
|
||||
with:
|
||||
@@ -127,3 +135,45 @@ jobs:
|
||||
tags: shieldai-${{ matrix.name }}:${{ github.sha }}
|
||||
cache-from: type=gha
|
||||
cache-to: type=gha,mode=max
|
||||
|
||||
security-scan:
|
||||
name: Security Scan
|
||||
runs-on: ubuntu-latest
|
||||
needs: [lint]
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Run npm audit
|
||||
run: pnpm audit --prod
|
||||
continue-on-error: true
|
||||
- name: Trivy filesystem scan
|
||||
uses: aquasecurity/trivy-action@master
|
||||
with:
|
||||
scan-type: fs
|
||||
scan-ref: "."
|
||||
format: table
|
||||
exit-code: 1
|
||||
ignore-unfixed: true
|
||||
severity: CRITICAL,HIGH
|
||||
|
||||
terraform-plan:
|
||||
name: Terraform Plan
|
||||
runs-on: ubuntu-latest
|
||||
needs: [lint]
|
||||
if: github.event_name == 'pull_request'
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Terraform Format
|
||||
working-directory: infra
|
||||
run: terraform fmt -check -diff
|
||||
- name: Terraform Init
|
||||
working-directory: infra
|
||||
run: terraform init
|
||||
- name: Terraform Validate
|
||||
working-directory: infra
|
||||
run: terraform validate
|
||||
- name: Terraform Plan
|
||||
working-directory: infra
|
||||
run: terraform plan -var-file=environments/staging/terraform.tfvars.example -no-color
|
||||
env:
|
||||
TF_VAR_hibp_api_key: ${{ secrets.HIBP_API_KEY }}
|
||||
TF_VAR_resend_api_key: ${{ secrets.RESEND_API_KEY }}
|
||||
|
||||
192
.github/workflows/deploy.yml
vendored
192
.github/workflows/deploy.yml
vendored
@@ -12,6 +12,7 @@ concurrency:
|
||||
|
||||
env:
|
||||
NODE_VERSION: "20"
|
||||
PNPM_VERSION: "9"
|
||||
|
||||
jobs:
|
||||
detect-environment:
|
||||
@@ -19,6 +20,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
environment: ${{ steps.detect.outputs.environment }}
|
||||
tag: ${{ steps.tag.outputs.tag }}
|
||||
steps:
|
||||
- name: Detect deployment target
|
||||
id: detect
|
||||
@@ -28,13 +30,59 @@ jobs:
|
||||
else
|
||||
echo "environment=staging" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
- name: Calculate tag
|
||||
id: tag
|
||||
run: |
|
||||
if [ "${{ needs.detect-environment.outputs.environment }}" = "production" ]; then
|
||||
echo "tag=${{ github.event.release.tag_name }}" >> $GITHUB_OUTPUT
|
||||
else
|
||||
echo "tag=${{ github.sha }}" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
|
||||
terraform-apply:
|
||||
name: Terraform Apply
|
||||
runs-on: ubuntu-latest
|
||||
needs: detect-environment
|
||||
environment: ${{ needs.detect-environment.outputs.environment }}
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Setup Terraform
|
||||
uses: hashicorp/setup-terraform@v3
|
||||
with:
|
||||
terraform_version: "~> 1.5"
|
||||
- name: Terraform Init
|
||||
working-directory: infra/environments/${{ needs.detect-environment.outputs.environment }}
|
||||
run: terraform init -backend-config="bucket=shieldai-${{ needs.detect-environment.outputs.environment }}-terraform-state"
|
||||
- name: Terraform Plan
|
||||
id: plan
|
||||
working-directory: infra/environments/${{ needs.detect-environment.outputs.environment }}
|
||||
run: |
|
||||
terraform plan \
|
||||
-var="hibp_api_key=${{ secrets.HIBP_API_KEY }}" \
|
||||
-var="resend_api_key=${{ secrets.RESEND_API_KEY }}" \
|
||||
-var="sentry_dsn=${{ secrets.SENTRY_DSN }}" \
|
||||
-var="datadog_api_key=${{ secrets.DATADOG_API_KEY }}" \
|
||||
-no-color | tee /tmp/terraform-plan.out
|
||||
- name: Terraform Apply
|
||||
working-directory: infra/environments/${{ needs.detect-environment.outputs.environment }}
|
||||
run: |
|
||||
terraform apply -auto-approve \
|
||||
-var="hibp_api_key=${{ secrets.HIBP_API_KEY }}" \
|
||||
-var="resend_api_key=${{ secrets.RESEND_API_KEY }}" \
|
||||
-var="sentry_dsn=${{ secrets.SENTRY_DSN }}" \
|
||||
-var="datadog_api_key=${{ secrets.DATADOG_API_KEY }}"
|
||||
env:
|
||||
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
|
||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
|
||||
AWS_DEFAULT_REGION: us-east-1
|
||||
|
||||
build-and-push:
|
||||
name: Build and Push Docker Images
|
||||
runs-on: ubuntu-latest
|
||||
needs: detect-environment
|
||||
needs: [detect-environment]
|
||||
environment: ${{ needs.detect-environment.outputs.environment }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
include:
|
||||
- name: api
|
||||
@@ -47,6 +95,8 @@ jobs:
|
||||
dockerfile: services/voiceprint/Dockerfile
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
- name: Login to Container Registry
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
@@ -55,47 +105,127 @@ jobs:
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
- name: Calculate image tag
|
||||
id: tag
|
||||
run: |
|
||||
if [ "${{ needs.detect-environment.outputs.environment }}" = "production" ]; then
|
||||
echo "tag=${{ github.event.release.tag_name }}" >> $GITHUB_OUTPUT
|
||||
else
|
||||
echo "tag=staging-${{ github.sha }}" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
run: echo "tag=${{ needs.detect-environment.outputs.tag }}" >> $GITHUB_OUTPUT
|
||||
- name: Build and push ${{ matrix.name }}
|
||||
uses: docker/build-push-action@v5
|
||||
with:
|
||||
context: .
|
||||
file: ${{ matrix.dockerfile }}
|
||||
push: true
|
||||
tags: ghcr.io/${{ github.repository_owner }}/shieldai-${{ matrix.name }}:${{ steps.tag.outputs.tag }}
|
||||
tags: |
|
||||
ghcr.io/${{ github.repository_owner }}/shieldai-${{ matrix.name }}:${{ steps.tag.outputs.tag }}
|
||||
ghcr.io/${{ github.repository_owner }}/shieldai-${{ matrix.name }}:latest
|
||||
cache-from: type=gha
|
||||
cache-to: type=gha,mode=max
|
||||
|
||||
deploy:
|
||||
name: Deploy to ${{ needs.detect-environment.outputs.environment }}
|
||||
deploy-ecs:
|
||||
name: Deploy to ECS
|
||||
runs-on: ubuntu-latest
|
||||
needs: [detect-environment, build-and-push]
|
||||
needs: [detect-environment, terraform-apply, build-and-push]
|
||||
environment: ${{ needs.detect-environment.outputs.environment }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
service: [api, darkwatch, spamshield, voiceprint]
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Calculate deployment tag
|
||||
id: tag
|
||||
run: |
|
||||
if [ "${{ needs.detect-environment.outputs.environment }}" = "production" ]; then
|
||||
echo "tag=${{ github.event.release.tag_name }}" >> $GITHUB_OUTPUT
|
||||
else
|
||||
echo "tag=staging-${{ github.sha }}" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
- name: Deploy via Docker Compose
|
||||
uses: appleboy/ssh-action@v1
|
||||
- name: Configure AWS
|
||||
uses: aws-actions/configure-aws-credentials@v4
|
||||
with:
|
||||
host: ${{ secrets.DEPLOY_HOST }}
|
||||
username: ${{ secrets.DEPLOY_USER }}
|
||||
key: ${{ secrets.DEPLOY_SSH_KEY }}
|
||||
script: |
|
||||
cd /opt/shieldai
|
||||
export DOCKER_TAG="${{ steps.tag.outputs.tag }}"
|
||||
export ENVIRONMENT="${{ needs.detect-environment.outputs.environment }}"
|
||||
docker compose pull
|
||||
docker compose up -d
|
||||
docker image prune -f
|
||||
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
|
||||
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
|
||||
aws-region: us-east-1
|
||||
- name: Update ECS Service
|
||||
run: |
|
||||
IMAGE="ghcr.io/${{ github.repository_owner }}/shieldai-${{ matrix.service }}:${{ needs.detect-environment.outputs.tag }}"
|
||||
CLUSTER="shieldai-${{ needs.detect-environment.outputs.environment }}"
|
||||
SERVICE="${{ matrix.service }}"
|
||||
|
||||
TASK_DEF=$(aws ecs describe-task-definition \
|
||||
--task-definition "${CLUSTER}-${SERVICE}" \
|
||||
--query 'taskDefinition' --output json)
|
||||
|
||||
NEW_TASK_DEF=$(echo "$TASK_DEF" | jq \
|
||||
--arg image "$IMAGE" \
|
||||
'.containerDefinitions[0].image = $image')
|
||||
|
||||
NEW_TASK_DEF_ARN=$(echo "$NEW_TASK_DEF" | \
|
||||
aws ecs register-task-definition \
|
||||
--family "${CLUSTER}-${SERVICE}" \
|
||||
--cli-input-json - \
|
||||
--query 'taskDefinition.taskDefinitionArn' --output text)
|
||||
|
||||
aws ecs update-service \
|
||||
--cluster "$CLUSTER" \
|
||||
--service "${CLUSTER}-${SERVICE}" \
|
||||
--task-definition "$NEW_TASK_DEF_ARN" \
|
||||
--force-new-deployment
|
||||
|
||||
echo "Deployed $IMAGE to $SERVICE"
|
||||
|
||||
health-check:
|
||||
name: Post-Deploy Health Check
|
||||
runs-on: ubuntu-latest
|
||||
needs: [detect-environment, deploy-ecs]
|
||||
environment: ${{ needs.detect-environment.outputs.environment }}
|
||||
steps:
|
||||
- name: Wait for deployment
|
||||
run: sleep 30
|
||||
- name: Health Check
|
||||
uses: jasongd/retry-action@v2
|
||||
with:
|
||||
timeout-minutes: 5
|
||||
retry-minutes: 10
|
||||
command: |
|
||||
ALB_DNS=$(aws ecs describe-services \
|
||||
--cluster "shieldai-${{ needs.detect-environment.outputs.environment }}" \
|
||||
--services "shieldai-${{ needs.detect-environment.outputs.environment }}-api" \
|
||||
--query 'services[0].loadBalancers[0].targetGroupArn' --output text)
|
||||
|
||||
for service in api darkwatch spamshield voiceprint; do
|
||||
PORT=$(case $service in
|
||||
api) echo 3000;;
|
||||
darkwatch) echo 3001;;
|
||||
spamshield) echo 3002;;
|
||||
voiceprint) echo 3003;;
|
||||
esac)
|
||||
|
||||
HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" \
|
||||
"https://shieldai-${{ needs.detect-environment.outputs.environment }}-alb.us-east-1.elb.amazonaws.com/health" || true)
|
||||
|
||||
if [ "$HTTP_CODE" = "200" ]; then
|
||||
echo "Health check passed: $service"
|
||||
else
|
||||
echo "Health check failed: $service (HTTP $HTTP_CODE)"
|
||||
fi
|
||||
done
|
||||
|
||||
rollback:
|
||||
name: Rollback on Failure
|
||||
runs-on: ubuntu-latest
|
||||
needs: [detect-environment, deploy-ecs, health-check]
|
||||
environment: ${{ needs.detect-environment.outputs.environment }}
|
||||
if: failure() && needs.health-check.result == 'failure'
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
service: [api, darkwatch, spamshield, voiceprint]
|
||||
steps:
|
||||
- name: Configure AWS
|
||||
uses: aws-actions/configure-aws-credentials@v4
|
||||
with:
|
||||
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
|
||||
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
|
||||
aws-region: us-east-1
|
||||
- name: Rollback ECS Service
|
||||
run: |
|
||||
CLUSTER="shieldai-${{ needs.detect-environment.outputs.environment }}"
|
||||
SERVICE="${{ matrix.service }}"
|
||||
|
||||
aws ecs update-service \
|
||||
--cluster "$CLUSTER" \
|
||||
--service "${CLUSTER}-${SERVICE}" \
|
||||
--rollback \
|
||||
--no-cli-auto-prompt
|
||||
|
||||
echo "Rolled back $SERVICE"
|
||||
|
||||
Reference in New Issue
Block a user