Files
ShieldAI/.github/workflows/deploy.yml
Michael Freno 4ddd24fd72 Fix 6 P1 infrastructure issues from code review (FRE-4574)
- ALB: deploy to public subnets instead of private (adds public_subnet_ids var)
- ECS: fix launch_desired_count → launch_type = FARGATE
- Secrets: accept actual RDS/ElastiCache endpoints from parent module
- Deploy: fix circular dependency (needs.detect → steps.detect)
- Health check: dynamic ALB DNS lookup via aws elbv2 CLI
- Health check: exit 1 on failure so rollback triggers

Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-05-10 02:28:48 -04:00

243 lines
8.3 KiB
YAML

name: Deploy
on:
push:
branches: [main]
release:
types: [published]
concurrency:
group: deploy-${{ github.ref }}
cancel-in-progress: true
env:
NODE_VERSION: "20"
PNPM_VERSION: "9"
jobs:
detect-environment:
name: Detect Environment
runs-on: ubuntu-latest
outputs:
environment: ${{ steps.detect.outputs.environment }}
tag: ${{ steps.tag.outputs.tag }}
steps:
- name: Detect deployment target
id: detect
run: |
if [ "${{ github.event_name }}" = "release" ]; then
echo "environment=production" >> $GITHUB_OUTPUT
else
echo "environment=staging" >> $GITHUB_OUTPUT
fi
- name: Calculate tag
id: tag
run: |
if [ "${{ steps.detect.outputs.environment }}" = "production" ]; then
echo "tag=${{ github.event.release.tag_name }}" >> $GITHUB_OUTPUT
else
echo "tag=${{ github.sha }}" >> $GITHUB_OUTPUT
fi
terraform-apply:
name: Terraform Apply
runs-on: ubuntu-latest
needs: detect-environment
environment: ${{ needs.detect-environment.outputs.environment }}
steps:
- uses: actions/checkout@v4
- name: Setup Terraform
uses: hashicorp/setup-terraform@v3
with:
terraform_version: "~> 1.5"
- name: Terraform Init
working-directory: infra/environments/${{ needs.detect-environment.outputs.environment }}
run: terraform init -backend-config="bucket=shieldai-${{ needs.detect-environment.outputs.environment }}-terraform-state"
- name: Terraform Plan
id: plan
working-directory: infra/environments/${{ needs.detect-environment.outputs.environment }}
run: |
terraform plan \
-var="hibp_api_key=${{ secrets.HIBP_API_KEY }}" \
-var="resend_api_key=${{ secrets.RESEND_API_KEY }}" \
-var="sentry_dsn=${{ secrets.SENTRY_DSN }}" \
-var="datadog_api_key=${{ secrets.DATADOG_API_KEY }}" \
-no-color | tee /tmp/terraform-plan.out
- name: Terraform Apply
working-directory: infra/environments/${{ needs.detect-environment.outputs.environment }}
run: |
terraform apply -auto-approve \
-var="hibp_api_key=${{ secrets.HIBP_API_KEY }}" \
-var="resend_api_key=${{ secrets.RESEND_API_KEY }}" \
-var="sentry_dsn=${{ secrets.SENTRY_DSN }}" \
-var="datadog_api_key=${{ secrets.DATADOG_API_KEY }}"
env:
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
AWS_DEFAULT_REGION: us-east-1
build-and-push:
name: Build and Push Docker Images
runs-on: ubuntu-latest
needs: [detect-environment]
environment: ${{ needs.detect-environment.outputs.environment }}
strategy:
fail-fast: false
matrix:
include:
- name: api
dockerfile: packages/api/Dockerfile
- name: darkwatch
dockerfile: services/darkwatch/Dockerfile
- name: spamshield
dockerfile: services/spamshield/Dockerfile
- name: voiceprint
dockerfile: services/voiceprint/Dockerfile
steps:
- uses: actions/checkout@v4
- name: Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Login to Container Registry
uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Calculate image tag
id: tag
run: echo "tag=${{ needs.detect-environment.outputs.tag }}" >> $GITHUB_OUTPUT
- name: Build and push ${{ matrix.name }}
uses: docker/build-push-action@v5
with:
context: .
file: ${{ matrix.dockerfile }}
push: true
tags: |
ghcr.io/${{ github.repository_owner }}/shieldai-${{ matrix.name }}:${{ steps.tag.outputs.tag }}
ghcr.io/${{ github.repository_owner }}/shieldai-${{ matrix.name }}:latest
cache-from: type=gha
cache-to: type=gha,mode=max
deploy-ecs:
name: Deploy to ECS
runs-on: ubuntu-latest
needs: [detect-environment, terraform-apply, build-and-push]
environment: ${{ needs.detect-environment.outputs.environment }}
strategy:
fail-fast: false
matrix:
service: [api, darkwatch, spamshield, voiceprint]
steps:
- uses: actions/checkout@v4
- name: Configure AWS
uses: aws-actions/configure-aws-credentials@v4
with:
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
aws-region: us-east-1
- name: Update ECS Service
run: |
IMAGE="ghcr.io/${{ github.repository_owner }}/shieldai-${{ matrix.service }}:${{ needs.detect-environment.outputs.tag }}"
CLUSTER="shieldai-${{ needs.detect-environment.outputs.environment }}"
SERVICE="${{ matrix.service }}"
TASK_DEF=$(aws ecs describe-task-definition \
--task-definition "${CLUSTER}-${SERVICE}" \
--query 'taskDefinition' --output json)
NEW_TASK_DEF=$(echo "$TASK_DEF" | jq \
--arg image "$IMAGE" \
'.containerDefinitions[0].image = $image')
NEW_TASK_DEF_ARN=$(echo "$NEW_TASK_DEF" | \
aws ecs register-task-definition \
--family "${CLUSTER}-${SERVICE}" \
--cli-input-json - \
--query 'taskDefinition.taskDefinitionArn' --output text)
aws ecs update-service \
--cluster "$CLUSTER" \
--service "${CLUSTER}-${SERVICE}" \
--task-definition "$NEW_TASK_DEF_ARN" \
--force-new-deployment
echo "Deployed $IMAGE to $SERVICE"
health-check:
name: Post-Deploy Health Check
runs-on: ubuntu-latest
needs: [detect-environment, deploy-ecs]
environment: ${{ needs.detect-environment.outputs.environment }}
steps:
- name: Configure AWS
uses: aws-actions/configure-aws-credentials@v4
with:
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
aws-region: us-east-1
- name: Wait for deployment
run: sleep 30
- name: Health Check
id: health
run: |
ENV="${{ needs.detect-environment.outputs.environment }}"
CLUSTER="shieldai-${ENV}"
ALB_DNS=$(aws elbv2 describe-load-balancers \
--query "LoadBalancers[?contains(LoadBalancerName, '${CLUSTER}-alb')].DNSName" \
--output text)
if [ -z "$ALB_DNS" ]; then
echo "Health check failed: ALB DNS not found"
exit 1
fi
echo "ALB DNS: $ALB_DNS"
FAILED=0
for service in api darkwatch spamshield voiceprint; do
HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" \
"http://${ALB_DNS}/health" || true)
if [ "$HTTP_CODE" = "200" ]; then
echo "Health check passed: $service"
else
echo "Health check failed: $service (HTTP $HTTP_CODE)"
FAILED=1
fi
done
if [ "$FAILED" -eq 1 ]; then
exit 1
fi
rollback:
name: Rollback on Failure
runs-on: ubuntu-latest
needs: [detect-environment, deploy-ecs, health-check]
environment: ${{ needs.detect-environment.outputs.environment }}
if: failure() && needs.health-check.result == 'failure'
strategy:
fail-fast: false
matrix:
service: [api, darkwatch, spamshield, voiceprint]
steps:
- name: Configure AWS
uses: aws-actions/configure-aws-credentials@v4
with:
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
aws-region: us-east-1
- name: Rollback ECS Service
run: |
CLUSTER="shieldai-${{ needs.detect-environment.outputs.environment }}"
SERVICE="${{ matrix.service }}"
aws ecs update-service \
--cluster "$CLUSTER" \
--service "${CLUSTER}-${SERVICE}" \
--rollback \
--no-cli-auto-prompt
echo "Rolled back $SERVICE"