# ShieldAI/.github/workflows/deploy.yml

# Deployment pipeline. A push to main and a published release both trigger
# it; the detect-environment job maps the triggering event to a target
# environment (release -> production, anything else -> staging).
name: Deploy

on:
  push:
    branches: [main]
  release:
    types: [published]

# Serialize deployments per ref. In-flight runs are deliberately NOT
# cancelled: killing a run in the middle of `terraform apply` or an ECS
# rollout can leave a stale state lock or a half-updated set of services.
# A newer run waits in the queue instead (GitHub keeps only the most recent
# pending run per group).
concurrency:
  group: deploy-${{ github.ref }}
  cancel-in-progress: false

# NOTE(review): neither value is referenced by any step visible in this
# workflow - confirm whether they are still needed.
env:
  NODE_VERSION: "20"
  PNPM_VERSION: "9"

jobs:
  # Decides where this run deploys and which image tag it uses.
  # Outputs:
  #   environment - "production" for release events, "staging" otherwise
  #   tag         - the release tag name (production) or the commit SHA (staging)
  detect-environment:
    name: Detect Environment
    runs-on: ubuntu-latest
    outputs:
      environment: ${{ steps.detect.outputs.environment }}
      tag: ${{ steps.tag.outputs.tag }}
    steps:
      - name: Detect deployment target
        id: detect
        env:
          EVENT_NAME: ${{ github.event_name }}
        run: |
          if [ "$EVENT_NAME" = "release" ]; then
            echo "environment=production" >> "$GITHUB_OUTPUT"
          else
            echo "environment=staging" >> "$GITHUB_OUTPUT"
          fi
      - name: Calculate tag
        id: tag
        # Context values are passed through the environment rather than being
        # expanded into the script text: a release tag name is user-supplied
        # and would otherwise be executed as shell code.
        env:
          TARGET_ENV: ${{ steps.detect.outputs.environment }}
          RELEASE_TAG: ${{ github.event.release.tag_name }}
          COMMIT_SHA: ${{ github.sha }}
        run: |
          if [ "$TARGET_ENV" = "production" ]; then
            TAG="$RELEASE_TAG"
          else
            TAG="$COMMIT_SHA"
          fi

          # The value is used as a Docker image tag downstream; fail here with
          # a clear message instead of later in the image build.
          if [[ ! "$TAG" =~ ^[A-Za-z0-9_][A-Za-z0-9_.-]{0,127}$ ]]; then
            echo "Invalid image tag: '$TAG' (allowed: letters, digits, '_', '.', '-'; max 128 chars)"
            exit 1
          fi

          printf 'tag=%s\n' "$TAG" >> "$GITHUB_OUTPUT"

  # Provisions/updates infrastructure for the target environment from
  # infra/environments/<environment>. The plan is written to a file and that
  # exact plan is applied, so what was planned is what gets applied.
  terraform-apply:
    name: Terraform Apply
    runs-on: ubuntu-latest
    needs: detect-environment
    environment: ${{ needs.detect-environment.outputs.environment }}
    # Credentials are job-level because init and plan need them as well as
    # apply (the "bucket=" backend setting suggests a remote state bucket).
    env:
      AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
      AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
      AWS_DEFAULT_REGION: us-east-1
      TF_IN_AUTOMATION: "true"
    steps:
      - uses: actions/checkout@v4
      - name: Setup Terraform
        uses: hashicorp/setup-terraform@v3
        with:
          terraform_version: "~> 1.5"
      - name: Terraform Init
        working-directory: infra/environments/${{ needs.detect-environment.outputs.environment }}
        run: terraform init -input=false -backend-config="bucket=shieldai-${{ needs.detect-environment.outputs.environment }}-terraform-state"
      - name: Terraform Plan
        id: plan
        working-directory: infra/environments/${{ needs.detect-environment.outputs.environment }}
        # Explicit bash enables pipefail, so a failing `terraform plan` is not
        # masked by the exit status of `tee`.
        shell: bash
        # Input variables are supplied as TF_VAR_* environment variables so
        # the secret values never appear on the command line.
        env:
          TF_VAR_hibp_api_key: ${{ secrets.HIBP_API_KEY }}
          TF_VAR_resend_api_key: ${{ secrets.RESEND_API_KEY }}
          TF_VAR_sentry_dsn: ${{ secrets.SENTRY_DSN }}
          TF_VAR_datadog_api_key: ${{ secrets.DATADOG_API_KEY }}
        run: |
          terraform plan -input=false -no-color -out=tfplan \
            | tee /tmp/terraform-plan.out
      - name: Terraform Apply
        working-directory: infra/environments/${{ needs.detect-environment.outputs.environment }}
        # Applies the saved plan; variable values are already recorded in it,
        # so no -var flags are passed here.
        run: terraform apply -input=false -auto-approve tfplan

  # Builds one image per matrix entry and pushes it to GHCR under two tags:
  # the tag computed by detect-environment and "latest".
  # NOTE(review): "latest" is pushed for staging runs too, so it does not
  # necessarily point at a released build - confirm this is intended.
  build-and-push:
    name: Build and Push Docker Images
    runs-on: ubuntu-latest
    needs: [detect-environment]
    environment: ${{ needs.detect-environment.outputs.environment }}
    # GITHUB_TOKEN needs package write access to push to ghcr.io; granting it
    # explicitly keeps the job working when the repository default is
    # read-only.
    permissions:
      contents: read
      packages: write
    strategy:
      fail-fast: false
      matrix:
        include:
          - name: api
            dockerfile: packages/api/Dockerfile
          - name: darkwatch
            dockerfile: services/darkwatch/Dockerfile
          - name: spamshield
            dockerfile: services/spamshield/Dockerfile
          - name: voiceprint
            dockerfile: services/voiceprint/Dockerfile
    steps:
      - uses: actions/checkout@v4
      - name: Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Login to Container Registry
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      - name: Calculate image tag
        id: tag
        # Passed via env so the tag text is never expanded into the script.
        env:
          IMAGE_TAG: ${{ needs.detect-environment.outputs.tag }}
        run: printf 'tag=%s\n' "$IMAGE_TAG" >> "$GITHUB_OUTPUT"
      - name: Build and push ${{ matrix.name }}
        uses: docker/build-push-action@v5
        with:
          context: .
          file: ${{ matrix.dockerfile }}
          push: true
          tags: |
            ghcr.io/${{ github.repository_owner }}/shieldai-${{ matrix.name }}:${{ steps.tag.outputs.tag }}
            ghcr.io/${{ github.repository_owner }}/shieldai-${{ matrix.name }}:latest
          cache-from: type=gha
          cache-to: type=gha,mode=max

  # For each service: clones the current task definition with the new image,
  # registers it as a new revision, and points the ECS service at it.
  # Task definition family and ECS service are both named
  # "shieldai-<environment>-<service>".
  deploy-ecs:
    name: Deploy to ECS
    runs-on: ubuntu-latest
    needs: [detect-environment, terraform-apply, build-and-push]
    environment: ${{ needs.detect-environment.outputs.environment }}
    strategy:
      fail-fast: false
      matrix:
        service: [api, darkwatch, spamshield, voiceprint]
    steps:
      - uses: actions/checkout@v4
      - name: Configure AWS
        uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: us-east-1
      - name: Update ECS Service
        # Values are passed through the environment instead of being expanded
        # into the script text (the tag can be a user-supplied release name).
        env:
          REPO_OWNER: ${{ github.repository_owner }}
          DEPLOY_ENV: ${{ needs.detect-environment.outputs.environment }}
          IMAGE_TAG: ${{ needs.detect-environment.outputs.tag }}
          SERVICE_NAME: ${{ matrix.service }}
        run: |
          set -euo pipefail

          IMAGE="ghcr.io/${REPO_OWNER}/shieldai-${SERVICE_NAME}:${IMAGE_TAG}"
          CLUSTER="shieldai-${DEPLOY_ENV}"
          FAMILY="${CLUSTER}-${SERVICE_NAME}"
          TASK_DEF_FILE="${RUNNER_TEMP}/task-definition-${SERVICE_NAME}.json"

          TASK_DEF=$(aws ecs describe-task-definition \
            --task-definition "$FAMILY" \
            --query 'taskDefinition' --output json)

          # Swap in the new image and drop the read-only fields that
          # describe-task-definition returns but register-task-definition
          # does not accept as input.
          echo "$TASK_DEF" | jq \
            --arg image "$IMAGE" \
            '.containerDefinitions[0].image = $image
             | del(.taskDefinitionArn, .revision, .status,
                   .requiresAttributes, .compatibilities,
                   .registeredAt, .registeredBy, .deregisteredAt)' \
            > "$TASK_DEF_FILE"

          # --cli-input-json takes a JSON string or a file:// URL, not stdin.
          # The family name is already part of the JSON document.
          NEW_TASK_DEF_ARN=$(aws ecs register-task-definition \
            --cli-input-json "file://${TASK_DEF_FILE}" \
            --query 'taskDefinition.taskDefinitionArn' --output text)

          aws ecs update-service \
            --cluster "$CLUSTER" \
            --service "$FAMILY" \
            --task-definition "$NEW_TASK_DEF_ARN" \
            --force-new-deployment

          echo "Deployed $IMAGE to $SERVICE_NAME"

  # Probes the environment's load balancer after deployment. A failure of
  # this job is what triggers the rollback job.
  health-check:
    name: Post-Deploy Health Check
    runs-on: ubuntu-latest
    needs: [detect-environment, deploy-ecs]
    environment: ${{ needs.detect-environment.outputs.environment }}
    steps:
      - name: Configure AWS
        uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: us-east-1
      - name: Wait for deployment
        run: sleep 30
      - name: Health Check
        id: health
        env:
          DEPLOY_ENV: ${{ needs.detect-environment.outputs.environment }}
        run: |
          CLUSTER="shieldai-${DEPLOY_ENV}"
          MAX_ATTEMPTS=5
          RETRY_DELAY_SECONDS=10

          ALB_DNS=$(aws elbv2 describe-load-balancers \
            --query "LoadBalancers[?contains(LoadBalancerName, '${CLUSTER}-alb')].DNSName" \
            --output text)

          # Text output separates multiple matches with whitespace; keep only
          # the first so the URL below stays well-formed.
          ALB_DNS="${ALB_DNS%%[[:space:]]*}"

          if [ -z "$ALB_DNS" ]; then
            echo "Health check failed: ALB DNS not found"
            exit 1
          fi

          echo "ALB DNS: $ALB_DNS"

          FAILED=0
          for service in api darkwatch spamshield voiceprint; do
            # NOTE(review): the URL does not vary with $service, so every
            # iteration probes the same endpoint. Confirm how the ALB routes
            # to each service and probe a per-service URL if one exists.
            HTTP_CODE="000"
            attempt=1
            while [ "$attempt" -le "$MAX_ATTEMPTS" ]; do
              # Timeouts keep a hung connection from stalling the job; curl
              # reports 000 when no HTTP response was received.
              HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" \
                --connect-timeout 5 --max-time 10 \
                "http://${ALB_DNS}/health" || true)

              if [ "$HTTP_CODE" = "200" ]; then
                break
              fi

              # New tasks may still be starting; retry before giving up so a
              # slow start does not cause an unnecessary rollback.
              if [ "$attempt" -lt "$MAX_ATTEMPTS" ]; then
                echo "Attempt $attempt/$MAX_ATTEMPTS for $service returned HTTP $HTTP_CODE; retrying in ${RETRY_DELAY_SECONDS}s"
                sleep "$RETRY_DELAY_SECONDS"
              fi
              attempt=$((attempt + 1))
            done

            if [ "$HTTP_CODE" = "200" ]; then
              echo "Health check passed: $service"
            else
              echo "Health check failed: $service (HTTP $HTTP_CODE)"
              FAILED=1
            fi
          done

          if [ "$FAILED" -eq 1 ]; then
            exit 1
          fi

  # Runs only when the health-check job failed. For each service, points the
  # ECS service back at the task definition revision registered before the
  # newest one in its family.
  rollback:
    name: Rollback on Failure
    runs-on: ubuntu-latest
    needs: [detect-environment, deploy-ecs, health-check]
    environment: ${{ needs.detect-environment.outputs.environment }}
    if: failure() && needs.health-check.result == 'failure'
    strategy:
      fail-fast: false
      matrix:
        service: [api, darkwatch, spamshield, voiceprint]
    steps:
      - name: Configure AWS
        uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: us-east-1
      - name: Rollback ECS Service
        env:
          DEPLOY_ENV: ${{ needs.detect-environment.outputs.environment }}
          SERVICE_NAME: ${{ matrix.service }}
        run: |
          set -euo pipefail

          CLUSTER="shieldai-${DEPLOY_ENV}"
          FAMILY="${CLUSTER}-${SERVICE_NAME}"

          # `aws ecs update-service` has no rollback flag, so the rollback is
          # an explicit redeploy of the previous revision: list the family's
          # ACTIVE revisions newest-first and take the second entry.
          # NOTE(review): assumes the newest revision is the one this run's
          # deploy registered - confirm nothing else (e.g. Terraform)
          # registers revisions of this family during the same run.
          PREVIOUS_TASK_DEF=$(aws ecs list-task-definitions \
            --family-prefix "$FAMILY" \
            --status ACTIVE \
            --sort DESC \
            --max-items 2 \
            --output json \
            --no-cli-auto-prompt | jq -r '.taskDefinitionArns[1] // empty')

          if [ -z "$PREVIOUS_TASK_DEF" ]; then
            echo "Rollback failed: no previous ACTIVE task definition for $FAMILY"
            exit 1
          fi

          aws ecs update-service \
            --cluster "$CLUSTER" \
            --service "$FAMILY" \
            --task-definition "$PREVIOUS_TASK_DEF" \
            --no-cli-auto-prompt

          echo "Rolled back $SERVICE_NAME to $PREVIOUS_TASK_DEF"