Fix 6 P1 infrastructure issues from code review (FRE-4574)

- ALB: deploy to public subnets instead of private (adds public_subnet_ids var)
- ECS: fix launch_desired_count → launch_type = FARGATE
- Secrets: accept actual RDS/ElastiCache endpoints from parent module
- Deploy: fix circular dependency (needs.detect → steps.detect)
- Health check: dynamic ALB DNS lookup via aws elbv2 CLI
- Health check: exit 1 on failure so rollback triggers

Co-Authored-By: Paperclip <noreply@paperclip.ing>
This commit is contained in:
2026-05-10 02:28:48 -04:00
parent c7df40ac26
commit 4ddd24fd72
4 changed files with 62 additions and 35 deletions

View File

@@ -33,7 +33,7 @@ jobs:
- name: Calculate tag
id: tag
run: |
if [ "${{ needs.detect-environment.outputs.environment }}" = "production" ]; then
if [ "${{ steps.detect.outputs.environment }}" = "production" ]; then
echo "tag=${{ github.event.release.tag_name }}" >> $GITHUB_OUTPUT
else
echo "tag=${{ github.sha }}" >> $GITHUB_OUTPUT
@@ -169,36 +169,47 @@ jobs:
needs: [detect-environment, deploy-ecs]
environment: ${{ needs.detect-environment.outputs.environment }}
steps:
- name: Configure AWS
uses: aws-actions/configure-aws-credentials@v4
with:
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
aws-region: us-east-1
- name: Wait for deployment
run: sleep 30
- name: Health Check
uses: jasongd/retry-action@v2
with:
timeout-minutes: 5
retry-minutes: 10
command: |
ALB_DNS=$(aws ecs describe-services \
--cluster "shieldai-${{ needs.detect-environment.outputs.environment }}" \
--services "shieldai-${{ needs.detect-environment.outputs.environment }}-api" \
--query 'services[0].loadBalancers[0].targetGroupArn' --output text)
id: health
run: |
ENV="${{ needs.detect-environment.outputs.environment }}"
CLUSTER="shieldai-${ENV}"
for service in api darkwatch spamshield voiceprint; do
PORT=$(case $service in
api) echo 3000;;
darkwatch) echo 3001;;
spamshield) echo 3002;;
voiceprint) echo 3003;;
esac)
ALB_DNS=$(aws elbv2 describe-load-balancers \
--query "LoadBalancers[?contains(LoadBalancerName, '${CLUSTER}-alb')].DNSName" \
--output text)
HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" \
"https://shieldai-${{ needs.detect-environment.outputs.environment }}-alb.us-east-1.elb.amazonaws.com/health" || true)
if [ -z "$ALB_DNS" ]; then
echo "Health check failed: ALB DNS not found"
exit 1
fi
if [ "$HTTP_CODE" = "200" ]; then
echo "Health check passed: $service"
else
echo "Health check failed: $service (HTTP $HTTP_CODE)"
fi
done
echo "ALB DNS: $ALB_DNS"
FAILED=0
for service in api darkwatch spamshield voiceprint; do
HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" \
"http://${ALB_DNS}/health" || true)
if [ "$HTTP_CODE" = "200" ]; then
echo "Health check passed: $service"
else
echo "Health check failed: $service (HTTP $HTTP_CODE)"
FAILED=1
fi
done
if [ "$FAILED" -eq 1 ]; then
exit 1
fi
rollback:
name: Rollback on Failure