Add rollback procedure documentation and testing scripts (FRE-4808)
- infra/ROLLBACK.md: comprehensive rollback runbook with ECS, Docker Compose, database migration, blue-green, and emergency rollback procedures - infra/scripts/rollback.sh: enhanced ECS rollback with validation, logging, health verification, and per-service rollback support - infra/scripts/rollback-compose.sh: Docker Compose rollback for local/staging - infra/scripts/rollback-migration.sh: Drizzle migration rollback with AWS Secrets Manager integration - infra/scripts/test-rollback.sh: automated test suite (51 tests) - Updated infra/README.md to reference ROLLBACK.md Co-Authored-By: Paperclip <noreply@paperclip.ing>
This commit is contained in:
121
infra/scripts/rollback-compose.sh
Executable file
121
infra/scripts/rollback-compose.sh
Executable file
@@ -0,0 +1,121 @@
|
||||
#!/bin/bash
|
||||
set -euo pipefail
|
||||
|
||||
# ShieldAI Docker Compose Rollback Script
|
||||
# Usage: ./rollback-compose.sh <previous_tag> [--env prod|dev]
|
||||
#
|
||||
# Rolls back all services to a previous tagged image using docker-compose.prod.yml
|
||||
#
|
||||
# Examples:
|
||||
# ./rollback-compose.sh v1.2.3 # Rollback to v1.2.3
|
||||
# ./rollback-compose.sh v1.2.3 --env prod # Explicit production compose
|
||||
|
||||
PREVIOUS_TAG="${1:-}"
|
||||
ENV_MODE="${2:-prod}"
|
||||
|
||||
# ─── Configuration ───────────────────────────────────────────────
|
||||
SERVICES="api darkwatch spamshield voiceprint"
|
||||
COMPOSE_FILE="docker-compose.prod.yml"
|
||||
REGISTRY_OWNER="${GITHUB_REPOSITORY_OWNER:-shieldai}"
|
||||
|
||||
# ─── Helpers ─────────────────────────────────────────────────────
|
||||
log() {
|
||||
local level="$1"
|
||||
shift
|
||||
echo "[$(date -u '+%H:%M:%S')] [$level] $*"
|
||||
}
|
||||
|
||||
log_info() { log "INFO" "$@"; }
|
||||
log_warn() { log "WARN" "$@"; }
|
||||
log_error() { log "ERROR" "$@"; }
|
||||
|
||||
# ─── Validation ──────────────────────────────────────────────────
|
||||
if [[ -z "$PREVIOUS_TAG" ]]; then
|
||||
log_error "Usage: $0 <previous_tag> [--env prod|dev]"
|
||||
log_error "Example: $0 v1.2.3"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if ! command -v docker &>/dev/null; then
|
||||
log_error "Docker not found in PATH"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# ─── Rollback Logic ──────────────────────────────────────────────
|
||||
main() {
|
||||
log_info "=== Docker Compose Rollback ==="
|
||||
log_info "Target tag: $PREVIOUS_TAG"
|
||||
log_info "Compose file: $COMPOSE_FILE"
|
||||
log_info "Registry: ghcr.io/$REGISTRY_OWNER"
|
||||
|
||||
# 1. Pull previous images
|
||||
log_info "Pulling previous images..."
|
||||
local pull_failed=0
|
||||
for svc in $SERVICES; do
|
||||
local image="ghcr.io/${REGISTRY_OWNER}/shieldai-${svc}:${PREVIOUS_TAG}"
|
||||
log_info "Pulling $image..."
|
||||
if docker pull "$image" 2>/dev/null; then
|
||||
log_info "Pulled: $image"
|
||||
else
|
||||
log_warn "Pull failed: $image (may not exist)"
|
||||
pull_failed=1
|
||||
fi
|
||||
done
|
||||
|
||||
if [[ $pull_failed -eq 1 ]]; then
|
||||
log_warn "Some images may not exist at tag $PREVIOUS_TAG"
|
||||
log_info "Continuing with available images..."
|
||||
fi
|
||||
|
||||
# 2. Stop current services gracefully
|
||||
log_info "Stopping current services..."
|
||||
DOCKER_TAG="$PREVIOUS_TAG" docker compose -f "$COMPOSE_FILE" down --timeout 30 2>/dev/null || true
|
||||
|
||||
# 3. Start with previous tag
|
||||
log_info "Starting services with tag $PREVIOUS_TAG..."
|
||||
DOCKER_TAG="$PREVIOUS_TAG" docker compose -f "$COMPOSE_FILE" up -d
|
||||
|
||||
# 4. Wait for services to be healthy
|
||||
log_info "Waiting for services to become healthy..."
|
||||
sleep 10
|
||||
|
||||
# 5. Verify health
|
||||
local passed=0
|
||||
local failed=0
|
||||
|
||||
for svc in $SERVICES; do
|
||||
local port
|
||||
port=$(case "$svc" in
|
||||
api) echo 3000 ;;
|
||||
darkwatch) echo 3001 ;;
|
||||
spamshield) echo 3002 ;;
|
||||
voiceprint) echo 3003 ;;
|
||||
esac)
|
||||
|
||||
local http_code
|
||||
http_code=$(curl -s -o /dev/null -w "%{http_code}" \
|
||||
--connect-timeout 10 --max-time 30 \
|
||||
"http://localhost:${port}/health" 2>/dev/null || echo "000")
|
||||
|
||||
if [[ "$http_code" == "200" ]]; then
|
||||
log_info "Health OK: $svc (port $port, HTTP $http_code)"
|
||||
((passed++))
|
||||
else
|
||||
log_warn "Health FAIL: $svc (port $port, HTTP $http_code)"
|
||||
((failed++))
|
||||
fi
|
||||
done
|
||||
|
||||
log_info "=== Rollback Complete ==="
|
||||
log_info "Passed: $passed, Failed: $failed"
|
||||
|
||||
if [[ $failed -gt 0 ]]; then
|
||||
log_warn "Some services failed health check. Check logs: docker compose -f $COMPOSE_FILE logs"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
log_info "All services healthy after rollback"
|
||||
exit 0
|
||||
}
|
||||
|
||||
main "$@"
|
||||
Reference in New Issue
Block a user