13. Operation Runbook

Day-to-day operational procedures for MILU2 Infrastructure.

Daily Health Check

| Item | Command | Expected |
|------|---------|----------|
| ECS Services | `aws ecs describe-services --cluster milu2-test-cluster --services <svc>` | ACTIVE, runningCount = desiredCount |
| ALB Targets | `aws elbv2 describe-target-health --target-group-arn <tg-arn>` | healthy |
| RDS Status | `aws rds describe-db-clusters` | available |
| CloudFront | `aws cloudfront list-distributions` | Deployed |

Scaling Operations

Manual Fargate Scaling

# Scale API service
aws ecs update-service \
  --cluster milu2-test-cluster \
  --service milu2-test-api-service \
  --desired-count 4

# Scale Web service
aws ecs update-service \
  --cluster milu2-test-cluster \
  --service milu2-test-web-service \
  --desired-count 4

Scaling Game Nodes

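# Invoke scale Lambda
# (AWS CLI v2: add --cli-binary-format raw-in-base64-out, otherwise the raw JSON payload is rejected as invalid base64)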
aws lambda invoke \
  --function-name milu2-test-scale-node-service \
  --payload '{"action": "scale", "server": "world", "channel": "CH2", "desired": 1}' \
  output.json

cat output.json

Database Operations

Connecting to RDS

# Get bastion IP and key
make output-test | grep bastion

# SSH to bastion
ssh -i bastion_key.pem ec2-user@<bastion-ip>

# Connect to RDS
mysql -h milu2-test-db.cluster-xxx.ap-northeast-1.rds.amazonaws.com \
      -P 3307 \
      -u admin \
      -p

Creating Database Backup

# Manual snapshot
aws rds create-db-cluster-snapshot \
  --db-cluster-identifier milu2-test-db \
  --db-cluster-snapshot-identifier milu2-test-db-manual-$(date +%Y%m%d)

ECS Container Operations

Exec into Container

# API container
make ecs-exec-api-test

# Or directly
aws ecs execute-command \
  --cluster milu2-test-cluster \
  --task <task-id> \
  --container milu2-test-api-php \
  --interactive \
  --command "/bin/sh"

Viewing Container Logs

# Recent logs
aws logs tail app/ecs/milu2-test-api-php --since 1h

# Follow logs
aws logs tail app/ecs/milu2-test-api-php --follow

Force Service Restart

# Force new deployment
aws ecs update-service \
  --cluster milu2-test-cluster \
  --service milu2-test-api-service \
  --force-new-deployment

CodeDeploy Operations

# Listing Deployments
aws deploy list-deployments \
  --application-name milu2-test \
  --deployment-group-name api \
  --include-only-statuses InProgress

# Approving Deployment
aws deploy continue-deployment \
  --deployment-id d-XXXXXXXXX \
  --deployment-wait-type READY_WAIT

# Rolling Back Deployment
aws deploy stop-deployment \
  --deployment-id d-XXXXXXXXX \
  --auto-rollback-enabled

Quick Reference Card

| Operation | Command |
|-----------|---------|
| Check ECS | `aws ecs describe-services --cluster milu2-test-cluster --services <svc>` |
| Scale ECS | `aws ecs update-service --cluster <cluster> --service <svc> --desired-count <n>` |
| Restart ECS | `aws ecs update-service --cluster <cluster> --service <svc> --force-new-deployment` |
| View Logs | `aws logs tail <log-group> --since 1h` |
| Exec Shell | `make ecs-exec-api-test` |
| Check RDS | `aws rds describe-db-clusters --db-cluster-identifier milu2-test-db` |
| Invalidate CF | `aws cloudfront create-invalidation --distribution-id <id> --paths "/*"` |
| Deploy Continue | `aws deploy continue-deployment --deployment-id <id>` |
| Deploy Rollback | `aws deploy stop-deployment --deployment-id <id> --auto-rollback-enabled` |

Emergency Procedures

Service Down - Quick Recovery

# 1. Check service status
aws ecs describe-services --cluster milu2-test-cluster --services milu2-test-api-service

# 2. Force new deployment
aws ecs update-service \
  --cluster milu2-test-cluster \
  --service milu2-test-api-service \
  --force-new-deployment

# 3. Monitor events
aws ecs describe-services \
  --cluster milu2-test-cluster \
  --services milu2-test-api-service \
  --query 'services[0].events[:5]'