Auto Recovery

Configure EC2 instances with auto-recovery to automatically recover from underlying hardware failures.

# Production application server created through the shared ec2-instance module,
# with the module's auto-recovery and monitoring options switched on.
module "critical_server" {
  source  = "registry.patterneddesigns.ca/patterneddesigns/ec2-instance/aws"
  version = "1.5.0"

  # Identity and sizing
  instance_name = "critical-app"
  instance_type = "t3.large"
  ami_id        = data.aws_ami.amazon_linux.id

  # Network placement: first private subnet exposed by the VPC module,
  # guarded by the application security group.
  subnet_id          = module.vpc.private_subnets[0]
  security_group_ids = [aws_security_group.app.id]

  # Resilience options passed to the module.
  # NOTE(review): `monitoring` presumably enables EC2 detailed monitoring —
  # confirm against the module's variable documentation.
  monitoring           = true
  enable_auto_recovery = true

  tags = {
    Criticality = "high"
    Environment = "production"
  }
}

# CloudWatch alarm that triggers the EC2 recover action for critical_server
# when its system status check reports a failure.
resource "aws_cloudwatch_metric_alarm" "auto_recovery" {
  # The alarm name embeds the instance ID.
  alarm_name        = format("ec2-auto-recovery-%s", module.critical_server.instance_id)
  alarm_description = "Auto-recover EC2 instance on system status check failure"

  # Metric being watched, scoped to the single instance.
  namespace   = "AWS/EC2"
  metric_name = "StatusCheckFailed_System"

  dimensions = {
    InstanceId = module.critical_server.instance_id
  }

  # Fire when the per-period maximum is >= 1 for two consecutive 60-second periods.
  statistic           = "Maximum"
  period              = 60
  evaluation_periods  = 2
  comparison_operator = "GreaterThanOrEqualToThreshold"
  threshold           = 1

  # Built-in EC2 recover action in the current region.
  alarm_actions = [
    format("arn:aws:automate:%s:ec2:recover", data.aws_region.current.name)
  ]

  tags = {
    Environment = "production"
  }
}

With Instance Recovery and SNS Notification

# Production server whose recovery alarm also publishes to an SNS topic.
# NOTE(review): unlike critical_server, no security_group_ids are passed here —
# confirm the module's default security group behaviour is what you want.
module "monitored_server" {
  source  = "registry.patterneddesigns.ca/patterneddesigns/ec2-instance/aws"
  version = "1.5.0"

  # Identity and sizing
  instance_name = "monitored-app"
  instance_type = "m6i.large"
  ami_id        = data.aws_ami.amazon_linux.id

  # Network placement: first private subnet exposed by the VPC module.
  subnet_id = module.vpc.private_subnets[0]

  # Resilience options passed to the module.
  monitoring           = true
  enable_auto_recovery = true

  tags = {
    Environment = "production"
  }
}

# CloudWatch alarm that triggers the EC2 recover action for monitored_server
# and also publishes the alarm to the alerts SNS topic.
resource "aws_cloudwatch_metric_alarm" "recovery_with_notification" {
  # The alarm name embeds the instance ID.
  alarm_name          = "ec2-recovery-${module.monitored_server.instance_id}"
  comparison_operator = "GreaterThanOrEqualToThreshold"
  # Fire when the per-period maximum is >= 1 for two consecutive 60-second periods.
  evaluation_periods  = 2
  metric_name         = "StatusCheckFailed_System"
  namespace           = "AWS/EC2"
  period              = 60
  statistic           = "Maximum"
  threshold           = 1

  # Scope the metric to the single instance created by the module.
  dimensions = {
    InstanceId = module.monitored_server.instance_id
  }

  # Both actions run on the same ALARM transition: the built-in EC2 recover
  # action in the current region, and a notification to the SNS topic.
  alarm_actions = [
    "arn:aws:automate:${data.aws_region.current.name}:ec2:recover",
    aws_sns_topic.alerts.arn
  ]

  alarm_description = "Auto-recover and notify on system failure"

  # Tagged like the instance it protects and the other alarm in this document.
  tags = {
    Environment = "production"
  }
}

Requirements

  • Instance must use EBS-backed storage (not instance store)
  • Instance must be in a VPC
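  • Detailed monitoring recommended for faster detection of instance-level metric changes (status check metrics such as StatusCheckFailed_System are already published at 1-minute intervals)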

Notes

  • Auto-recovery preserves instance ID, private IP, and EBS volumes
  • Recovery migrates the instance to new hardware
  • Not supported on bare metal instances or instances with instance store volumes