health-check.sh 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185
  1. #!/bin/bash
  # Health check script for the apt-mirror2 container.
  # Monitors the health of the nginx and apt-mirror2 services.
  #
  # Usage: health-check.sh {once|monitor|status}   (default: once)
  #
  # Each path below may be overridden from the environment; when the
  # variable is unset or empty the historical default is used.
  HEALTH_LOG="${HEALTH_LOG:-/var/log/health-check.log}"                  # log written by log_health
  NGINX_PID_FILE="${NGINX_PID_FILE:-/var/run/nginx.pid}"                 # pid file read by check_nginx
  MIRROR_LOCK_FILE="${MIRROR_LOCK_FILE:-/var/run/apt-mirror.lock}"       # lock file read by check_mirror
  HEALTH_STATUS_FILE="${HEALTH_STATUS_FILE:-/var/run/health.status}"     # JSON status read by get_status
  # Append one timestamped message to the health log.
  # Globals:   HEALTH_LOG (read) - file the line is appended to
  # Arguments: $1 - message text
  log_health() {
    local stamp
    stamp=$(date '+%Y-%m-%d %H:%M:%S')
    printf '[%s] %s\n' "$stamp" "$1" >> "$HEALTH_LOG"
  }
  # Check that nginx is alive and listening on its HTTP port.
  # Globals:   NGINX_PID_FILE (read) - pid file written by nginx
  #            NGINX_PORT (read, optional) - port to look for, default 80
  # Outputs:   one line on stdout, "nginx:<state>:<pid>", where <state> is
  #            running | not_listening | not_running | no_pid_file
  # Returns:   0 only for "running", 1 otherwise
  check_nginx() {
    local port="${NGINX_PORT:-80}"
    local pid listen_re

    if [[ ! -f "$NGINX_PID_FILE" ]]; then
      echo "nginx:no_pid_file:"
      return 1
    fi

    pid=$(cat "$NGINX_PID_FILE" 2>/dev/null)
    # Only a positive integer is a valid pid; "0" or a negative value would
    # make kill address a whole process group instead of a single process.
    if [[ ! "$pid" =~ ^[1-9][0-9]*$ ]] || ! kill -0 "$pid" 2>/dev/null; then
      echo "nginx:not_running:"
      return 1
    fi

    # The port must be followed by whitespace so that ":80" does not also
    # match ":8080" or ":800". netstat is tried first, ss as a fallback.
    listen_re=":${port}[[:space:]].*nginx"
    if netstat -tlnp 2>/dev/null | grep -q -- "$listen_re" \
      || ss -tlnp 2>/dev/null | grep -q -- "$listen_re"; then
      echo "nginx:running:$pid"
      return 0
    fi

    echo "nginx:not_listening:$pid"
    return 1
  }
  # Report whether an apt-mirror2 sync is in progress.
  # Globals:   MIRROR_LOCK_FILE (read) - lock file whose content is used as a pid
  # Outputs:   "mirror:idle:", "mirror:syncing:<pid>" or "mirror:stale_lock:<pid>"
  # Returns:   1 for a stale lock (lock present, pid not alive), 0 otherwise
  check_mirror() {
    # No lock file: nothing is syncing.
    if [[ ! -f "$MIRROR_LOCK_FILE" ]]; then
      echo "mirror:idle:"
      return 0
    fi

    local lock_pid=$(cat "$MIRROR_LOCK_FILE")

    # Lock present but its owner is gone.
    if ! kill -0 "$lock_pid" 2>/dev/null; then
      echo "mirror:stale_lock:$lock_pid"
      return 1
    fi

    echo "mirror:syncing:$lock_pid"
    return 0
  }
  # Report how full the filesystem holding /var/spool/apt-mirror is.
  # Outputs:   "disk:<N>%" on stdout, or "disk:unknown" when df gives no
  #            usable figure (e.g. the directory does not exist)
  # Returns:   1 when usage is above 90% or cannot be determined, else 0
  check_disk() {
    local spool_dir="/var/spool/apt-mirror"
    local usage

    # -P forces the POSIX one-line-per-filesystem layout, so a long device
    # name cannot wrap onto a second line and shift the columns.
    usage=$(df -P "$spool_dir" 2>/dev/null \
      | awk 'NR == 2 { sub(/%/, "", $5); print $5 }')

    # Without a numeric value the comparison below would be meaningless;
    # report the failure instead of silently claiming the disk is fine.
    if [[ ! "$usage" =~ ^[0-9]+$ ]]; then
      echo "disk:unknown"
      return 1
    fi

    echo "disk:${usage}%"
    if (( usage > 90 )); then
      return 1
    fi
    return 0
  }
  # Print the recorded time of the last mirror sync.
  # Outputs: "last_sync:<content of last-sync.txt>", or "last_sync:never"
  #          when /var/spool/apt-mirror/last-sync.txt does not exist
  check_last_sync() {
    local marker="/var/spool/apt-mirror/last-sync.txt"

    if [[ ! -f "$marker" ]]; then
      echo "last_sync:never"
      return
    fi

    local recorded=$(cat "$marker")
    echo "last_sync:$recorded"
  }
  # Print the system uptime in "uptime -p" form.
  # Outputs: "uptime:<text>"; "unknown" is substituted when "uptime -p" fails
  get_uptime() {
    local up_text
    up_text=$(uptime -p 2>/dev/null || echo "unknown")
    printf 'uptime:%s\n' "$up_text"
  }
  # Print memory usage as an integer percentage of total RAM.
  # Outputs: "memory:<N>%" computed from the "Mem:" row of "free -m"
  #          (used * 100 / total), or "memory:unknown" when free is
  #          unavailable or does not report a positive total
  get_memory() {
    local total="" used=""

    # LC_ALL=C keeps the row label as "Mem:" regardless of the locale.
    read -r total used < <(
      LC_ALL=C free -m 2>/dev/null | awk '/^Mem:/ { print $2, $3; exit }'
    )

    # Guard against empty/non-numeric fields and division by zero.
    if [[ "$total" =~ ^[0-9]+$ && "$used" =~ ^[0-9]+$ ]] && (( total > 0 )); then
      echo "memory:$(( used * 100 / total ))%"
    else
      echo "memory:unknown"
    fi
  }
  # Escape a string so it can be placed inside a JSON double-quoted value.
  _health_json_escape() {
    local s=$1
    s=${s//\\/\\\\}
    s=${s//\"/\\\"}
    s=${s//$'\n'/\\n}
    s=${s//$'\r'/\\r}
    s=${s//$'\t'/\\t}
    printf '%s' "$s"
  }

  # Run every check, write a JSON report and print the overall status.
  # Globals:   HEALTH_STATUS_FILE (written) - receives the JSON report
  # Outputs:   "healthy", "warning" or "unhealthy" on stdout
  # Status rules:
  #   unhealthy - check_nginx returned non-zero
  #   warning   - nginx is fine but check_disk returned non-zero or reported
  #               90-100% usage (a disk warning never hides "unhealthy")
  #   The mirror, last-sync, uptime and memory results are informational.
  do_health_check() {
    local status="healthy"
    local -a details=() fields=()
    local nginx_status mirror_status disk_status entry tmp_file
    local nginx_rc=0 disk_rc=0
    local i

    # Decide on the exit status of check_nginx; matching the word "running"
    # in its output would also accept "not_running".
    nginx_status=$(check_nginx) || nginx_rc=$?
    details+=("$nginx_status")
    if (( nginx_rc != 0 )); then
      status="unhealthy"
    fi

    mirror_status=$(check_mirror) || true
    details+=("$mirror_status")

    disk_status=$(check_disk) || disk_rc=$?
    details+=("$disk_status")
    if [[ "$status" == "healthy" ]] \
      && { (( disk_rc != 0 )) || [[ "$disk_status" =~ ^disk:(9[0-9]|100)%$ ]]; }; then
      status="warning"
    fi

    details+=("$(check_last_sync)")
    details+=("$(get_uptime)")
    details+=("$(get_memory)")

    # Each detail is "<key>:<value>"; split on the first colon only so that
    # values containing colons (pids, timestamps) stay intact.
    for entry in "${details[@]}"; do
      [[ "$entry" == *:* ]] || continue
      fields+=("$(printf '    "%s": "%s"' \
        "$(_health_json_escape "${entry%%:*}")" \
        "$(_health_json_escape "${entry#*:}")")")
    done

    # Write to a temporary file and rename it, so a concurrent reader never
    # sees a half-written report.
    tmp_file="${HEALTH_STATUS_FILE}.tmp.$$"
    if {
      printf '{\n'
      printf '  "status": "%s",\n' "$status"
      printf '  "timestamp": "%s",\n' "$(date -Iseconds)"
      printf '  "details": {\n'
      for (( i = 0; i < ${#fields[@]}; i++ )); do
        if (( i < ${#fields[@]} - 1 )); then
          printf '%s,\n' "${fields[i]}"
        else
          printf '%s\n' "${fields[i]}"
        fi
      done
      printf '  }\n'
      printf '}\n'
    } > "$tmp_file"; then
      mv -f -- "$tmp_file" "$HEALTH_STATUS_FILE"
    else
      rm -f -- "$tmp_file"
    fi

    echo "$status"
  }
  # Monitor forever: run a health check, log its result, wait 30 seconds.
  # Never returns; results go to the health log via log_health.
  run_monitoring() {
    local result

    log_health "Starting health monitoring"
    while :; do
      result=$(do_health_check)
      log_health "Health check result: $result"
      # Pause between consecutive checks.
      sleep 30
    done
  }
  # Run a single health check, print its status and exit the script.
  # Exit status: 0 when the status is exactly "healthy", 1 otherwise.
  run_once() {
    local result
    result=$(do_health_check)
    echo "$result"

    case "$result" in
      healthy) exit 0 ;;
      *) exit 1 ;;
    esac
  }
  # Print the most recently written status report.
  # Globals: HEALTH_STATUS_FILE (read)
  # Outputs: the file's content, or a fixed "unknown" JSON object when the
  #          file does not exist
  get_status() {
    if [[ ! -f "$HEALTH_STATUS_FILE" ]]; then
      echo '{"status": "unknown", "timestamp": "", "details": {}}'
      return
    fi
    cat "$HEALTH_STATUS_FILE"
  }
  # Entry point: dispatch on the first argument (defaults to "once").
  # Arguments: $1 - once | monitor | status
  # An unrecognised argument prints the usage text on stderr and exits 1.
  main() {
    case "${1:-once}" in
      "once")
        run_once
        ;;
      "monitor")
        run_monitoring
        ;;
      "status")
        get_status
        ;;
      *)
        # Usage is a diagnostic, so it goes to stderr and leaves stdout
        # free for callers that capture the command's output.
        {
          echo "Usage: $0 {once|monitor|status}"
          echo " once - Run health check once and exit"
          echo " monitor - Run continuous health monitoring"
          echo " status - Get detailed status information"
        } >&2
        exit 1
        ;;
    esac
  }

  main "$@"