check-secondaries.py 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114
  1. from datetime import timedelta
  2. from socket import gethostbyname
  3. from time import sleep
  4. from django.conf import settings
  5. from django.core.mail import get_connection, mail_admins
  6. from django.core.management import BaseCommand
  7. from django.utils import timezone
  8. import dns.exception, dns.message, dns.query, dns.rdatatype
  9. from desecapi import pdns
  10. from desecapi.models import Domain
  11. def query_serial(zone, server):
  12. """
  13. Checks a zone's serial on a server.
  14. :return: serial if received; None if the server did not know; False on error
  15. """
  16. query = dns.message.make_query(zone, 'SOA')
  17. try:
  18. response = dns.query.tcp(query, server, timeout=5)
  19. except dns.exception.Timeout:
  20. return False
  21. for rrset in response.answer:
  22. if rrset.rdtype == dns.rdatatype.SOA:
  23. return int(rrset[0].serial)
  24. return None
  25. class Command(BaseCommand):
  26. help = 'Check secondaries for consistency with nsmaster.'
  27. def __init__(self, *args, **kwargs):
  28. self.servers = {gethostbyname(server): server for server in settings.WATCHDOG_SECONDARIES}
  29. super().__init__(*args, **kwargs)
  30. def add_arguments(self, parser):
  31. parser.add_argument('domain-name', nargs='*',
  32. help='Domain name to check. If omitted, will check all recently published domains.')
  33. parser.add_argument('--delay', type=int, default=120, help='Delay SOA checks to allow pending AXFRs to finish.')
  34. parser.add_argument('--window', type=int, default=settings.WATCHDOG_WINDOW_SEC,
  35. help='Check domains that were published no longer than this many seconds ago.')
  36. def find_outdated_servers(self, zone, local_serial):
  37. """
  38. Returns a dict, the key being the outdated secondary name, and the value being the node's current zone serial.
  39. """
  40. outdated = {}
  41. for server in self.servers:
  42. remote_serial = query_serial(zone, server)
  43. if not remote_serial or remote_serial < local_serial:
  44. outdated[self.servers[server]] = remote_serial
  45. return outdated
  46. def handle(self, *args, **options):
  47. threshold = timezone.now() - timedelta(seconds=options['window'])
  48. recent_domain_names = Domain.objects.filter(published__gt=threshold).values_list('name', flat=True)
  49. serials = {zone: s for zone, s in pdns.get_serials().items() if zone.rstrip('.') in recent_domain_names}
  50. if options['domain-name']:
  51. serials = {zone: serial for zone, serial in serials.items() if zone.rstrip('.') in options['domain-name']}
  52. print('Sleeping for {} seconds before checking {} domains ...'.format(options['delay'], len(serials)))
  53. sleep(options['delay'])
  54. outdated_zone_count = 0
  55. outdated_secondaries = set()
  56. output = []
  57. timeouts = {}
  58. for zone, local_serial in serials.items():
  59. outdated_serials = self.find_outdated_servers(zone, local_serial)
  60. for server, serial in outdated_serials.items():
  61. if serial is False:
  62. timeouts.setdefault(server, [])
  63. timeouts[server].append(zone)
  64. outdated_serials = {k: serial for k, serial in outdated_serials.items() if serial is not False}
  65. if outdated_serials:
  66. outdated_secondaries.update(outdated_serials.keys())
  67. output.append(f'{zone} ({local_serial}) is outdated on {outdated_serials}')
  68. print(output[-1])
  69. outdated_zone_count += 1
  70. else:
  71. print(f'{zone} ok')
  72. output.append(f'Checked {len(serials)} domains, {outdated_zone_count} were outdated.')
  73. print(output[-1])
  74. self.report(outdated_secondaries, output, timeouts)
  75. def report(self, outdated_secondaries, output, timeouts):
  76. if not outdated_secondaries and not timeouts:
  77. return
  78. subject = f'{timeouts and "CRITICAL ALERT" or "ALERT"} {len(outdated_secondaries)} secondaries out of sync'
  79. message = ''
  80. if timeouts:
  81. message += f'The following servers had timeouts:\n\n{timeouts}\n\n'
  82. if outdated_secondaries:
  83. message += f'The following {len(outdated_secondaries)} secondaries are out of sync:\n'
  84. for outdated_secondary in outdated_secondaries:
  85. message += f'* {outdated_secondary}\n'
  86. message += '\n'
  87. message += f'Current secondary IPs: {self.servers}\n'
  88. message += '\n'.join(output)
  89. mail_admins(subject, message, connection=get_connection('django.core.mail.backends.smtp.EmailBackend'))