소스 검색

feat(api): add watchdog

Peter Thomassen 5 년 전
부모
커밋
ad1c946dc5
7개의 변경된 파일108개의 추가작업 그리고 1개의 파일을 삭제
  1. 3 0
      .env.default
  2. 2 0
      README.md
  3. 4 0
      api/api/settings.py
  4. 2 1
      api/cronhook/crontab
  5. 95 0
      api/desecapi/management/commands/check-slaves.py
  6. 1 0
      api/requirements.txt
  7. 1 0
      docker-compose.yml

+ 3 - 0
.env.default

@@ -40,3 +40,6 @@ DESECSTACK_NSMASTER_CARBONOURNAME=
 
 # replication-manager
 DESECSTACK_REPLICATION_MANAGER_CERTS=
+
+# slave watchdog
+DESECSTACK_WATCHDOG_SLAVES=ns1.example.org ns2.example.net

+ 2 - 0
README.md

@@ -69,6 +69,8 @@ Although most configuration is contained in this repository, some external depen
       - `DESECSTACK_NSMASTER_CARBONOURNAME`: pdns `carbon-ourname` setting on nsmaster (optional)
     - replication-manager related
       - `DESECSTACK_REPLICATION_MANAGER_CERTS`: a directory where `replication-manager` (to configure slave replication) will dump the slave's TLS key and certificate
+    - watchdog-related
+      - `DESECSTACK_WATCHDOG_SLAVES`: space-separated list of slave hostnames; used to check correct replication of recent DNS changes
 
 How to Run
 ----------

+ 4 - 0
api/api/settings.py

@@ -200,6 +200,10 @@ VALIDITY_PERIOD_VERIFICATION_SIGNATURE = timedelta(hours=12)
 # CAPTCHA
 CAPTCHA_VALIDITY_PERIOD = timedelta(hours=24)
 
+# Watchdog
+WATCHDOG_SLAVES = os.environ.get('DESECSTACK_WATCHDOG_SLAVES', '').split()
+WATCHDOG_WINDOW_SEC = 600
+
 if DEBUG and not EMAIL_HOST:
     EMAIL_BACKEND = 'django.core.mail.backends.dummy.EmailBackend'
 

+ 2 - 1
api/cronhook/crontab

@@ -1 +1,2 @@
-55 2 * * * /usr/local/bin/python3 /usr/src/app/manage.py chores >> /var/log/cron.log 2>&1
+55 2 * * * /usr/local/bin/python3 -u /usr/src/app/manage.py chores >> /var/log/cron.log 2>&1
+*/5 * * * * /usr/local/bin/python3 -u /usr/src/app/manage.py check-slaves >> /var/log/cron.log 2>&1

+ 95 - 0
api/desecapi/management/commands/check-slaves.py

@@ -0,0 +1,95 @@
+from datetime import timedelta
+from socket import gethostbyname
+from time import sleep
+
+from django.conf import settings
+from django.core.mail import get_connection, mail_admins
+from django.core.management import BaseCommand
+from django.utils import timezone
+import dns.message, dns.query, dns.rdatatype
+
+from desecapi import pdns
+from desecapi.models import Domain
+
+
+def query_serial(zone, server):
+    query = dns.message.make_query(zone, 'SOA')
+    response = dns.query.tcp(query, server)
+
+    for rrset in response.answer:
+        if rrset.rdtype == dns.rdatatype.SOA:
+            return int(rrset[0].serial)
+    return None
+
+
+class Command(BaseCommand):
+    help = 'Check slaves for consistency with nsmaster.'
+
+    def __init__(self, *args, **kwargs):
+        self.servers = {gethostbyname(server): server for server in settings.WATCHDOG_SLAVES}
+        super().__init__(*args, **kwargs)
+
+    def add_arguments(self, parser):
+        parser.add_argument('domain-name', nargs='*',
+                            help='Domain name to check. If omitted, will check all recently published domains.')
+        parser.add_argument('--delay', type=int, default=60, help='Delay SOA checks to allow pending AXFRs to finish.')
+        parser.add_argument('--window', type=int, default=settings.WATCHDOG_WINDOW_SEC,
+                            help='Check domains that were published no longer than this many seconds ago.')
+
+    def find_outdated_zones(self, zone, local_serial):
+        """
+        Returns a dict, the key being the outdated slave name, and the value being the slave's current zone serial.
+        """
+        outdated = {}
+        for server in self.servers:
+            remote_serial = query_serial(zone, server)
+            if not remote_serial or remote_serial < local_serial:
+                outdated[self.servers[server]] = remote_serial
+
+        return outdated
+
+    def handle(self, *args, **options):
+        threshold = timezone.now() - timedelta(seconds=options['window'])
+        recent_domain_names = Domain.objects.filter(published__gt=threshold).values_list('name', flat=True)
+        serials = {zone: s for zone, s in pdns.get_serials().items() if zone.rstrip('.') in recent_domain_names}
+
+        if options['domain-name']:
+            serials = {zone: serial for zone, serial in serials.items() if zone.rstrip('.') in options['domain-name']}
+
+        print('Sleeping for {} seconds before checking {} domains ...'.format(options['delay'], len(serials)))
+        sleep(options['delay'])
+
+        outdated_zone_count = 0
+        outdated_slaves = set()
+
+        output = []
+        for zone, local_serial in serials.items():
+            outdated_serials = self.find_outdated_servers(zone, local_serial)
+            outdated_slaves.update(outdated_serials.keys())
+
+            if outdated_serials:
+                output.append(f'{zone} ({local_serial}) is outdated on {outdated_serials}')
+                print(output[-1])
+                outdated_zone_count += 1
+            else:
+                print(f'{zone} ok')
+
+        output.append(f'Checked {len(serials)} domains, {outdated_zone_count} were outdated.')
+        print(output[-1])
+
+        self.report(outdated_slaves, output)
+
+    def report(self, outdated_slaves, output):
+        if not outdated_slaves:
+            return
+
+        subject = f'ALERT {len(outdated_slaves)} slaves out of sync'
+        message = f'The following {len(outdated_slaves)} slaves are out of sync:\n'
+        for outdated_slave in outdated_slaves:
+            message += f'* {outdated_slave}\n'
+        message += '\n'
+        message += f'Current slave IPs: {self.servers}'
+        message += '\n'
+        message += '\n'.join(output)
+
+        mail_admins(subject, message, connection=get_connection('django.core.mail.backends.smtp.EmailBackend'))

+ 1 - 0
api/requirements.txt

@@ -6,6 +6,7 @@ Django~=3.0.0
 django-cors-headers~=3.2.0
 djangorestframework~=3.11.0
 django-celery-email~=3.0.0
+dnspython~=1.16.0
 httpretty~=0.9.0
 mysqlclient~=1.4.0
 psl-dns~=1.0

+ 1 - 0
docker-compose.yml

@@ -117,6 +117,7 @@ services:
     - DESECSTACK_NSLORD_DEFAULT_TTL
     - DESECSTACK_NSMASTER_APIKEY
     - DESECSTACK_MINIMUM_TTL_DEFAULT
+    - DESECSTACK_WATCHDOG_SLAVES
     networks:
     - rearapi_celery
     - rearapi_dbapi