From 4917b882d2939c1deb79516237c7e677560cd7a9 Mon Sep 17 00:00:00 2001 From: Daoud Clarke Date: Wed, 18 Oct 2023 17:02:45 +0100 Subject: [PATCH] Exclude more spam sites --- mwmbl/settings.py | 2 +- test/test_blacklist.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/mwmbl/settings.py b/mwmbl/settings.py index 3f7580a..4d8f5d2 100644 --- a/mwmbl/settings.py +++ b/mwmbl/settings.py @@ -32,7 +32,7 @@ SCORE_FOR_SAME_DOMAIN = 0.01 EXTRA_LINK_MULTIPLIER = 0.001 UNKNOWN_DOMAIN_MULTIPLIER = 0.001 EXCLUDED_DOMAINS = {'web.archive.org', 'forums.giantitp.com', 'www.crutchfield.com', 'plus.google.com'} -DOMAIN_BLACKLIST_REGEX = re.compile(r"porn|xxx") +DOMAIN_BLACKLIST_REGEX = re.compile(r"porn|xxx|jksu\.org|lwhyl\.org$|rgcd\.cn$|hzqwyou\.cn$") CORE_DOMAINS = { 'github.com', 'en.wikipedia.org', diff --git a/test/test_blacklist.py b/test/test_blacklist.py index b68c1ce..b98fd48 100644 --- a/test/test_blacklist.py +++ b/test/test_blacklist.py @@ -6,6 +6,7 @@ def test_blacklist_excludes_bad_pattern(): "brofqpxj.uelinc.com", "gzsmjc.fba01.com", "59648.etnomurcia.com", + "something.hzqwyou.cn", ] for domain in bad_domains: