Exclude more spam sites

This commit is contained in:
Daoud Clarke 2023-10-18 17:02:45 +01:00
parent 78a9bfbb11
commit 4917b882d2
2 changed files with 2 additions and 1 deletions

View file

@@ -32,7 +32,7 @@ SCORE_FOR_SAME_DOMAIN = 0.01
EXTRA_LINK_MULTIPLIER = 0.001
UNKNOWN_DOMAIN_MULTIPLIER = 0.001
EXCLUDED_DOMAINS = {'web.archive.org', 'forums.giantitp.com', 'www.crutchfield.com', 'plus.google.com'}
DOMAIN_BLACKLIST_REGEX = re.compile(r"porn|xxx")
DOMAIN_BLACKLIST_REGEX = re.compile(r"porn|xxx|jksu\.org|lwhyl\.org$|rgcd\.cn$|hzqwyou\.cn$")
CORE_DOMAINS = {
'github.com',
'en.wikipedia.org',

View file

@@ -6,6 +6,7 @@ def test_blacklist_excludes_bad_pattern():
"brofqpxj.uelinc.com",
"gzsmjc.fba01.com",
"59648.etnomurcia.com",
"something.hzqwyou.cn",
]
for domain in bad_domains: