Exclude more spam sites
This commit is contained in:
parent
78a9bfbb11
commit
4917b882d2
2 changed files with 2 additions and 1 deletion
|
@@ -32,7 +32,7 @@ SCORE_FOR_SAME_DOMAIN = 0.01
|
|||
EXTRA_LINK_MULTIPLIER = 0.001
|
||||
UNKNOWN_DOMAIN_MULTIPLIER = 0.001
|
||||
EXCLUDED_DOMAINS = {'web.archive.org', 'forums.giantitp.com', 'www.crutchfield.com', 'plus.google.com'}
|
||||
DOMAIN_BLACKLIST_REGEX = re.compile(r"porn|xxx")
|
||||
DOMAIN_BLACKLIST_REGEX = re.compile(r"porn|xxx|jksu\.org|lwhyl\.org$|rgcd\.cn$|hzqwyou\.cn$")
|
||||
CORE_DOMAINS = {
|
||||
'github.com',
|
||||
'en.wikipedia.org',
|
||||
|
|
|
@@ -6,6 +6,7 @@ def test_blacklist_excludes_bad_pattern():
|
|||
"brofqpxj.uelinc.com",
|
||||
"gzsmjc.fba01.com",
|
||||
"59648.etnomurcia.com",
|
||||
"something.hzqwyou.cn",
|
||||
]
|
||||
|
||||
for domain in bad_domains:
|
||||
|
|
Loading…
Reference in a new issue