Exclude more spam sites
This commit is contained in:
parent
78a9bfbb11
commit
4917b882d2
2 changed files with 2 additions and 1 deletion
|
@@ -32,7 +32,7 @@ SCORE_FOR_SAME_DOMAIN = 0.01
|
||||||
EXTRA_LINK_MULTIPLIER = 0.001
|
EXTRA_LINK_MULTIPLIER = 0.001
|
||||||
UNKNOWN_DOMAIN_MULTIPLIER = 0.001
|
UNKNOWN_DOMAIN_MULTIPLIER = 0.001
|
||||||
EXCLUDED_DOMAINS = {'web.archive.org', 'forums.giantitp.com', 'www.crutchfield.com', 'plus.google.com'}
|
EXCLUDED_DOMAINS = {'web.archive.org', 'forums.giantitp.com', 'www.crutchfield.com', 'plus.google.com'}
|
||||||
DOMAIN_BLACKLIST_REGEX = re.compile(r"porn|xxx")
|
DOMAIN_BLACKLIST_REGEX = re.compile(r"porn|xxx|jksu\.org|lwhyl\.org$|rgcd\.cn$|hzqwyou\.cn$")
|
||||||
CORE_DOMAINS = {
|
CORE_DOMAINS = {
|
||||||
'github.com',
|
'github.com',
|
||||||
'en.wikipedia.org',
|
'en.wikipedia.org',
|
||||||
|
|
|
@@ -6,6 +6,7 @@ def test_blacklist_excludes_bad_pattern():
|
||||||
"brofqpxj.uelinc.com",
|
"brofqpxj.uelinc.com",
|
||||||
"gzsmjc.fba01.com",
|
"gzsmjc.fba01.com",
|
||||||
"59648.etnomurcia.com",
|
"59648.etnomurcia.com",
|
||||||
|
"something.hzqwyou.cn",
|
||||||
]
|
]
|
||||||
|
|
||||||
for domain in bad_domains:
|
for domain in bad_domains:
|
||||||
|
|
Loading…
Reference in a new issue