385 lines
15 KiB
Diff
385 lines
15 KiB
Diff
From: csagan5 <32685696+csagan5@users.noreply.github.com>
|
|
Date: Mon, 12 Feb 2018 21:36:33 +0100
|
|
Subject: ungoogled-chromium: Disable Google host detection
|
|
|
|
Disables the various checks that detect Google hosts, along with the functionality specific to those hosts
|
|
---
|
|
components/google/core/common/google_util.cc | 218 +-----------------
|
|
.../browser/page_load_metrics_util.cc | 4 +-
|
|
.../common/page_load_metrics_util.cc | 33 +--
|
|
components/search_engines/template_url.cc | 6 +-
|
|
net/base/url_util.cc | 21 --
|
|
5 files changed, 9 insertions(+), 273 deletions(-)
|
|
|
|
diff --git a/components/google/core/common/google_util.cc b/components/google/core/common/google_util.cc
|
|
--- a/components/google/core/common/google_util.cc
|
|
+++ b/components/google/core/common/google_util.cc
|
|
@@ -29,129 +29,10 @@
|
|
|
|
namespace google_util {
|
|
|
|
-// Helpers --------------------------------------------------------------------
|
|
-
|
|
-namespace {
|
|
-
|
|
-bool IsPathHomePageBase(base::StringPiece path) {
|
|
- return (path == "/") || (path == "/webhp");
|
|
-}
|
|
-
|
|
-// Removes a single trailing dot if present in |host|.
|
|
-void StripTrailingDot(base::StringPiece* host) {
|
|
- if (base::EndsWith(*host, "."))
|
|
- host->remove_suffix(1);
|
|
-}
|
|
-
|
|
-// True if the given canonical |host| is "[www.]<domain_in_lower_case>.<TLD>"
|
|
-// with a valid TLD that appears in |allowed_tlds|. If |subdomain_permission| is
|
|
-// ALLOW_SUBDOMAIN, we check against host "*.<domain_in_lower_case>.<TLD>"
|
|
-// instead.
|
|
-bool IsValidHostName(base::StringPiece host,
|
|
- base::StringPiece domain_in_lower_case,
|
|
- SubdomainPermission subdomain_permission,
|
|
- const base::flat_set<base::StringPiece>& allowed_tlds) {
|
|
- // Fast path to avoid searching the registry set.
|
|
- if (host.find(domain_in_lower_case) == base::StringPiece::npos)
|
|
- return false;
|
|
-
|
|
- size_t tld_length =
|
|
- net::registry_controlled_domains::GetCanonicalHostRegistryLength(
|
|
- host, net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES,
|
|
- net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES);
|
|
- if ((tld_length == 0) || (tld_length == std::string::npos))
|
|
- return false;
|
|
-
|
|
- // Removes the tld and the preceding dot.
|
|
- base::StringPiece host_minus_tld =
|
|
- host.substr(0, host.length() - tld_length - 1);
|
|
-
|
|
- base::StringPiece tld = host.substr(host.length() - tld_length);
|
|
- // Remove the trailing dot from tld if present, as for Google domains it's the
|
|
- // same page.
|
|
- StripTrailingDot(&tld);
|
|
- if (!allowed_tlds.contains(tld))
|
|
- return false;
|
|
-
|
|
- if (base::LowerCaseEqualsASCII(host_minus_tld, domain_in_lower_case))
|
|
- return true;
|
|
-
|
|
- if (subdomain_permission == ALLOW_SUBDOMAIN) {
|
|
- std::string dot_domain = base::StrCat({".", domain_in_lower_case});
|
|
- return base::EndsWith(host_minus_tld, dot_domain,
|
|
- base::CompareCase::INSENSITIVE_ASCII);
|
|
- }
|
|
-
|
|
- std::string www_domain = base::StrCat({"www.", domain_in_lower_case});
|
|
- return base::LowerCaseEqualsASCII(host_minus_tld, www_domain);
|
|
-}
|
|
-
|
|
-// True if |url| is a valid URL with HTTP or HTTPS scheme. If |port_permission|
|
|
-// is DISALLOW_NON_STANDARD_PORTS, this also requires |url| to use the standard
|
|
-// port for its scheme (80 for HTTP, 443 for HTTPS).
|
|
-bool IsValidURL(const GURL& url, PortPermission port_permission) {
|
|
- static bool g_ignore_port_numbers =
|
|
- base::CommandLine::ForCurrentProcess()->HasSwitch(
|
|
- switches::kIgnoreGooglePortNumbers);
|
|
- return url.is_valid() && url.SchemeIsHTTPOrHTTPS() &&
|
|
- (url.port().empty() || g_ignore_port_numbers ||
|
|
- (port_permission == ALLOW_NON_STANDARD_PORTS));
|
|
-}
|
|
-
|
|
-bool IsCanonicalHostGoogleHostname(base::StringPiece canonical_host,
|
|
- SubdomainPermission subdomain_permission) {
|
|
- const GURL& base_url(CommandLineGoogleBaseURL());
|
|
- if (base_url.is_valid() && (canonical_host == base_url.host_piece()))
|
|
- return true;
|
|
-
|
|
- static const base::NoDestructor<base::flat_set<base::StringPiece>>
|
|
- google_tlds(std::initializer_list<base::StringPiece>({GOOGLE_TLD_LIST}));
|
|
- return IsValidHostName(canonical_host, "google", subdomain_permission,
|
|
- *google_tlds);
|
|
-}
|
|
-
|
|
-bool IsCanonicalHostYoutubeHostname(base::StringPiece canonical_host,
|
|
- SubdomainPermission subdomain_permission) {
|
|
- static const base::NoDestructor<base::flat_set<base::StringPiece>>
|
|
- youtube_tlds(
|
|
- std::initializer_list<base::StringPiece>({YOUTUBE_TLD_LIST}));
|
|
- return IsValidHostName(canonical_host, "youtube", subdomain_permission,
|
|
- *youtube_tlds);
|
|
-}
|
|
-
|
|
-// True if |url| is a valid URL with a host that is in the static list of
|
|
-// Google subdomains for google search, and an HTTP or HTTPS scheme. Requires
|
|
-// |url| to use the standard port for its scheme (80 for HTTP, 443 for HTTPS).
|
|
-bool IsGoogleSearchSubdomainUrl(const GURL& url) {
|
|
- if (!IsValidURL(url, PortPermission::DISALLOW_NON_STANDARD_PORTS))
|
|
- return false;
|
|
-
|
|
- base::StringPiece host(url.host_piece());
|
|
- StripTrailingDot(&host);
|
|
-
|
|
- static const base::NoDestructor<base::flat_set<base::StringPiece>>
|
|
- google_subdomains(std::initializer_list<base::StringPiece>(
|
|
- {"ipv4.google.com", "ipv6.google.com"}));
|
|
-
|
|
- return google_subdomains->contains(host);
|
|
-}
|
|
-
|
|
-} // namespace
|
|
-
|
|
// Global functions -----------------------------------------------------------
|
|
|
|
const char kGoogleHomepageURL[] = "https://www.google.com/";
|
|
|
|
-bool HasGoogleSearchQueryParam(base::StringPiece str) {
|
|
- url::Component query(0, static_cast<int>(str.length())), key, value;
|
|
- while (url::ExtractQueryKeyValue(str.data(), &query, &key, &value)) {
|
|
- base::StringPiece key_str = str.substr(key.begin, key.len);
|
|
- if (key_str == "q" || key_str == "as_q")
|
|
- return true;
|
|
- }
|
|
- return false;
|
|
-}
|
|
-
|
|
std::string GetGoogleLocale(const std::string& application_locale) {
|
|
// Google does not recognize "nb" for Norwegian Bokmal; it uses "no".
|
|
return (application_locale == "nb") ? "no" : application_locale;
|
|
@@ -164,25 +45,7 @@ GURL AppendGoogleLocaleParam(const GURL& url,
|
|
}
|
|
|
|
std::string GetGoogleCountryCode(const GURL& google_homepage_url) {
|
|
- base::StringPiece google_hostname = google_homepage_url.host_piece();
|
|
- // TODO(igorcov): This needs a fix for case when the host has a trailing dot,
|
|
- // like "google.com./". https://crbug.com/720295.
|
|
- const size_t last_dot = google_hostname.find_last_of('.');
|
|
- if (last_dot == std::string::npos)
|
|
- return std::string();
|
|
- base::StringPiece country_code = google_hostname.substr(last_dot + 1);
|
|
- // Assume the com TLD implies the US.
|
|
- if (country_code == "com")
|
|
- return "us";
|
|
- // Google uses the Unicode Common Locale Data Repository (CLDR), and the CLDR
|
|
- // code for the UK is "gb".
|
|
- if (country_code == "uk")
|
|
- return "gb";
|
|
- // Catalonia does not have a CLDR country code, since it's a region in Spain,
|
|
- // so use Spain instead.
|
|
- if (country_code == "cat")
|
|
- return "es";
|
|
- return country_code.as_string();
|
|
+ return std::string();
|
|
}
|
|
|
|
GURL GetGoogleSearchURL(const GURL& google_homepage_url) {
|
|
@@ -221,99 +84,30 @@ bool StartsWithCommandLineGoogleBaseURL(const GURL& url) {
|
|
|
|
bool IsGoogleHostname(base::StringPiece host,
|
|
SubdomainPermission subdomain_permission) {
|
|
- url::CanonHostInfo host_info;
|
|
- return IsCanonicalHostGoogleHostname(net::CanonicalizeHost(host, &host_info),
|
|
- subdomain_permission);
|
|
+ return false;
|
|
}
|
|
|
|
bool IsGoogleDomainUrl(const GURL& url,
|
|
SubdomainPermission subdomain_permission,
|
|
PortPermission port_permission) {
|
|
- return IsValidURL(url, port_permission) &&
|
|
- IsCanonicalHostGoogleHostname(url.host_piece(), subdomain_permission);
|
|
+ return false;
|
|
}
|
|
|
|
bool IsGoogleHomePageUrl(const GURL& url) {
|
|
- // First check to see if this has a Google domain.
|
|
- if (!IsGoogleDomainUrl(url, DISALLOW_SUBDOMAIN,
|
|
- DISALLOW_NON_STANDARD_PORTS) &&
|
|
- !IsGoogleSearchSubdomainUrl(url)) {
|
|
- return false;
|
|
- }
|
|
-
|
|
- // Make sure the path is a known home page path.
|
|
- base::StringPiece path(url.path_piece());
|
|
- return IsPathHomePageBase(path) ||
|
|
- base::StartsWith(path, "/ig", base::CompareCase::INSENSITIVE_ASCII);
|
|
+ return false;
|
|
}
|
|
|
|
bool IsGoogleSearchUrl(const GURL& url) {
|
|
- // First check to see if this has a Google domain.
|
|
- if (!IsGoogleDomainUrl(url, DISALLOW_SUBDOMAIN,
|
|
- DISALLOW_NON_STANDARD_PORTS) &&
|
|
- !IsGoogleSearchSubdomainUrl(url)) {
|
|
- return false;
|
|
- }
|
|
-
|
|
- // Make sure the path is a known search path.
|
|
- base::StringPiece path(url.path_piece());
|
|
- bool is_home_page_base = IsPathHomePageBase(path);
|
|
- if (!is_home_page_base && (path != "/search"))
|
|
- return false;
|
|
-
|
|
- // Check for query parameter in URL parameter and hash fragment, depending on
|
|
- // the path type.
|
|
- return HasGoogleSearchQueryParam(url.ref_piece()) ||
|
|
- (!is_home_page_base && HasGoogleSearchQueryParam(url.query_piece()));
|
|
+ return false;
|
|
}
|
|
|
|
bool IsYoutubeDomainUrl(const GURL& url,
|
|
SubdomainPermission subdomain_permission,
|
|
PortPermission port_permission) {
|
|
- return IsValidURL(url, port_permission) &&
|
|
- IsCanonicalHostYoutubeHostname(url.host_piece(), subdomain_permission);
|
|
+ return false;
|
|
}
|
|
|
|
bool IsGoogleAssociatedDomainUrl(const GURL& url) {
|
|
- if (IsGoogleDomainUrl(url, ALLOW_SUBDOMAIN, ALLOW_NON_STANDARD_PORTS))
|
|
- return true;
|
|
-
|
|
- if (IsYoutubeDomainUrl(url, ALLOW_SUBDOMAIN, ALLOW_NON_STANDARD_PORTS))
|
|
- return true;
|
|
-
|
|
- // Some domains don't have international TLD extensions, so testing for them
|
|
- // is very straightforward.
|
|
- static const char* kSuffixesToSetHeadersFor[] = {
|
|
- ".android.com",
|
|
- ".doubleclick.com",
|
|
- ".doubleclick.net",
|
|
- ".ggpht.com",
|
|
- ".googleadservices.com",
|
|
- ".googleapis.com",
|
|
- ".googlesyndication.com",
|
|
- ".googleusercontent.com",
|
|
- ".googlevideo.com",
|
|
- ".gstatic.com",
|
|
- ".litepages.googlezip.net",
|
|
- ".ytimg.com",
|
|
- };
|
|
- const std::string host = url.host();
|
|
- for (size_t i = 0; i < base::size(kSuffixesToSetHeadersFor); ++i) {
|
|
- if (base::EndsWith(host, kSuffixesToSetHeadersFor[i],
|
|
- base::CompareCase::INSENSITIVE_ASCII)) {
|
|
- return true;
|
|
- }
|
|
- }
|
|
-
|
|
- // Exact hostnames in lowercase to set headers for.
|
|
- static const char* kHostsToSetHeadersFor[] = {
|
|
- "googleweblight.com",
|
|
- };
|
|
- for (size_t i = 0; i < base::size(kHostsToSetHeadersFor); ++i) {
|
|
- if (base::LowerCaseEqualsASCII(host, kHostsToSetHeadersFor[i]))
|
|
- return true;
|
|
- }
|
|
-
|
|
return false;
|
|
}
|
|
|
|
diff --git a/components/page_load_metrics/browser/page_load_metrics_util.cc b/components/page_load_metrics/browser/page_load_metrics_util.cc
|
|
--- a/components/page_load_metrics/browser/page_load_metrics_util.cc
|
|
+++ b/components/page_load_metrics/browser/page_load_metrics_util.cc
|
|
@@ -188,9 +188,7 @@ bool DidObserveLoadingBehaviorInAnyFrame(
|
|
}
|
|
|
|
bool IsGoogleSearchHostname(const GURL& url) {
|
|
- base::Optional<std::string> result =
|
|
- page_load_metrics::GetGoogleHostnamePrefix(url);
|
|
- return result && result.value() == "www";
|
|
+ return false;
|
|
}
|
|
|
|
bool IsGoogleSearchResultUrl(const GURL& url) {
|
|
diff --git a/components/page_load_metrics/common/page_load_metrics_util.cc b/components/page_load_metrics/common/page_load_metrics_util.cc
|
|
--- a/components/page_load_metrics/common/page_load_metrics_util.cc
|
|
+++ b/components/page_load_metrics/common/page_load_metrics_util.cc
|
|
@@ -12,38 +12,7 @@
|
|
namespace page_load_metrics {
|
|
|
|
base::Optional<std::string> GetGoogleHostnamePrefix(const GURL& url) {
|
|
- const size_t registry_length =
|
|
- net::registry_controlled_domains::GetRegistryLength(
|
|
- url,
|
|
-
|
|
- // Do not include unknown registries (registries that don't have any
|
|
- // matches in effective TLD names).
|
|
- net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES,
|
|
-
|
|
- // Do not include private registries, such as appspot.com. We don't
|
|
- // want to match URLs like www.google.appspot.com.
|
|
- net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES);
|
|
-
|
|
- const base::StringPiece hostname = url.host_piece();
|
|
- if (registry_length == 0 || registry_length == std::string::npos ||
|
|
- registry_length >= hostname.length()) {
|
|
- return base::Optional<std::string>();
|
|
- }
|
|
-
|
|
- // Removes the tld and the preceding dot.
|
|
- const base::StringPiece hostname_minus_registry =
|
|
- hostname.substr(0, hostname.length() - (registry_length + 1));
|
|
-
|
|
- if (hostname_minus_registry == "google")
|
|
- return std::string("");
|
|
-
|
|
- if (!base::EndsWith(hostname_minus_registry, ".google",
|
|
- base::CompareCase::INSENSITIVE_ASCII)) {
|
|
- return base::Optional<std::string>();
|
|
- }
|
|
-
|
|
- return std::string(hostname_minus_registry.substr(
|
|
- 0, hostname_minus_registry.length() - strlen(".google")));
|
|
+ return base::Optional<std::string>();
|
|
}
|
|
|
|
bool IsGoogleHostname(const GURL& url) {
|
|
diff --git a/components/search_engines/template_url.cc b/components/search_engines/template_url.cc
|
|
--- a/components/search_engines/template_url.cc
|
|
+++ b/components/search_engines/template_url.cc
|
|
@@ -513,11 +513,7 @@ base::string16 TemplateURLRef::SearchTermToString16(
|
|
bool TemplateURLRef::HasGoogleBaseURLs(
|
|
const SearchTermsData& search_terms_data) const {
|
|
ParseIfNecessary(search_terms_data);
|
|
- return std::any_of(replacements_.begin(), replacements_.end(),
|
|
- [](const Replacement& replacement) {
|
|
- return replacement.type == GOOGLE_BASE_URL ||
|
|
- replacement.type == GOOGLE_BASE_SUGGEST_URL;
|
|
- });
|
|
+ return false;
|
|
}
|
|
|
|
bool TemplateURLRef::ExtractSearchTermsFromURL(
|
|
diff --git a/net/base/url_util.cc b/net/base/url_util.cc
|
|
--- a/net/base/url_util.cc
|
|
+++ b/net/base/url_util.cc
|
|
@@ -415,27 +415,6 @@ bool HasGoogleHost(const GURL& url) {
|
|
}
|
|
|
|
bool IsGoogleHost(base::StringPiece host) {
|
|
- static const char* kGoogleHostSuffixes[] = {
|
|
- ".google.com",
|
|
- ".youtube.com",
|
|
- ".gmail.com",
|
|
- ".doubleclick.net",
|
|
- ".gstatic.com",
|
|
- ".googlevideo.com",
|
|
- ".googleusercontent.com",
|
|
- ".googlesyndication.com",
|
|
- ".google-analytics.com",
|
|
- ".googleadservices.com",
|
|
- ".googleapis.com",
|
|
- ".ytimg.com",
|
|
- };
|
|
- for (const char* suffix : kGoogleHostSuffixes) {
|
|
- // Here it's possible to get away with faster case-sensitive comparisons
|
|
- // because the list above is all lowercase, and a GURL's host name will
|
|
- // always be canonicalized to lowercase as well.
|
|
- if (base::EndsWith(host, suffix))
|
|
- return true;
|
|
- }
|
|
return false;
|
|
}
|
|
|
|
--
|
|
2.17.1
|
|
|