From: csagan5 <32685696+csagan5@users.noreply.github.com> Date: Mon, 12 Feb 2018 21:36:33 +0100 Subject: ungoogled-chromium: Disable Google host detection Disables various detections of Google hosts and functionality specific to them --- components/google/core/common/google_util.cc | 218 +----------------- .../browser/page_load_metrics_util.cc | 4 +- .../common/page_load_metrics_util.cc | 33 +-- components/search_engines/template_url.cc | 6 +- net/base/url_util.cc | 21 -- 5 files changed, 9 insertions(+), 273 deletions(-) diff --git a/components/google/core/common/google_util.cc b/components/google/core/common/google_util.cc --- a/components/google/core/common/google_util.cc +++ b/components/google/core/common/google_util.cc @@ -29,129 +29,10 @@ namespace google_util { -// Helpers -------------------------------------------------------------------- - -namespace { - -bool IsPathHomePageBase(base::StringPiece path) { - return (path == "/") || (path == "/webhp"); -} - -// Removes a single trailing dot if present in |host|. -void StripTrailingDot(base::StringPiece* host) { - if (base::EndsWith(*host, ".")) - host->remove_suffix(1); -} - -// True if the given canonical |host| is "[www.]<domain_in_lower_case>.<TLD>" -// with a valid TLD that appears in |allowed_tlds|. If |subdomain_permission| is -// ALLOW_SUBDOMAIN, we check against host "*.<domain_in_lower_case>.<TLD>" -// instead. -bool IsValidHostName(base::StringPiece host, - base::StringPiece domain_in_lower_case, - SubdomainPermission subdomain_permission, - const base::flat_set<base::StringPiece>& allowed_tlds) { - // Fast path to avoid searching the registry set. 
- if (host.find(domain_in_lower_case) == base::StringPiece::npos) - return false; - - size_t tld_length = - net::registry_controlled_domains::GetCanonicalHostRegistryLength( - host, net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES, - net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES); - if ((tld_length == 0) || (tld_length == std::string::npos)) - return false; - - // Removes the tld and the preceding dot. - base::StringPiece host_minus_tld = - host.substr(0, host.length() - tld_length - 1); - - base::StringPiece tld = host.substr(host.length() - tld_length); - // Remove the trailing dot from tld if present, as for Google domains it's the - // same page. - StripTrailingDot(&tld); - if (!allowed_tlds.contains(tld)) - return false; - - if (base::LowerCaseEqualsASCII(host_minus_tld, domain_in_lower_case)) - return true; - - if (subdomain_permission == ALLOW_SUBDOMAIN) { - std::string dot_domain = base::StrCat({".", domain_in_lower_case}); - return base::EndsWith(host_minus_tld, dot_domain, - base::CompareCase::INSENSITIVE_ASCII); - } - - std::string www_domain = base::StrCat({"www.", domain_in_lower_case}); - return base::LowerCaseEqualsASCII(host_minus_tld, www_domain); -} - -// True if |url| is a valid URL with HTTP or HTTPS scheme. If |port_permission| -// is DISALLOW_NON_STANDARD_PORTS, this also requires |url| to use the standard -// port for its scheme (80 for HTTP, 443 for HTTPS). 
-bool IsValidURL(const GURL& url, PortPermission port_permission) { - static bool g_ignore_port_numbers = - base::CommandLine::ForCurrentProcess()->HasSwitch( - switches::kIgnoreGooglePortNumbers); - return url.is_valid() && url.SchemeIsHTTPOrHTTPS() && - (url.port().empty() || g_ignore_port_numbers || - (port_permission == ALLOW_NON_STANDARD_PORTS)); -} - -bool IsCanonicalHostGoogleHostname(base::StringPiece canonical_host, - SubdomainPermission subdomain_permission) { - const GURL& base_url(CommandLineGoogleBaseURL()); - if (base_url.is_valid() && (canonical_host == base_url.host_piece())) - return true; - - static const base::NoDestructor<base::flat_set<base::StringPiece>> - google_tlds(std::initializer_list<base::StringPiece>({GOOGLE_TLD_LIST})); - return IsValidHostName(canonical_host, "google", subdomain_permission, - *google_tlds); -} - -bool IsCanonicalHostYoutubeHostname(base::StringPiece canonical_host, - SubdomainPermission subdomain_permission) { - static const base::NoDestructor<base::flat_set<base::StringPiece>> - youtube_tlds( - std::initializer_list<base::StringPiece>({YOUTUBE_TLD_LIST})); - return IsValidHostName(canonical_host, "youtube", subdomain_permission, - *youtube_tlds); -} - -// True if |url| is a valid URL with a host that is in the static list of -// Google subdomains for google search, and an HTTP or HTTPS scheme. Requires -// |url| to use the standard port for its scheme (80 for HTTP, 443 for HTTPS). 
-bool IsGoogleSearchSubdomainUrl(const GURL& url) { - if (!IsValidURL(url, PortPermission::DISALLOW_NON_STANDARD_PORTS)) - return false; - - base::StringPiece host(url.host_piece()); - StripTrailingDot(&host); - - static const base::NoDestructor<base::flat_set<base::StringPiece>> - google_subdomains(std::initializer_list<base::StringPiece>( - {"ipv4.google.com", "ipv6.google.com"})); - - return google_subdomains->contains(host); -} - -} // namespace - // Global functions ----------------------------------------------------------- const char kGoogleHomepageURL[] = "https://www.google.com/"; -bool HasGoogleSearchQueryParam(base::StringPiece str) { - url::Component query(0, static_cast<int>(str.length())), key, value; - while (url::ExtractQueryKeyValue(str.data(), &query, &key, &value)) { - base::StringPiece key_str = str.substr(key.begin, key.len); - if (key_str == "q" || key_str == "as_q") - return true; - } - return false; -} - std::string GetGoogleLocale(const std::string& application_locale) { // Google does not recognize "nb" for Norwegian Bokmal; it uses "no". return (application_locale == "nb") ? "no" : application_locale; @@ -164,25 +45,7 @@ GURL AppendGoogleLocaleParam(const GURL& url, } std::string GetGoogleCountryCode(const GURL& google_homepage_url) { - base::StringPiece google_hostname = google_homepage_url.host_piece(); - // TODO(igorcov): This needs a fix for case when the host has a trailing dot, - // like "google.com./". https://crbug.com/720295. - const size_t last_dot = google_hostname.find_last_of('.'); - if (last_dot == std::string::npos) - return std::string(); - base::StringPiece country_code = google_hostname.substr(last_dot + 1); - // Assume the com TLD implies the US. - if (country_code == "com") - return "us"; - // Google uses the Unicode Common Locale Data Repository (CLDR), and the CLDR - // code for the UK is "gb". - if (country_code == "uk") - return "gb"; - // Catalonia does not have a CLDR country code, since it's a region in Spain, - // so use Spain instead. 
- if (country_code == "cat") - return "es"; - return country_code.as_string(); + return std::string(); } GURL GetGoogleSearchURL(const GURL& google_homepage_url) { @@ -221,99 +84,30 @@ bool StartsWithCommandLineGoogleBaseURL(const GURL& url) { bool IsGoogleHostname(base::StringPiece host, SubdomainPermission subdomain_permission) { - url::CanonHostInfo host_info; - return IsCanonicalHostGoogleHostname(net::CanonicalizeHost(host, &host_info), - subdomain_permission); + return false; } bool IsGoogleDomainUrl(const GURL& url, SubdomainPermission subdomain_permission, PortPermission port_permission) { - return IsValidURL(url, port_permission) && - IsCanonicalHostGoogleHostname(url.host_piece(), subdomain_permission); + return false; } bool IsGoogleHomePageUrl(const GURL& url) { - // First check to see if this has a Google domain. - if (!IsGoogleDomainUrl(url, DISALLOW_SUBDOMAIN, - DISALLOW_NON_STANDARD_PORTS) && - !IsGoogleSearchSubdomainUrl(url)) { - return false; - } - - // Make sure the path is a known home page path. - base::StringPiece path(url.path_piece()); - return IsPathHomePageBase(path) || - base::StartsWith(path, "/ig", base::CompareCase::INSENSITIVE_ASCII); + return false; } bool IsGoogleSearchUrl(const GURL& url) { - // First check to see if this has a Google domain. - if (!IsGoogleDomainUrl(url, DISALLOW_SUBDOMAIN, - DISALLOW_NON_STANDARD_PORTS) && - !IsGoogleSearchSubdomainUrl(url)) { - return false; - } - - // Make sure the path is a known search path. - base::StringPiece path(url.path_piece()); - bool is_home_page_base = IsPathHomePageBase(path); - if (!is_home_page_base && (path != "/search")) - return false; - - // Check for query parameter in URL parameter and hash fragment, depending on - // the path type. 
- return HasGoogleSearchQueryParam(url.ref_piece()) || - (!is_home_page_base && HasGoogleSearchQueryParam(url.query_piece())); + return false; } bool IsYoutubeDomainUrl(const GURL& url, SubdomainPermission subdomain_permission, PortPermission port_permission) { - return IsValidURL(url, port_permission) && - IsCanonicalHostYoutubeHostname(url.host_piece(), subdomain_permission); + return false; } bool IsGoogleAssociatedDomainUrl(const GURL& url) { - if (IsGoogleDomainUrl(url, ALLOW_SUBDOMAIN, ALLOW_NON_STANDARD_PORTS)) - return true; - - if (IsYoutubeDomainUrl(url, ALLOW_SUBDOMAIN, ALLOW_NON_STANDARD_PORTS)) - return true; - - // Some domains don't have international TLD extensions, so testing for them - // is very straightforward. - static const char* kSuffixesToSetHeadersFor[] = { - ".android.com", - ".doubleclick.com", - ".doubleclick.net", - ".ggpht.com", - ".googleadservices.com", - ".googleapis.com", - ".googlesyndication.com", - ".googleusercontent.com", - ".googlevideo.com", - ".gstatic.com", - ".litepages.googlezip.net", - ".ytimg.com", - }; - const std::string host = url.host(); - for (size_t i = 0; i < base::size(kSuffixesToSetHeadersFor); ++i) { - if (base::EndsWith(host, kSuffixesToSetHeadersFor[i], - base::CompareCase::INSENSITIVE_ASCII)) { - return true; - } - } - - // Exact hostnames in lowercase to set headers for. 
- static const char* kHostsToSetHeadersFor[] = { - "googleweblight.com", - }; - for (size_t i = 0; i < base::size(kHostsToSetHeadersFor); ++i) { - if (base::LowerCaseEqualsASCII(host, kHostsToSetHeadersFor[i])) - return true; - } - return false; } diff --git a/components/page_load_metrics/browser/page_load_metrics_util.cc b/components/page_load_metrics/browser/page_load_metrics_util.cc --- a/components/page_load_metrics/browser/page_load_metrics_util.cc +++ b/components/page_load_metrics/browser/page_load_metrics_util.cc @@ -188,9 +188,7 @@ bool DidObserveLoadingBehaviorInAnyFrame( } bool IsGoogleSearchHostname(const GURL& url) { - base::Optional<std::string> result = - page_load_metrics::GetGoogleHostnamePrefix(url); - return result && result.value() == "www"; + return false; } bool IsGoogleSearchResultUrl(const GURL& url) { diff --git a/components/page_load_metrics/common/page_load_metrics_util.cc b/components/page_load_metrics/common/page_load_metrics_util.cc --- a/components/page_load_metrics/common/page_load_metrics_util.cc +++ b/components/page_load_metrics/common/page_load_metrics_util.cc @@ -12,38 +12,7 @@ namespace page_load_metrics { base::Optional<std::string> GetGoogleHostnamePrefix(const GURL& url) { - const size_t registry_length = - net::registry_controlled_domains::GetRegistryLength( - url, - - // Do not include unknown registries (registries that don't have any - // matches in effective TLD names). - net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES, - - // Do not include private registries, such as appspot.com. We don't - // want to match URLs like www.google.appspot.com. - net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES); - - const base::StringPiece hostname = url.host_piece(); - if (registry_length == 0 || registry_length == std::string::npos || - registry_length >= hostname.length()) { - return base::Optional<std::string>(); - } - - // Removes the tld and the preceding dot. 
- const base::StringPiece hostname_minus_registry = - hostname.substr(0, hostname.length() - (registry_length + 1)); - - if (hostname_minus_registry == "google") - return std::string(""); - - if (!base::EndsWith(hostname_minus_registry, ".google", - base::CompareCase::INSENSITIVE_ASCII)) { - return base::Optional<std::string>(); - } - - return std::string(hostname_minus_registry.substr( - 0, hostname_minus_registry.length() - strlen(".google"))); + return base::Optional<std::string>(); } bool IsGoogleHostname(const GURL& url) { diff --git a/components/search_engines/template_url.cc b/components/search_engines/template_url.cc --- a/components/search_engines/template_url.cc +++ b/components/search_engines/template_url.cc @@ -513,11 +513,7 @@ base::string16 TemplateURLRef::SearchTermToString16( bool TemplateURLRef::HasGoogleBaseURLs( const SearchTermsData& search_terms_data) const { ParseIfNecessary(search_terms_data); - return std::any_of(replacements_.begin(), replacements_.end(), - [](const Replacement& replacement) { - return replacement.type == GOOGLE_BASE_URL || - replacement.type == GOOGLE_BASE_SUGGEST_URL; - }); + return false; } bool TemplateURLRef::ExtractSearchTermsFromURL( diff --git a/net/base/url_util.cc b/net/base/url_util.cc --- a/net/base/url_util.cc +++ b/net/base/url_util.cc @@ -415,27 +415,6 @@ bool HasGoogleHost(const GURL& url) { } bool IsGoogleHost(base::StringPiece host) { - static const char* kGoogleHostSuffixes[] = { - ".google.com", - ".youtube.com", - ".gmail.com", - ".doubleclick.net", - ".gstatic.com", - ".googlevideo.com", - ".googleusercontent.com", - ".googlesyndication.com", - ".google-analytics.com", - ".googleadservices.com", - ".googleapis.com", - ".ytimg.com", - }; - for (const char* suffix : kGoogleHostSuffixes) { - // Here it's possible to get away with faster case-sensitive comparisons - // because the list above is all lowercase, and a GURL's host name will - // always be canonicalized to lowercase as well. 
- if (base::EndsWith(host, suffix)) - return true; - } return false; } -- 2.17.1