Inject-scripts-for-AMP-tracking-ads-and-video-functionality.patch 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180
  1. From: csagan5 <32685696+csagan5@users.noreply.github.com>
  2. Date: Sat, 28 Oct 2017 10:09:41 +0200
  3. Subject: Inject scripts for AMP, tracking, ads and video functionality
  4. Remove AMP, tracking and ads from search/news results
  5. Break Page Visibility API and Fullscreen API for youtube.com and vimeo.com to allow playing videos in background (original Javascript code by timdream)
  6. Set proper injection script nonce
  7. Send a random key press to circumvent idle status detection
  8. ---
  9. third_party/blink/renderer/core/dom/BUILD.gn | 2 +
  10. .../blink/renderer/core/dom/document.cc | 60 ++++++++++++++++++-
  11. .../blink/renderer/core/dom/document.h | 2 +
  12. .../core/dom/extensions/anti_amp_cure.h | 6 ++
  13. .../core/dom/extensions/video_bg_play.h | 6 ++
  14. .../renderer/core/html/html_script_element.cc | 5 ++
  15. .../renderer/core/html/html_script_element.h | 1 +
  16. 7 files changed, 81 insertions(+), 1 deletion(-)
  17. create mode 100644 third_party/blink/renderer/core/dom/extensions/anti_amp_cure.h
  18. create mode 100644 third_party/blink/renderer/core/dom/extensions/video_bg_play.h
  19. diff --git a/third_party/blink/renderer/core/dom/BUILD.gn b/third_party/blink/renderer/core/dom/BUILD.gn
  20. --- a/third_party/blink/renderer/core/dom/BUILD.gn
  21. +++ b/third_party/blink/renderer/core/dom/BUILD.gn
  22. @@ -144,6 +144,8 @@ blink_core_sources("dom") {
  23. "global_event_handlers.h",
  24. "icon_url.cc",
  25. "icon_url.h",
  26. + "extensions/anti_amp_cure.h",
  27. + "extensions/video_bg_play.h",
  28. "id_target_observer.cc",
  29. "id_target_observer.h",
  30. "id_target_observer_registry.cc",
  31. diff --git a/third_party/blink/renderer/core/dom/document.cc b/third_party/blink/renderer/core/dom/document.cc
  32. --- a/third_party/blink/renderer/core/dom/document.cc
  33. +++ b/third_party/blink/renderer/core/dom/document.cc
  34. @@ -257,6 +257,7 @@
  35. #include "third_party/blink/renderer/core/page/scrolling/root_scroller_controller.h"
  36. #include "third_party/blink/renderer/core/page/scrolling/scroll_state_callback.h"
  37. #include "third_party/blink/renderer/core/page/scrolling/scrolling_coordinator.h"
  38. +#include "extensions/video_bg_play.h"
  39. #include "third_party/blink/renderer/core/page/scrolling/snap_coordinator.h"
  40. #include "third_party/blink/renderer/core/page/scrolling/text_fragment_anchor.h"
  41. #include "third_party/blink/renderer/core/page/scrolling/top_document_root_scroller_controller.h"
  42. @@ -321,6 +322,8 @@
  43. #include "third_party/blink/renderer/platform/wtf/text/string_buffer.h"
  44. #include "third_party/blink/renderer/platform/wtf/text/text_encoding_registry.h"
  45. +#include "extensions/anti_amp_cure.h"
  46. +
  47. #ifndef NDEBUG
  48. using WeakDocumentSet = blink::HeapHashSet<blink::WeakMember<blink::Document>>;
  49. static WeakDocumentSet& liveDocumentSet();
  50. @@ -6770,8 +6773,63 @@ void Document::FinishedParsing() {
  51. // Parser should have picked up all preloads by now
  52. fetcher_->ClearPreloads(ResourceFetcher::kClearSpeculativeMarkupPreloads);
  53. - if (IsPrefetchOnly())
  54. + if (IsPrefetchOnly()) {
  55. WebPrerenderingSupport::Current()->PrefetchFinished();
  56. + return;
  57. + }
  58. +
  59. + // determine whether this is a search results page
  60. + const WTF::String& host = url_.Host();
  61. + if ((host == nullptr) || host.IsEmpty())
  62. + return;
  63. +
  64. + auto* bodyElement = body();
  65. + if (!bodyElement)
  66. + return;
  67. + int selected = 0;
  68. + size_t pos1 = host.Find("www.google."), pos2 = host.Find("news.google."), pos3 = url_.GetPath().Find("/search"), pos4 = host.Find("images.google.");
  69. + if (((pos1 == 0) && (pos3 == 0)) || (pos2 == 0) || (pos4 == 0)) {
  70. + LOG(INFO) << "injecting AMP removal Javascript payload, URL: " << url_.GetString();
  71. + selected = 1;
  72. + // check for eligibility of the video bg fix
  73. + } else if ((WTF::kNotFound != host.Find("youtube.com")) || (WTF::kNotFound != host.Find("vimeo.com"))) {
  74. + LOG(INFO) << "injecting video-bg-play Javascript payload, URL: " << url_.GetString();
  75. + selected = 2;
  76. + } else
  77. + return;
  78. +
  79. + // find out which nonce to use
  80. + const AtomicString& nonce = findFirstScriptNonce();
  81. +
  82. + HTMLScriptElement* e = MakeGarbageCollected<HTMLScriptElement>(*this, CreateElementFlags());
  83. + if (selected == 1)
  84. + e->setTextDirect(ANTI_AMP_CURE_JS);
  85. + else if (selected == 2)
  86. + e->setTextDirect(VIDEO_BG_PLAY_JS);
  87. + else
  88. + NOTREACHED();
  89. +
  90. + if (nonce != g_null_atom)
  91. + e->setNonce(nonce);
  92. + else
  93. + LOG(WARNING) << "could not find script nonce to use";
  94. +
  95. + bodyElement->AppendChild(e);
  96. +}
  97. +
  98. +const AtomicString& Document::findFirstScriptNonce() {
  99. + // Yields the first non-empty nonce found on the scanned scripts, or g_null_atom when none carries one.
  100. + HTMLCollection* script_list = scripts();
  101. + // all scripts are likely to share one nonce, so only the first 10 are inspected
  102. + const unsigned total = static_cast<unsigned>(script_list->length());
  103. + const unsigned scan_count = (total > 10) ? 10 : total;
  104. + for (unsigned idx = 0; idx < scan_count; ++idx) {
  105. + const AtomicString& candidate = script_list->item(idx)->nonce();
  106. + if ((candidate == g_null_atom) || candidate.IsEmpty())
  107. + continue;
  108. + return candidate;
  109. + }
  110. + return g_null_atom;
  111. }
  112. void Document::ElementDataCacheClearTimerFired(TimerBase*) {
  113. diff --git a/third_party/blink/renderer/core/dom/document.h b/third_party/blink/renderer/core/dom/document.h
  114. --- a/third_party/blink/renderer/core/dom/document.h
  115. +++ b/third_party/blink/renderer/core/dom/document.h
  116. @@ -1690,6 +1690,8 @@ class CORE_EXPORT Document : public ContainerNode,
  117. void AddAXContext(AXContext*);
  118. void RemoveAXContext(AXContext*);
  119. + const AtomicString& findFirstScriptNonce();
  120. +
  121. bool IsDocumentFragment() const =
  122. delete; // This will catch anyone doing an unnecessary check.
  123. bool IsDocumentNode() const =
  124. diff --git a/third_party/blink/renderer/core/dom/extensions/anti_amp_cure.h b/third_party/blink/renderer/core/dom/extensions/anti_amp_cure.h
  125. new file mode 100644
  126. --- /dev/null
  127. +++ b/third_party/blink/renderer/core/dom/extensions/anti_amp_cure.h
  128. @@ -0,0 +1,6 @@
  129. +#ifndef anti_amp_cure_h
  130. +#define anti_amp_cure_h
  131. +// JavaScript payload that Document::FinishedParsing() injects into Google search, news and image result pages: it re-creates result hyperlinks without click-tracking/AMP redirects and hides the 'tads' ad container.
  132. +#define ANTI_AMP_CURE_JS "/* Array of bytes to base64 string decoding */\n/* */\nfunction b64ToUint6(nChr) {\n return nChr > 64 && nChr < 91 ?\n nChr - 65 :\n nChr > 96 && nChr < 123 ?\n nChr - 71 :\n nChr > 47 && nChr < 58 ?\n nChr + 4 :\n nChr === 43 ?\n 62 :\n nChr === 47 ?\n 63 :\n 0;\n}\n\n/* returns an Uint8Array with decoded bytes */\n/* from https://developer.mozilla.org/en-US/docs/Web/API/WindowBase64/Base64_encoding_and_decoding#Appendix.3A_Decode_a_Base64_string_to_Uint8Array_or_ArrayBuffer */\nfunction base64DecToArr(sBase64, nBlockSize) {\n var\n // URL encoding variant\n sB64Enc = sBase64.replace(/-/g, '+').replace(/_/g, '/'),\n nInLen = sB64Enc.length,\n nOutLen = nBlockSize ? Math.ceil((nInLen * 3 + 1 >>> 2) / nBlockSize) * nBlockSize : nInLen * 3 + 1 >>> 2,\n aBytes = new Uint8Array(nOutLen);\n\n for (var nMod3, nMod4, nUint24 = 0, nOutIdx = 0, nInIdx = 0; nInIdx < nInLen; nInIdx++) {\n nMod4 = nInIdx & 3;\n nUint24 |= b64ToUint6(sB64Enc.charCodeAt(nInIdx)) << 18 - 6 * nMod4;\n if (nMod4 === 3 || nInLen - nInIdx === 1) {\n for (nMod3 = 0; nMod3 < 3 && nOutIdx < nOutLen; nMod3++, nOutIdx++) {\n aBytes[nOutIdx] = nUint24 >>> (16 >>> nMod3 & 24) & 255;\n }\n nUint24 = 0;\n }\n }\n\n return aBytes;\n}\n\nfunction replaceHyperlink(a, url) {\n // create new A element - old one has event listeners attached\n var newA = document.createElement('a');\n newA.referrerPolicy = 'no-referrer';\n // property set when hyperlink has been created by this script\n newA.sane = true;\n newA.href = url;\n // copy CSS classes - news have only one\n newA.className = a.className;\n // scan child nodes for SVGs\n // news nodes will only have a text node\n a.childNodes.forEach(function(n) {\n // remove icon from image result buttons\n if (n.nodeName == 'DIV') {\n var svgs = n.querySelectorAll('div svg');\n if (svgs.length == 2)\n n.removeChild(svgs[1]);\n }\n });\n // use direct HTML as appending nodes skips some e.g. heading\n newA.innerHTML = a.innerHTML;\n // replace hyperlink\n a.parentNode.replaceChild(newA, a);\n return newA;\n}\n\nfunction getURLFromJsData(jsdata) {\n if (!jsdata) return;\n\n var res = jsdata.split(';');\n if (res.length < 3) return;\n // decode the payload\n var data = base64DecToArr(res[1]);\n // 08 = field 1, type Variant\n if (data[0] != 0x08)\n return;\n // 13 = 19 (raw) or -10 (zigzag)\n if (data[1] != 0x13) {\n console.warn('could not decode:', res[1]);\n return;\n }\n // 22 = field 4, type String\n if (data[2] != 0x22)\n return;\n // usually 2 bytes varint e.g. 88-01 = length 136\n res = proto_read_uint32(data, 3);\n // extract slice with the URL\n return new TextDecoder().decode(data.slice(res.pos, res.pos + res.value));\n}\n\nfunction recreateNewsHyperlink(a) {\n var article = a.parentNode;\n if (!article) return false;\n // grab jsdata from parent\n var url = getURLFromJsData(article.getAttribute('jsdata'));\n // use the AMP (but external) URL as fallback\n var obfuscated = false;\n if (!url) {\n url = a.href;\n obfuscated = true;\n }\n\n // ready to replace the hyperlink\n var newA = replaceHyperlink(a, url);\n if (obfuscated)\n newA.setAttribute(\"target\", \"_blank\");\n\n // replace headline hyperlink\n var h4a = article.querySelector('h4 a');\n if (h4a) {\n var newH4a = replaceHyperlink(h4a, url);\n if (obfuscated)\n newH4a.setAttribute(\"target\", \"_blank\");\n }\n\n // remove icon by finding the time sibling\n var t = article.querySelector('time');\n if (t) {\n var found = false;\n t.parentNode.childNodes.forEach(function(n) {\n if (found) return;\n if (n.innerText == \"amp\") {\n t.parentNode.removeChild(n);\n found = true;\n }\n });\n }\n\n // cleanup of the article\n article.removeAttribute('jsmodel');\n article.removeAttribute('jsaction');\n article.removeAttribute('jscontroller');\n article.removeAttribute('jsdata');\n\n return true;\n}\n\n// from protobufjs\nfunction proto_read_uint32(buf, pos) {\n var value = (buf[pos] & 127) >>> 0;\n if (buf[pos++] < 128) return {\n value: value,\n pos: pos\n };\n value = (value | (buf[pos] & 127) << 7) >>> 0;\n if (buf[pos++] < 128) return {\n value: value,\n pos: pos\n };\n value = (value | (buf[pos] & 127) << 14) >>> 0;\n if (buf[pos++] < 128) return {\n value: value,\n pos: pos\n };\n value = (value | (buf[pos] & 127) << 21) >>> 0;\n if (buf[pos++] < 128) return {\n value: value,\n pos: pos\n };\n value = (value | (buf[pos] & 15) << 28) >>> 0;\n if (buf[pos++] < 128) return {\n value: value,\n pos: pos\n };\n\n if ((pos += 5) > buf.length)\n throw RangeError('cannot read string length');\n\n return {\n value: value,\n pos: pos\n };\n}\n\nfunction recreateResultHyperlink(a) {\n var url = a.href;\n // remove AMP class, get actual page URL\n var ampCur = a.getAttribute('data-amp-cur');\n if (ampCur) {\n url = ampCur;\n a.classList.remove('amp_r');\n } else {\n var realLink = getRealLinkFromGoogleUrl(a);\n if (realLink) {\n url = realLink;\n } else {\n // might not be an actual hyperlink, ignore it\n if (!a.href) {\n return false;\n }\n // leave original href unchanged\n }\n }\n\n // re-create with original CSS classes\n replaceHyperlink(a, url);\n\n return true;\n}\n\nfunction isResult(a) {\n if (a.getAttribute('data-amp-cur'))\n return true;\n var inlineMousedown = a.getAttribute('onmousedown');\n if (!inlineMousedown)\n return false;\n // return rwt(....); // E.g Google search results.\n // return google.rwt(...); // E.g. sponsored search results\n // return google.arwt(this); // E.g. sponsored search results (dec 2016).\n return /\\ba?rwt\\(/.test(inlineMousedown) || /\\bctpacw\\b/.test(inlineMousedown);\n}\n\n/**\n * @returns {String} the real URL if the given link is a Google redirect URL.\n */\nfunction getRealLinkFromGoogleUrl(a) {\n if ((a.hostname === location.hostname || a.hostname.indexOf('www.google.') == 0) &&\n /^\\/(local_)?url$/.test(a.pathname)) {\n // Google Maps / Dito (/local_url?q=<url>)\n // Mobile (/url?q=<url>)\n var url = /[?&](?:q|url)=((?:https?|ftp)[%:][^&]+)/.exec(a.search);\n if (url)\n return decodeURIComponent(url[1]);\n // Help pages, e.g. safe browsing (/url?...&q=%2Fsupport%2Fanswer...)\n url = /[?&](?:q|url)=((?:%2[Ff]|\\/)[^&]+)/.exec(a.search);\n if (url)\n return a.origin + decodeURIComponent(url[1]);\n }\n}\n\nfunction sanitizeAds() {\n // scan all divs\n var div = document.getElementById('tads');\n if (div) {\n div.style.display = 'none';\n return true;\n }\n return false;\n}\n\nfunction hookMoreSearchResults() {\n var extrares = document.getElementById('extrares');\n if (!extrares) {\n console.warn(\"could not hook more results\");\n return;\n }\n // mutation observers are great but they don't work\n extrares.addEventListener(\"DOMNodeInserted\", function(e) {\n var node = e.target;\n if (node.id && node.id.startsWith(\"arc-srp\"))\n sanitizeResultHyperlinks(node);\n });\n}\n\nfunction setMlogoClick() {\n // skip home page\n if (document.getElementById('hplogo')) return;\n\n var mlogo = document.getElementById('qslc');\n if (mlogo && mlogo.children[0]) {\n mlogo = mlogo.children[0];\n } else {\n mlogo = document.getElementById('mlogo');\n }\n if (mlogo) {\n mlogo.removeAttribute(\"href\");\n mlogo.setAttribute(\"onclick\", \"sanitizeAll()\");\n console.log(\"logo link replaced\");\n } else {\n console.warn(\"could not replace logo link\");\n }\n}\n\nfunction sanitizeResultHyperlinks(rootNode) {\n var sanitized = 0,\n total = 0;\n // exclude translation hyperlink nodes\n const exclude = rootNode.querySelectorAll('#tw-ob a');\n // selector for results (doesn't work with news anymore)\n rootNode.querySelectorAll('div[data-hveid]:not([data-hveid=\"\"]) a, div[data-ved]:not([data-ved=\"\"]) a').forEach(function(a) {\n // exclude nodes which should not be processed\n var excluded = false;\n exclude.forEach(function(e) {\n if (excluded) return;\n if (e == a) {\n excluded = true;\n }\n });\n if (excluded) return;\n\n total++;\n if (!a.sane && recreateResultHyperlink(a))\n sanitized++;\n });\n console.log(\"sanitized \", sanitized, \"/\", total, \" result hyperlinks\");\n}\n\nfunction sanitizeAllNews() {\n var sanitized = 0,\n total = 0;\n // pick all articles which have the associated data not yet wiped\n document.querySelectorAll('article[jsdata]:not([jsdata=\"\"]) a').forEach(function(a) {\n total++;\n if (!a.sane && recreateNewsHyperlink(a))\n sanitized++;\n });\n console.log(\"sanitized \", sanitized, \"/\", total, \" news hyperlinks\");\n}\n\nfunction hookMoreNews() {\n document.addEventListener(\"DOMNodeInserted\", function(e) {\n var node = e.target;\n // the real inserted node is 'C-WIZ', but we need to wait for loading to complete\n if (node.nodeName == '#text' && node.parentNode && node.parentNode.nodeName == 'TITLE')\n // title is updated when page loading completes, thus trigger hyperlinks fixing afterwards\n sanitizeAllNews();\n });\n}\n\nfunction hookMoreImageResults() {\n document.addEventListener(\"DOMNodeInserted\", function(e) {\n var node = e.target;\n // remove card and iframe, fix hyperlink\n if (node.nodeName == \"DIV\" && node.hasAttribute(\"data-query\"))\n sanitizeResultHyperlinks(node);\n else if (node.nodeName == \"IFRAME\")\n node.parentNode.removeChild(node);\n else if (node.nodeName == \"C-WIZ\")\n // replace instead of removing so that correlated image results will be displayed\n node.parentNode.replaceChild(document.createTextNode(\"iframe replaced\"), node);\n });\n}\n\nconsole.log('Bromite click-tracking and AMP removal v0.4.2');\n\nif (document.location.host.indexOf(\"news.google.\") === 0) {\n sanitizeAllNews();\n hookMoreNews();\n} else {\n // avoid running cleanup on non-result pages\n if (document.location.host.indexOf(\"accounts.google.\") == -1) {\n console.log(\"ads removed: \", sanitizeAds());\n\n if (document.location.search.match(/[?&]tbm=isch/)) {\n // find main c-wiz\n var cwizs = document.querySelectorAll('c-wiz[data-ssc=\"0\"]');\n if (cwizs.length)\n sanitizeResultHyperlinks(cwizs[0]);\n else\n console.warn('could not find main image results');\n // image search results\n hookMoreImageResults();\n } else {\n var main = document.getElementById('main');\n if (main)\n sanitizeResultHyperlinks(main);\n else\n console.warn('could not find main search results');\n // regular search results\n setMlogoClick();\n hookMoreSearchResults();\n }\n }\n}\n"
  133. +
  134. +#endif // anti_amp_cure_h
  135. diff --git a/third_party/blink/renderer/core/dom/extensions/video_bg_play.h b/third_party/blink/renderer/core/dom/extensions/video_bg_play.h
  136. new file mode 100644
  137. --- /dev/null
  138. +++ b/third_party/blink/renderer/core/dom/extensions/video_bg_play.h
  139. @@ -0,0 +1,6 @@
  140. +#ifndef video_bg_play_h
  141. +#define video_bg_play_h
  142. +// JavaScript payload that Document::FinishedParsing() injects on youtube.com/vimeo.com hosts: it stops visibilitychange (and, on Vimeo, fullscreenchange) events from reaching the page and periodically refreshes YouTube's _lact timestamp.
  143. +#define VIDEO_BG_PLAY_JS "'use strict';\n\nconst IS_YOUTUBE = window.location.hostname.search(/(?:^|.+\\.)youtube.com/) > -1 ||\n window.location.hostname.search(/(?:^|.+\\.)youtube-nocookie.com/) > -1;\nconst IS_MOBILE_YOUTUBE = window.location.hostname == 'm.youtube.com';\nconst IS_VIMEO = window.location.hostname.search(/(?:^|.+\\.)vimeo.com/) > -1;\n\n/* video background play fix - based on https://github.com/mozilla/video-bg-play */\ndocument.wrappedJSObject = {};\n\n// Page Visibility API\nObject.defineProperties(document.wrappedJSObject,\n { 'hidden': {value: false}, 'visibilityState': {value: 'visible'} });\n\nwindow.addEventListener(\n 'visibilitychange', evt => evt.stopImmediatePropagation(), true);\n\n// Fullscreen API\nif (IS_VIMEO) {\n window.addEventListener(\n 'fullscreenchange', evt => evt.stopImmediatePropagation(), true);\n}\n\n// User activity tracking\nif (IS_YOUTUBE) {\n const refreshInterval = 2 + 3 * 60 * 1000; // every 3 minutes\n waitForYoutubeLactInit(() => refreshLact(), refreshInterval);\n}\n\nfunction waitForYoutubeLactInit(aCallback, aCallbackInterval, aDelay = 1000) {\n let pageWin = window.wrappedJSObject || window;\n if (pageWin.hasOwnProperty('_lact')) {\n window.setInterval(aCallback, aCallbackInterval);\n } else {\n window.setTimeout(() => waitForYoutubeLactInit(aCallback,\n aCallbackInterval,\n aDelay * 2),\n aDelay);\n }\n}\n\nfunction refreshLact() {\n (window.wrappedJSObject || window)._lact = Date.now();\n}\n"
  144. +
  145. +#endif // video_bg_play_h
  146. diff --git a/third_party/blink/renderer/core/html/html_script_element.cc b/third_party/blink/renderer/core/html/html_script_element.cc
  147. --- a/third_party/blink/renderer/core/html/html_script_element.cc
  148. +++ b/third_party/blink/renderer/core/html/html_script_element.cc
  149. @@ -148,6 +148,11 @@ void HTMLScriptElement::setTextContent(
  150. }
  151. }
  152. +// Assigns the script's text from a raw C string by forwarding it to Node::setTextContent.
  153. +void HTMLScriptElement::setTextDirect(const char* s) {
  154. + Node::setTextContent(s);
  155. +}
  156. +
  157. void HTMLScriptElement::setAsync(bool async) {
  158. SetBooleanAttribute(html_names::kAsyncAttr, async);
  159. loader_->HandleAsyncAttribute();
  160. diff --git a/third_party/blink/renderer/core/html/html_script_element.h b/third_party/blink/renderer/core/html/html_script_element.h
  161. --- a/third_party/blink/renderer/core/html/html_script_element.h
  162. +++ b/third_party/blink/renderer/core/html/html_script_element.h
  163. @@ -51,6 +51,7 @@ class CORE_EXPORT HTMLScriptElement final : public HTMLElement,
  164. void setText(const StringOrTrustedScript&, ExceptionState&);
  165. void setInnerText(const StringOrTrustedScript&, ExceptionState&) override;
  166. void setTextContent(const StringOrTrustedScript&, ExceptionState&) override;
  167. + void setTextDirect(const char*);
  168. void setAsync(bool);
  169. bool async() const;
  170. --
  171. 2.17.1