Browse Source

Fixed incomplete multi-character sanitization and incomplete URL substring sanitization issues.

n1474335 4 years ago
parent
commit
170e564319
2 changed files with 34 additions and 13 deletions
  1. 14 1
      src/core/Utils.mjs
  2. 20 12
      src/web/HTMLOperation.mjs

+ 14 - 1
src/core/Utils.mjs

@@ -704,8 +704,21 @@ class Utils {
      * Utils.stripHtmlTags("<div>Test</div>");
      */
     static stripHtmlTags(htmlStr, removeScriptAndStyle=false) {
+        /**
+         * Recursively remove a pattern from a string until there are no more matches.
+         * Avoids incomplete sanitization e.g. "aabcbc".replace(/abc/g, "") === "abc"
+         *
+         * @param {RegExp} pattern
+         * @param {string} str
+         * @returns {string}
+         */
+        function recursiveRemove(pattern, str) {
+            const newStr = str.replace(pattern, "");
+            return newStr.length === str.length ? newStr : recursiveRemove(pattern, newStr);
+        }
+
         if (removeScriptAndStyle) {
-            htmlStr = htmlStr.replace(/<(script|style)[^>]*>.*?<\/(script|style)>/gi, "");
+            htmlStr = recursiveRemove(/<(script|style)[^>]*>.*?<\/(script|style)>/gi, htmlStr);
         }
         return htmlStr.replace(/<[^>]+>/g, "");
     }

+ 20 - 12
src/web/HTMLOperation.mjs

@@ -6,6 +6,7 @@
 
 import HTMLIngredient from "./HTMLIngredient.mjs";
 import Utils from "../core/Utils.mjs";
+import url from "url";
 
 
 /**
@@ -147,22 +148,29 @@ class HTMLOperation {
 /**
  * Given a URL for a Wikipedia (or other wiki) page, this function returns a link to that page.
  *
  * Links to wikipedia.org or forensicswiki.xyz (or a subdomain of either) are labelled with
  * the page title and the wiki name. Any other URL, including one that cannot be parsed or
  * from which no page title can be extracted, produces a generic "More Information" link.
  *
  * The host is compared as a whole domain rather than searched for as a substring, so URLs
  * that merely contain a wiki domain in their path or as part of a longer host are not
  * treated as wiki links.
  *
  * @param {string} urlStr
  * @returns {string}
  */
 function titleFromWikiLink(urlStr) {
     // Generic link returned for anything that is not a recognised wiki page
     const moreInfoLink = `<a href='${urlStr}' target='_blank'>More Information<i class='material-icons inline-icon'>open_in_new</i></a>`;

     let urlObj;
     try {
         urlObj = new URL(urlStr);
     } catch (err) {
         // Relative or malformed URLs cannot be identified as wiki links
         return moreInfoLink;
     }

     // hostname (unlike host) never includes a port number
     const hostname = urlObj.hostname;
     const isDomainOrSubdomain = (domain) => hostname === domain || hostname.endsWith(`.${domain}`);

     let wikiName = "";
     let pageTitle = null;

     if (isDomainOrSubdomain("forensicswiki.xyz")) {
         wikiName = "Forensics Wiki";
         // Page is addressed as index.php?title=<Page_Title>; get() returns the decoded
         // value, or null when the parameter is missing
         pageTitle = urlObj.searchParams.get("title");
     } else if (isDomainOrSubdomain("wikipedia.org")) {
         wikiName = "Wikipedia";
         if (urlObj.pathname.startsWith("/wiki/")) {
             const encodedTitle = urlObj.pathname.slice("/wiki/".length);
             try {
                 pageTitle = decodeURIComponent(encodedTitle);
             } catch (err) {
                 // Malformed percent-escape: show the title as written
                 pageTitle = encodedTitle;
             }
         }
     }

     if (!pageTitle) {
         // Not a wiki link, or no page title could be extracted
         return moreInfoLink;
     }

     // The title has been percent-decoded, so it must be escaped before being placed in markup
     const htmlEntities = {"&": "&amp;", "<": "&lt;", ">": "&gt;", "\"": "&quot;", "'": "&#39;"};
     const safeTitle = pageTitle
         .replace(/_/g, " ")
         .replace(/[&<>"']/g, (ch) => htmlEntities[ch]);

     // A serialised URL may still contain "'" which would terminate the single-quoted attribute
     const safeHref = urlObj.href.replace(/'/g, "%27");

     return `<a href='${safeHref}' target='_blank'>${safeTitle}<i class='material-icons inline-icon'>open_in_new</i></a> on ${wikiName}`;
 }
 
 export default HTMLOperation;