Browse Source

Updated URL regexes to match more unescaped special characters

n1474335 8 years ago
parent
commit
a19b02aa8c
2 changed files with 3 additions and 3 deletions
  1. src/core/operations/Extract.js (+2 -2)
  2. src/core/operations/StrUtils.js (+1 -1)

+ 2 - 2
src/core/operations/Extract.js

@@ -170,9 +170,9 @@ const Extract = {
             protocol = "[A-Z]+://",
             protocol = "[A-Z]+://",
             hostname = "[-\\w]+(?:\\.\\w[-\\w]*)+",
             hostname = "[-\\w]+(?:\\.\\w[-\\w]*)+",
             port = ":\\d+",
             port = ":\\d+",
-            path = "/[^.!,?;\"'<>()\\[\\]{}\\s\\x7F-\\xFF]*";
+            path = "/[^.!,?\"<>\\[\\]{}\\s\\x7F-\\xFF]*";
 
 
-        path += "(?:[.!,?]+[^.!,?;\"'<>()\\[\\]{}\\s\\x7F-\\xFF]+)*";
+        path += "(?:[.!,?]+[^.!,?\"<>\\[\\]{}\\s\\x7F-\\xFF]+)*";
         const regex = new RegExp(protocol + hostname + "(?:" + port +
         const regex = new RegExp(protocol + hostname + "(?:" + port +
             ")?(?:" + path + ")?", "ig");
             ")?(?:" + path + ")?", "ig");
         return Extract._search(input, regex, null, displayTotal);
         return Extract._search(input, regex, null, displayTotal);

+ 1 - 1
src/core/operations/StrUtils.js

@@ -36,7 +36,7 @@ const StrUtils = {
         },
         },
         {
         {
             name: "URL",
             name: "URL",
-            value: "([A-Za-z]+://)([-\\w]+(?:\\.\\w[-\\w]*)+)(:\\d+)?(/[^.!,?;\"\\x27<>()\\[\\]{}\\s\\x7F-\\xFF]*(?:[.!,?]+[^.!,?;\"\\x27<>()\\[\\]{}\\s\\x7F-\\xFF]+)*)?"
+            value: "([A-Za-z]+://)([-\\w]+(?:\\.\\w[-\\w]*)+)(:\\d+)?(/[^.!,?\"\<>\\[\\]{}\\s\\x7F-\\xFF]*(?:[.!,?]+[^.!,?\"\<>\\[\\]{}\\s\\x7F-\\xFF]+)*)?"
         },
         },
         {
         {
             name: "Domain",
             name: "Domain",