Browse Source

Improved domain name regex

n1474335 7 years ago
parent
commit
73823e3eb9

+ 0 - 1
.travis.yml

@@ -4,7 +4,6 @@ node_js:
 install: npm install
 before_script:
   - npm install -g grunt
-  - if [ "$TRAVIS_TAG" ]; then git checkout -b travis-build; fi
 script:
   - grunt lint
   - grunt test

+ 1 - 1
src/core/config/OperationConfig.js

@@ -2140,7 +2140,7 @@ const OperationConfig = {
         ]
     },
     "Extract domains": {
-        description: "Extracts domain names with common Top-Level Domains (TLDs).<br>Note that this will not include paths. Use <strong>Extract URLs</strong> to find entire URLs.",
+        description: "Extracts domain names.<br>Note that this will not include paths. Use <strong>Extract URLs</strong> to find entire URLs.",
         run: Extract.runDomains,
         inputType: "string",
         outputType: "string",

+ 2 - 5
src/core/operations/Extract.js

@@ -187,11 +187,8 @@ const Extract = {
      * @returns {string}
      */
     runDomains: function(input, args) {
-        let displayTotal = args[0],
-            protocol = "https?://",
-            hostname = "[-\\w\\.]+",
-            tld = "\\.(?:com|net|org|biz|info|co|uk|onion|int|mobi|name|edu|gov|mil|eu|ac|ae|af|de|ca|ch|cn|cy|es|gb|hk|il|in|io|tv|me|nl|no|nz|ro|ru|tr|us|az|ir|kz|uz|pk)+",
-            regex = new RegExp("(?:" + protocol + ")?" + hostname + tld, "ig");
+        const displayTotal = args[0],
+            regex = /\b((?=[a-z0-9-]{1,63}\.)(xn--)?[a-z0-9]+(-[a-z0-9]+)*\.)+[a-z]{2,63}\b/ig;
 
         return Extract._search(input, regex, null, displayTotal);
     },

+ 1 - 1
src/core/operations/StrUtils.js

@@ -40,7 +40,7 @@ const StrUtils = {
         },
         {
             name: "Domain",
-            value: "(?:(https?):\\/\\/)?([-\\w.]+)\\.(com|net|org|biz|info|co|uk|onion|int|mobi|name|edu|gov|mil|eu|ac|ae|af|de|ca|ch|cn|cy|es|gb|hk|il|in|io|tv|me|nl|no|nz|ro|ru|tr|us|az|ir|kz|uz|pk)+"
+            value: "\\b((?=[a-z0-9-]{1,63}\\.)(xn--)?[a-z0-9]+(-[a-z0-9]+)*\\.)+[a-z]{2,63}\\b"
         },
         {
             name: "Windows file path",