Browse Source

New rule "Identical content" (#181)

Gaël Métais 9 years ago
parent
commit
6da6377bb5

+ 11 - 0
front/src/views/rule.html

@@ -335,6 +335,17 @@
         </div>
     </div>
 
+    <div ng-if="policyName === 'identicalFiles'">
+        <div ng-repeat="offender in rule.offendersObj.list track by $index">
+            <h4>A file of {{offender.weight | bytes}} is loaded {{offender.urls.length}} times:</h4>
+            <div class="offendersTable">
+                <div ng-repeat="url in offender.urls">
+                    <div><url-link url="url" max-length="100"></url-link></div>
+                </div>
+            </div>
+        </div>
+    </div>
+
     <div ng-if="policyName === 'smallRequests'">
         <div ng-repeat="(type, requests) in rule.offendersObj.list.byType">
             <h3><ng-pluralize count="requests.length" when="{'0': 'small ' + type + ' file', 'one': '1 small ' + type + ' file', 'other': '{} small ' + type + ' files'}"></ng-pluralize></h3>

+ 10 - 7
lib/metadata/policies.js

@@ -994,14 +994,17 @@ var policies = {
         "isAbnormalThreshold": 20,
         "hasOffenders": true
     },
-    "multipleRequests": {
-        "tool": "phantomas",
-        "label": "Duplicated requests",
-        "message": "<p>This only happens when the asset has no cache and is requested more than once on the same page. Be very careful about it.</p>",
+    "identicalFiles": {
+        "tool": "redownload",
+        "label": "Identical content",
+        "message": "<p>This is the number of requests that could be avoided, because of downloaded files that have the same content but are loaded from different URLs.</p><p>Try to load them from the same URL.</p>",
         "isOkThreshold": 0,
-        "isBadThreshold": 3,
-        "isAbnormalThreshold": 10,
-        "hasOffenders": true
+        "isBadThreshold": 5,
+        "isAbnormalThreshold": 15,
+        "hasOffenders": true,
+        "offendersTransformFn": function(offenders) {
+            return offenders;
+        }
     },
     "emptyRequests": {
         "tool": "redownload",

+ 1 - 1
lib/metadata/scoreProfileGeneric.json

@@ -15,7 +15,7 @@
                 "totalRequests": 5,
                 "domains": 3,
                 "notFound": 3,
-                "multipleRequests": 2,
+                "identicalFiles": 2,
                 "emptyRequests": 3,
                 "smallRequests": 1,
                 "lazyLoadableImagesBelowTheFold": 2,

+ 47 - 2
lib/tools/redownload/redownload.js

@@ -11,6 +11,7 @@ var http                = require('http');
 var zlib                = require('zlib');
 var async               = require('async');
 var request             = require('request');
+var md5                 = require('md5');
 
 var imageOptimizer      = require('./imageOptimizer');
 var fileMinifier        = require('./fileMinifier');
@@ -106,9 +107,9 @@ var Redownload = function() {
                 metrics.emptyRequests = offenders.emptyRequests.length;
 
 
-                // Now emove unwanted responses (redirections)
+                // Now remove unwanted responses (redirections and empty files)
                 results = results.filter(function(result) {
-                    return (result.status < 300 || result.status >= 400);
+                    return ((result.status < 300 || result.status >= 400) && result.weightCheck.bodySize > 0);
                 });
 
 
@@ -128,6 +129,11 @@ var Redownload = function() {
                 offenders.smallRequests = listSmallRequests(results);
                 metrics.smallRequests = offenders.smallRequests.total;
 
+                // Detect identical files
+                offenders.identicalFiles = listIdenticalFiles(results);
+                metrics.identicalFiles = offenders.identicalFiles.avoidableRequests;
+
+
                 data.toolsResults.redownload = {
                     metrics: metrics,
                     offenders: offenders
@@ -368,6 +374,45 @@ var Redownload = function() {
         return results;
     }
 
+    /**
+     * Groups the given requests by the MD5 hash of their body and reports
+     * every group whose content is served from more than one distinct URL.
+     *
+     * Images whose bodySize is below 80 bytes are ignored (attempt to
+     * exclude tracking pixels).
+     *
+     * @param {Array} requests - entries exposing `url`, `type`,
+     *     `weightCheck.body` and `weightCheck.bodySize`.
+     * @returns {{avoidableRequests: number, count: number, list: Array}}
+     *     `list` holds one `{weight, urls}` object per duplicated content,
+     *     `count` is the length of that list, and `avoidableRequests` is the
+     *     sum, over all groups, of the number of URLs beyond the first one.
+     */
+    function listIdenticalFiles(requests) {
+        var hashes = {};
+        var list = [];
+        var avoidableRequestsCount = 0;
+
+        requests.forEach(function(req) {
+            // Try to exclude tracking pixels. This is checked before hashing
+            // so that skipped entries never go through the md5 computation.
+            if (req.weightCheck.bodySize < 80 && req.type === 'image') {
+                return;
+            }
+
+            var requestHash = md5(req.weightCheck.body);
+            var group = hashes[requestHash];
+
+            if (!group) {
+                group = hashes[requestHash] = {
+                    weight: req.weightCheck.bodySize,
+                    urls: []
+                };
+            }
+
+            // A URL already recorded for this content is not added again
+            if (group.urls.indexOf(req.url) === -1) {
+                group.urls.push(req.url);
+            }
+        });
+
+        // Object.keys only visits own properties, unlike a bare for...in
+        Object.keys(hashes).forEach(function(hash) {
+            var group = hashes[hash];
+
+            if (group.urls.length > 1) {
+                list.push(group);
+                avoidableRequestsCount += group.urls.length - 1;
+            }
+        });
+
+        return {
+            avoidableRequests: avoidableRequestsCount,
+            count: list.length,
+            list: list
+        };
+    }
+
+
 
     function redownloadEntry(entry, httpAuth) {
         var deferred = Q.defer();

+ 1 - 0
package.json

@@ -54,6 +54,7 @@
     "is-woff": "1.0.3",
     "is-woff2": "1.0.0",
     "lwip": "0.0.9",
+    "md5": "2.1.0",
     "meow": "3.7.0",
     "minimize": "2.0.0",
     "parse-color": "1.0.0",