Pārlūkot izejas kodu

Improve content type detection (#179)

Gaël Métais 9 gadi atpakaļ
vecāks
revīzija
fe8466b0e3

+ 188 - 0
lib/tools/weightChecker/contentTypeChecker.js

@@ -0,0 +1,188 @@
+var debug   = require('debug')('ylt:contentTypeChecker');
+var Q       = require('q');
+var isJpg   = require('is-jpg');
+var isPng   = require('is-png');
+var isSvg   = require('is-svg');
+var isGif   = require('is-gif');
+var isWoff  = require('is-woff');
+var isWoff2 = require('is-woff2');
+var isOtf   = require('is-otf');
+var isTtf   = require('is-ttf');
+var isEot   = require('is-eot');
+
+/**
+ * Factory for the content-type checker.
+ *
+ * Builds an object exposing checkContentType() and findContentType(). The
+ * rewriteContentType() helper and the contentTypes table stay private to the
+ * closure.
+ */
+var ContentTypeChecker = function() {
+
+    /**
+     * Compares the content type recorded on a request entry with the type
+     * detected from its downloaded body, and corrects the entry when the two
+     * disagree.
+     *
+     * When a correction is made, the previous value is kept in
+     * entry.oldContentType (null if the recorded content type was empty).
+     * Entries without weightCheck data, without a body, or with a bodySize of
+     * 100 or less are left untouched.
+     *
+     * @param {Object} entry - request entry, modified in place. Reads url,
+     *     contentType, weightCheck.body and weightCheck.bodySize.
+     * @returns {Promise} Q promise resolved with the same entry object.
+     *     Errors thrown during the detection are logged and swallowed.
+     */
+    function checkContentType(entry) {
+        var deferred = Q.defer();
+
+        debug('Entering contentTypeChecker');
+        
+        // Ignore very small files as they are generally tracking pixels
+        if (entry.weightCheck && entry.weightCheck.body && entry.weightCheck.bodySize > 100) {
+            var foundType;
+
+            try {
+                foundType = findContentType(entry.weightCheck.body);
+            
+                // Case 1: no content type was recorded for this request
+                if (!entry.contentType || entry.contentType === '') {
+                    if (foundType === null) {
+                        // The body matches none of the known signatures: only log it
+                        debug('ContentType is empty for file %s', entry.url);
+                    } else {
+                        debug('ContentType is empty for file %s. It should be %s.', entry.url, foundType.mimes[0]);
+                        // null records that there was no previous content type
+                        entry.oldContentType = null;
+                        rewriteContentType(entry, foundType);
+                    }
+                } else {
+                    // Case 2: a content type was recorded. It is only treated as wrong
+                    // when the body was recognized AND the recorded value is not one
+                    // of the mimes accepted for the detected type.
+                    if (foundType !== null && foundType.mimes.indexOf(entry.contentType) === -1) {
+                        debug('ContentType %s is wrong for %s. It should be %s.', entry.contentType, entry.url, foundType.mimes[0]);
+                        entry.oldContentType = entry.contentType;
+                        rewriteContentType(entry, foundType);
+                    }
+                }
+
+            } catch(err) {
+                // Detection is best-effort: log and fall through so that the
+                // entry is still resolved below
+                debug('Error while analyzing the contentType of %s', entry.url);
+                debug(err);
+            }
+        }
+
+        // The promise is resolved synchronously, whether or not a check was done
+        deferred.resolve(entry);
+
+        return deferred.promise;
+    }
+
+    /**
+     * Detects the type of a file from its content.
+     *
+     * The checks run in a fixed order (jpeg, png, svg, gif, woff, woff2, otf,
+     * ttf, eot) and the first match wins.
+     *
+     * @param {string|Buffer} body - file content. NOTE(review): checkContentType
+     *     passes entry.weightCheck.body, presumably a binary-encoded string,
+     *     while the unit test passes Buffers -- confirm the expected type.
+     * @returns {Object|null} the matching entry of the contentTypes table
+     *     (an object with mimes and updateFn), or null if nothing matched.
+     */
+    function findContentType(body) {
+        // The binary detectors below are all given this Buffer
+        var buffer = new Buffer(body, 'binary');
+
+        if (isJpg(buffer)) {
+            return contentTypes.jpeg;
+        }
+
+        if (isPng(buffer)) {
+            return contentTypes.png;
+        }
+
+        // SVG detection works on the raw body rather than on the Buffer. The
+        // plain /<svg/ test runs first, so isSvg is only called when the body
+        // contains an <svg tag (presumably a workaround for the issue below).
+        // https://github.com/sindresorhus/is-svg/issues/7
+        if (/<svg/.test(body) && isSvg(body)) {
+            return contentTypes.svg;
+        }
+
+        if (isGif(buffer)) {
+            return contentTypes.gif;
+        }
+
+        if (isWoff(buffer)) {
+            return contentTypes.woff;
+        }
+
+        if (isWoff2(buffer)) {
+            return contentTypes.woff2;
+        }
+
+        if (isOtf(buffer)) {
+            return contentTypes.otf;
+        }
+
+        if (isTtf(buffer)) {
+            return contentTypes.ttf;
+        }
+
+        if (isEot(buffer)) {
+            return contentTypes.eot;
+        }
+
+        // None of the known signatures matched
+        return null;
+    }
+
+
+    /**
+     * Overwrites the content type of an entry with the detected one.
+     *
+     * Removes every type flag (isHTML, isImage, ...) from the entry, sets
+     * entry.contentType to the first mime of contentTypeObj, then calls
+     * contentTypeObj.updateFn so that entry.type and the relevant flags are
+     * set again for the detected type.
+     *
+     * @param {Object} entry - request entry, modified in place.
+     * @param {Object} contentTypeObj - an entry of the contentTypes table.
+     */
+    function rewriteContentType(entry, contentTypeObj) {
+        // Drop all flags first so none is left over from the previous type
+        delete(entry.isHTML);
+        delete(entry.isXML);
+        delete(entry.isCSS);
+        delete(entry.isJS);
+        delete(entry.isJSON);
+        delete(entry.isImage);
+        delete(entry.isSVG);
+        delete(entry.isVideo);
+        delete(entry.isWebFont);
+        delete(entry.isTTF);
+        delete(entry.isFavicon);
+
+        entry.contentType = contentTypeObj.mimes[0];
+        contentTypeObj.updateFn(entry);
+    }
+
+    // Table of the detectable types. For each type:
+    //  - mimes: the content types accepted as correct for this kind of file;
+    //    the first one is the value written into a corrected entry.
+    //  - updateFn: sets entry.type and the flags that go with the type.
+    var contentTypes = {
+        jpeg: {
+            mimes: ['image/jpeg'],
+            updateFn: function(entry) {
+                entry.type = 'image';
+                entry.isImage = true;
+            }
+        },
+        png: {
+            mimes: ['image/png'],
+            updateFn: function(entry) {
+                entry.type = 'image';
+                entry.isImage = true;
+            }
+        },
+        svg: {
+            mimes: ['image/svg+xml'],
+            updateFn: function(entry) {
+                entry.type = 'image';
+                entry.isImage = true;
+                // SVG is the only image type that gets an extra flag
+                entry.isSVG = true;
+            }
+        },
+        gif: {
+            mimes: ['image/gif'],
+            updateFn: function(entry) {
+                entry.type = 'image';
+                entry.isImage = true;
+            }
+        },
+        woff: {
+            mimes: ['application/x-font-woff', 'application/font-woff', 'font/woff'],
+            updateFn: function(entry) {
+                entry.type = 'webfont';
+                entry.isWebFont = true;
+            }
+        },
+        woff2: {
+            mimes: ['font/woff2', 'application/x-font-woff2', 'application/font-woff2'],
+            updateFn: function(entry) {
+                entry.type = 'webfont';
+                entry.isWebFont = true;
+            }
+        },
+        otf: {
+            mimes: ['application/x-font-otf', 'font/otf', 'font/opentype', 'application/x-font-opentype'],
+            updateFn: function(entry) {
+                entry.type = 'webfont';
+                entry.isWebFont = true;
+            }
+        },
+        ttf: {
+            mimes: ['application/x-font-ttf', 'font/ttf', 'application/x-font-truetype'],
+            updateFn: function(entry) {
+                // NOTE(review): isTTF is deleted by rewriteContentType but is
+                // not set again here -- confirm this is intended
+                entry.type = 'webfont';
+                entry.isWebFont = true;
+            }
+        },
+        eot: {
+            mimes: ['application/vnd.ms-fontobject', 'font/eot'],
+            updateFn: function(entry) {
+                entry.type = 'webfont';
+                entry.isWebFont = true;
+            }
+        }
+    };
+    
+    // Public API: rewriteContentType and contentTypes are not exposed
+    return {
+        checkContentType: checkContentType,
+        findContentType: findContentType
+    };
+};
+
+// The factory returns an object, so that object is what the module exports
+module.exports = new ContentTypeChecker();

+ 31 - 9
lib/tools/weightChecker/weightChecker.js

@@ -5,16 +5,17 @@
  */
 
 
-var debug           = require('debug')('ylt:weightChecker');
-var Q               = require('q');
-var http            = require('http');
-var zlib            = require('zlib');
-var async           = require('async');
-var request         = require('request');
+var debug               = require('debug')('ylt:weightChecker');
+var Q                   = require('q');
+var http                = require('http');
+var zlib                = require('zlib');
+var async               = require('async');
+var request             = require('request');
 
-var imageOptimizer  = require('./imageOptimizer');
-var fileMinifier    = require('./fileMinifier');
-var gzipCompressor  = require('./gzipCompressor');
+var imageOptimizer      = require('./imageOptimizer');
+var fileMinifier        = require('./fileMinifier');
+var gzipCompressor      = require('./gzipCompressor');
+var contentTypeChecker  = require('./contentTypeChecker');
 
 
 var WeightChecker = function() {
@@ -47,6 +48,8 @@ var WeightChecker = function() {
                 
                 redownloadEntry(entry, httpAuth)
 
+                .then(contentTypeChecker.checkContentType)
+
                 .then(imageOptimizer.optimizeImage)
 
                 .then(fileMinifier.minifyFile)
@@ -89,6 +92,10 @@ var WeightChecker = function() {
                 });
 
 
+                // Wrong contentType
+                offenders.incorrectContentTypes = listIncorrectContentTypes(results);
+                metrics.incorrectContentTypes = offenders.incorrectContentTypes.length;
+
                 // Total weight
                 offenders.totalWeight = listRequestWeight(results);
                 metrics.totalWeight = offenders.totalWeight.totalWeight;
@@ -121,6 +128,21 @@ var WeightChecker = function() {
         return deferred.promise;
     }
 
+    /**
+     * Lists the requests whose content type was corrected.
+     *
+     * A request is reported when its oldContentType property is truthy or
+     * strictly null; contentTypeChecker sets it to null when the original
+     * content type was empty. Requests where the property is undefined are
+     * skipped.
+     *
+     * @param {Array} requests - request entries; reads url, oldContentType
+     *     and contentType on each of them.
+     * @returns {Array} one {url, current, correct} object per reported
+     *     request, where current is the old content type (possibly null)
+     *     and correct is the content type now stored on the entry.
+     */
+    function listIncorrectContentTypes(requests) {
+        var results = [];
+        
+        requests.forEach(function(req) {
+            // The explicit null test keeps "was empty" entries, which the
+            // truthiness test alone would drop
+            if (req.oldContentType || req.oldContentType === null) {
+                results.push({
+                    url: req.url,
+                    current: req.oldContentType,
+                    correct: req.contentType
+                });
+            }
+        });
+
+        return results;
+    }
 
     function listRequestWeight(requests) {
         var results = {

+ 9 - 0
package.json

@@ -43,7 +43,16 @@
     "imagemin-jpegtran": "5.0.2",
     "imagemin-optipng": "5.1.0",
     "imagemin-svgo": "5.1.0",
+    "is-eot": "1.0.0",
+    "is-gif": "1.0.0",
     "is-http2": "1.0.4",
+    "is-jpg": "1.0.0",
+    "is-otf": "0.1.2",
+    "is-png": "1.0.0",
+    "is-svg": "2.0.1",
+    "is-ttf": "0.2.2",
+    "is-woff": "1.0.3",
+    "is-woff2": "1.0.0",
     "lwip": "0.0.9",
     "meow": "3.7.0",
     "minimize": "2.0.0",

+ 20 - 0
test/core/contentTypeCheckerTest.js

@@ -0,0 +1,20 @@
+var should = require('chai').should();
+var contentTypeChecker = require('../../lib/tools/weightChecker/contentTypeChecker');
+var fs = require('fs');
+var path = require('path');
+
+// Unit tests for findContentType, run against fixture files from test/www
+describe('contentTypeChecker', function() {
+
+    // Fixtures are read without an encoding, so each one is a Buffer
+    var jpgImageContent = fs.readFileSync(path.resolve(__dirname, '../www/jpeg-image.jpg'));
+    var pngImageContent = fs.readFileSync(path.resolve(__dirname, '../www/png-image.png'));
+    var svgImageContent = fs.readFileSync(path.resolve(__dirname, '../www/svg-image.svg'));
+    var cssFileContent = fs.readFileSync(path.resolve(__dirname, '../www/unminified-stylesheet.css'));
+    
+    it('detect the right content type', function() {
+        // Each image fixture must be recognized with its expected mime list
+        contentTypeChecker.findContentType(jpgImageContent).mimes.should.deep.equal(['image/jpeg']);
+        contentTypeChecker.findContentType(pngImageContent).mimes.should.deep.equal(['image/png']);
+        contentTypeChecker.findContentType(svgImageContent).mimes.should.deep.equal(['image/svg+xml']);
+        // A stylesheet is none of the detectable types, so the result is null
+        should.equal(contentTypeChecker.findContentType(cssFileContent), null);
+    });
+
+});