// redownload.js
/*
 * Redownloads every file after Phantomas has finished.
 * Checks the weight of each file and every kind of compression.
 */
  6. /*jshint -W069 */
  7. var debug = require('debug')('ylt:redownload');
  8. var Q = require('q');
  9. var http = require('http');
  10. var zlib = require('zlib');
  11. var async = require('async');
  12. var request = require('request');
  13. var md5 = require('md5');
  14. //var imageOptimizer = require('./imageOptimizer');
  15. var fileMinifier = require('./fileMinifier');
  16. var gzipCompressor = require('./gzipCompressor');
  17. var brotliCompressor = require('./brotliCompressor');
  18. var contentTypeChecker = require('./contentTypeChecker');
  19. var fontAnalyzer = require('./fontAnalyzer');
  20. //var imageDimensions = require('./imageDimensions');
  21. var Redownload = function() {
  22. var MAX_PARALLEL_DOWNLOADS = 5;
  23. var REQUEST_TIMEOUT = 30000; // 30 seconds
  24. // This function will re-download every asset and check if it could be optimized
  /**
   * Re-downloads every request listed by Phantomas, runs each downloaded
   * entry through the content-type / minification / gzip / brotli / font
   * checks, then aggregates the outcome into metrics and offenders.
   *
   * The aggregated outcome is written to `data.toolsResults.redownload`
   * as `{metrics: …, offenders: …}`.
   *
   * @param {Object} data Run data. `data.toolsResults.phantomas.offenders`
   *     is read; `data.params.options` may carry `authUser`/`authPass`
   *     (HTTP auth) and `proxy`.
   * @returns {Promise} Q promise resolved with `data`, rejected with the
   *     first error reported by an entry's processing chain. Progress is
   *     notified as the ratio `doneCount / totalCount`.
   */
  function recheckAllFiles(data) {
    var startTime = Date.now();
    debug('Redownload started');

    var deferred = Q.defer();

    var requestsOffenders = data.toolsResults.phantomas.offenders.requests;
    var gzipOffenders = data.toolsResults.phantomas.offenders.gzipRequests;
    var postOffenders = data.toolsResults.phantomas.offenders.postRequests;
    var notFoundOffenders = data.toolsResults.phantomas.offenders.notFound;
    var redirectOffenders = data.toolsResults.phantomas.offenders.redirects;

    var requestsList = mergeOffenders(requestsOffenders, gzipOffenders, postOffenders, notFoundOffenders, redirectOffenders);

    var totalCount = requestsList.length;
    var doneCount = 0;

    var options = (data.params && data.params.options) || {};

    var httpAuth = null;
    if (options.authUser && options.authPass) {
      httpAuth = {
        username: options.authUser,
        password: options.authPass
      };
    }

    var proxy = null;
    if (options.proxy) {
      proxy = options.proxy;
      // Only prepend a scheme when the value has none. Looking for the
      // substring "http:" would wrongly turn "https://host" into
      // "http://https://host".
      if (!/^[a-z][a-z0-9+.\-]*:\/\//i.test(proxy)) {
        proxy = 'http://' + proxy;
      }
    }

    var differentCharacters = '';
    if (data.toolsResults.phantomas.offenders.differentCharacters && data.toolsResults.phantomas.offenders.differentCharacters.length > 0) {
      differentCharacters = data.toolsResults.phantomas.offenders.differentCharacters[0];
    }

    // Transform every request into a download function with a callback when done
    var redownloadList = requestsList.map(function(entry) {
      return function(callback) {

        redownloadEntry(entry, httpAuth, proxy)

        .then(contentTypeChecker.checkContentType)

        //.then(imageOptimizer.optimizeImage)

        //.then(imageDimensions.getDimensions)

        .then(fileMinifier.minifyFile)

        .then(gzipCompressor.compressFile)

        .then(brotliCompressor.compressFile)

        .then(function(entry) {
          return fontAnalyzer.analyzeFont(entry, differentCharacters);
        })

        .then(function(newEntry) {
          debug('File %s - Redownloaded, optimized, minified, compressed, analyzed: done', entry.url);

          // For the progress bar
          doneCount ++;
          deferred.notify(doneCount/totalCount);

          callback(null, newEntry);
        })

        .fail(function(err) {
          callback(err);
        });
      };
    });

    // Launch all redownload functions and wait for completion
    async.parallelLimit(redownloadList, MAX_PARALLEL_DOWNLOADS, function(err, results) {

      if (err) {
        debug(err);
        deferred.reject(err);
        return;
      }

      debug('All files checked');
      // Declared locally: the assignment used to create an implicit global
      var endTime = Date.now();
      debug('Redownload took %d ms', endTime - startTime);

      var metrics = {};
      var offenders = {};

      // Count requests
      offenders.totalRequests = listRequestsByType(results);
      metrics.totalRequests = offenders.totalRequests.total;

      // Remove unwanted responses (redownload failed, about:blank)
      results = results.filter(function(result) {
        return (result !== null && result.weightCheck && result.url.indexOf('about:blank') !== 0);
      });

      // Wrong contentType
      offenders.incorrectContentTypes = listIncorrectContentTypes(results);
      metrics.incorrectContentTypes = offenders.incorrectContentTypes.length;

      // Total weight
      offenders.totalWeight = listRequestWeight(results);
      metrics.totalWeight = offenders.totalWeight.totalWeight;

      // Empty files
      offenders.emptyRequests = listEmptyRequests(results);
      metrics.emptyRequests = offenders.emptyRequests.length;

      // Remove some more unwanted responses (redirections and empty files)
      results = results.filter(function(result) {
        return (/* (result.status < 300 || result.status >= 400) && */ result.weightCheck.bodySize > 0);
      });

      // Image compression
      //offenders.imageOptimization = listImagesNotOptimized(results);
      //metrics.imageOptimization = offenders.imageOptimization.totalGain;

      // Image width
      //offenders.imagesTooLarge = listImagesTooLarge(results, data.params.options.device);
      //metrics.imagesTooLarge = offenders.imagesTooLarge.length;

      // File minification
      offenders.fileMinification = listFilesNotMinified(results);
      metrics.fileMinification = offenders.fileMinification.totalGain;

      // Gzip/Brotli compression
      offenders.compression = listFilesNotBrotlified(results);
      metrics.compression = offenders.compression.totalGain;

      // Detect identical files
      offenders.identicalFiles = listIdenticalFiles(results);
      metrics.identicalFiles = offenders.identicalFiles.avoidableRequests;

      // Fonts count
      offenders.fontsCount = listFonts(results);
      metrics.fontsCount = offenders.fontsCount.count;

      // Conversion to woff2
      offenders.nonWoff2Fonts = listNonWoff2Fonts(results);
      metrics.nonWoff2Fonts = offenders.nonWoff2Fonts.totalGain;

      // Heavy fonts
      offenders.heavyFonts = listHeavyFonts(results);
      metrics.heavyFonts = offenders.heavyFonts.totalGain;

      // Unused Unicode ranges
      offenders.unusedUnicodeRanges = listUnusedUnicodeRanges(results);
      metrics.unusedUnicodeRanges = offenders.unusedUnicodeRanges.count;

      // Detect WordPress
      metrics.isWordPress = detectWordPress(results);

      data.toolsResults.redownload = {
        metrics: metrics,
        offenders: offenders
      };

      // Only the entries that survived both filters above are cleaned
      cleanResults(results);

      deferred.resolve(data);
    });

    return deferred.promise;
  }
  /**
   * Annotates each request, in place, with what the other Phantomas
   * offender lists say about its URL.
   *
   * @param {Object[]} requests Requests to annotate (each has a `url`).
   * @param {Object[]} [compressedOffenders] Entries with `url`, `bodySize`
   *     and `transferedSize`; a match sets `compressed` and copies both sizes.
   * @param {string[]} [postOffenders] URLs; a match sets `post`.
   * @param {string[]} [notFoundOffenders] URLs; a match sets `notFound`.
   * @param {string[]} [redirectOffenders] Messages whose first
   *     space-separated token is a URL; a match sets `redirect`.
   * @returns {Object[]} The same `requests` array.
   */
  function mergeOffenders(requests, compressedOffenders, postOffenders, notFoundOffenders, redirectOffenders) {
    for (var i = 0; i < requests.length; i++) {
      var current = requests[i];

      // Compressed? Only the first entry with the same URL is used.
      if (compressedOffenders) {
        for (var j = 0; j < compressedOffenders.length; j++) {
          var candidate = compressedOffenders[j];
          if (candidate.url === current.url) {
            current.compressed = true;
            current.bodySize = candidate.bodySize;
            current.transferedSize = candidate.transferedSize;
            break;
          }
        }
      }

      // POST request?
      if (postOffenders && postOffenders.indexOf(current.url) !== -1) {
        current.post = true;
      }

      // 404?
      if (notFoundOffenders && notFoundOffenders.indexOf(current.url) !== -1) {
        current.notFound = true;
      }

      // Redirection? The URL is the first word of the offender message.
      if (redirectOffenders) {
        var wasRedirected = redirectOffenders.some(function(message) {
          return message.split(' ')[0] === current.url;
        });
        if (wasRedirected) {
          current.redirect = true;
        }
      }
    }

    return requests;
  }
  /**
   * Lists the requests whose `oldContentType` is truthy or exactly `null`.
   *
   * @param {Object[]} requests
   * @returns {Object[]} One `{url, current, correct}` per such request,
   *     where `current` is `oldContentType` and `correct` is `contentType`.
   */
  function listIncorrectContentTypes(requests) {
    return requests
      .filter(function(req) {
        return req.oldContentType === null || Boolean(req.oldContentType);
      })
      .map(function(req) {
        return {
          url: req.url,
          current: req.oldContentType,
          correct: req.contentType
        };
      });
  }
  /**
   * Lists the URLs of GET requests answered with a 200 status and an
   * empty body.
   *
   * The body size measured by the redownload is used when it is a number;
   * otherwise the request's `contentLength` is used.
   *
   * @param {Object[]} requests Requests that each have a `weightCheck` object.
   * @returns {string[]} URLs of the empty requests.
   */
  function listEmptyRequests(requests) {
    var results = [];

    requests.forEach(function(req) {
      // A measured size of 0 must not fall through to contentLength:
      // `0 || undefined` would hide exactly the files we are looking for.
      var weight = (typeof req.weightCheck.bodySize === 'number') ? req.weightCheck.bodySize : req.contentLength;

      if (weight === 0 && req.method === 'GET' && req.status === 200) {
        results.push(req.url);
      }
    });

    return results;
  }
  /**
   * Sums request weights overall and per resource type.
   *
   * A request weighs `bodySize + headersSize` when the redownload measured
   * a numeric `bodySize`, otherwise its `contentLength`; anything falsy
   * counts as 0. Requests with a missing or unlisted `type` go to `other`.
   *
   * @param {Object[]} requests Requests that each have a `weightCheck` object.
   * @returns {Object} `{totalWeight, byType}` where each `byType` entry is
   *     `{totalWeight, requests: [{url, weight}]}`.
   */
  function listRequestWeight(requests) {
    var byType = {};
    ['html', 'css', 'js', 'json', 'image', 'video', 'webfont', 'other'].forEach(function(typeName) {
      byType[typeName] = {
        totalWeight: 0,
        requests: []
      };
    });

    var summary = {
      totalWeight: 0,
      byType: byType
    };

    requests.forEach(function(req) {
      var check = req.weightCheck;

      var weight;
      if (typeof check.bodySize === 'number') {
        weight = check.bodySize + check.headersSize;
      } else {
        weight = req.contentLength;
      }
      weight = weight || 0;

      var bucket = byType[req.type || 'other'] || byType.other;

      summary.totalWeight += weight;
      bucket.totalWeight += weight;
      bucket.requests.push({
        url: req.url,
        weight: weight
      });
    });

    return summary;
  }
  268. /*function listImagesNotOptimized(requests) {
  269. var results = {
  270. totalGain: 0,
  271. images: []
  272. };
  273. requests.forEach(function(req) {
  274. if (req.weightCheck.bodySize > 0 && imageOptimizer.entryTypeCanBeOptimized(req) && req.weightCheck.isOptimized === false) {
  275. var before, after;
  276. if (req.weightCheck.isCompressed === true) {
  277. // The resource is compressed
  278. before = req.weightCheck.bodySize;
  279. if (req.weightCheck.compressionTool === 'brotli') {
  280. after = req.weightCheck.afterOptimizationAndBrotliCompression;
  281. } else {
  282. after = req.weightCheck.afterOptimizationAndGzipCompression;
  283. }
  284. } else if (req.weightCheck.afterBrotliCompression) {
  285. // The resource is not compressed but should be
  286. before = req.weightCheck.afterBrotliCompression;
  287. after = req.weightCheck.afterOptimizationAndBrotliCompression;
  288. } else {
  289. // The resource is not compressed but is not subject to compression
  290. before = req.weightCheck.bodySize;
  291. after = req.weightCheck.optimized;
  292. }
  293. var gain = before - after;
  294. if (gain > 200) {
  295. results.totalGain += gain;
  296. results.images.push({
  297. url: req.url,
  298. originalWeigth: req.weightCheck.bodySize,
  299. isCompressible: (req.weightCheck.afterBrotliCompression > 0),
  300. isCompressed: req.weightCheck.isCompressed,
  301. originalCompressedWeight: before,
  302. afterOptimizationAndCompression: after,
  303. lossless: req.weightCheck.lossless,
  304. lossy: req.weightCheck.lossy,
  305. gain: gain
  306. });
  307. }
  308. }
  309. });
  310. return results;
  311. }*/
  312. /*function listImagesTooLarge(requests, device) {
  313. var results = [];
  314. requests.forEach(function(req) {
  315. const thresholds = {
  316. 'phone': 1200,
  317. 'tablet': 1800,
  318. 'desktop': 2400,
  319. 'desktop-hd': 3200
  320. };
  321. if (req.weightCheck.bodySize > 0 &&
  322. req.imageDimensions &&
  323. req.imageDimensions.width > thresholds[device]) {
  324. results.push({
  325. url: req.url,
  326. weight: req.weightCheck.bodySize,
  327. width: req.imageDimensions.width,
  328. height: req.imageDimensions.height
  329. });
  330. }
  331. });
  332. return results;
  333. }*/
  /**
   * Lists the files that would shrink by more than 200 bytes if minified.
   *
   * Only requests with a non-empty body, a type accepted by
   * `fileMinifier.entryTypeCanBeMinified` and `weightCheck.isOptimized === false`
   * are considered. Sizes are compared in the form the file is (or would be)
   * transferred: compressed against compressed where applicable.
   *
   * @param {Object[]} requests Requests that each have a `weightCheck` object.
   * @returns {Object} `{totalGain, files}`.
   */
  function listFilesNotMinified(requests) {
    var report = {
      totalGain: 0,
      files: []
    };

    requests.forEach(function(req) {
      var check = req.weightCheck;

      var isCandidate = check.bodySize > 0 &&
        fileMinifier.entryTypeCanBeMinified(req) &&
        check.isOptimized === false;
      if (!isCandidate) {
        return;
      }

      // Default: not compressed and not subject to compression
      var before = check.bodySize;
      var after = check.optimized;

      if (check.isCompressed === true) {
        // Already compressed: compare with the same compression tool
        after = (check.compressionTool === 'brotli') ?
          check.afterOptimizationAndBrotliCompression :
          check.afterOptimizationAndGzipCompression;
      } else if (check.afterBrotliCompression) {
        // Not compressed but should be: compare the brotli sizes
        before = check.afterBrotliCompression;
        after = check.afterOptimizationAndBrotliCompression;
      }

      var gain = before - after;
      if (gain > 200) {
        report.totalGain += gain;
        report.files.push({
          url: req.url,
          originalWeigth: check.bodySize,
          isCompressible: (check.afterBrotliCompression > 0),
          isCompressed: check.isCompressed,
          originalCompressedWeight: before,
          afterOptimizationAndCompression: after,
          optimized: check.optimized,
          gain: gain
        });
      }
    });

    return report;
  }
  /**
   * Lists the files not served with brotli that would shrink by more than
   * 200 bytes if they were.
   *
   * For a file that was already compressed, the gain is measured against
   * its transferred size; for an uncompressed file, against its
   * uncompressed size (the gzip size is reported too).
   *
   * @param {Object[]} requests Requests that each have a `weightCheck` object.
   * @returns {Object} `{totalGain, files}`.
   */
  function listFilesNotBrotlified(requests) {
    var report = {
      totalGain: 0,
      files: []
    };

    requests.forEach(function(req) {
      var check = req.weightCheck;

      if (check.compressionTool === 'brotli') {
        return;
      }

      var file = {
        url: req.url,
        wasCompressed: check.isCompressed,
        brotlified: check.afterBrotliCompression
      };

      if (!check.isCompressed) {
        // No compression at all on the original response
        file.originalSize = check.uncompressedSize;
        file.gzipped = check.afterGzipCompression;
        file.gain = check.uncompressedSize - check.afterBrotliCompression;
      } else {
        // Gzipped (or deflated) on the original response
        file.originalSize = check.bodySize;
        file.gain = check.bodySize - check.afterBrotliCompression;
      }

      // Keep only gains that are worth reporting
      if (file.gain > 200) {
        report.totalGain += file.gain;
        report.files.push(file);
      }
    });

    return report;
  }
  /**
   * Groups request URLs by resource type and counts them.
   *
   * Requests whose URL is exactly `about:blank` are ignored. Requests with
   * a missing or unlisted `type` are grouped under `other`.
   *
   * @param {Object[]} requests
   * @returns {Object} `{total, byType}` where `byType` maps each type to
   *     an array of URLs.
   */
  function listRequestsByType(requests) {
    var byType = {
      html: [],
      css: [],
      js: [],
      json: [],
      image: [],
      video: [],
      webfont: [],
      other: []
    };
    var total = 0;

    for (var i = 0; i < requests.length; i++) {
      var req = requests[i];
      if (req.url === 'about:blank') {
        continue;
      }

      var bucket = byType[req.type || 'other'] || byType.other;
      bucket.push(req.url);
      total ++;
    }

    return {
      total: total,
      byType: byType
    };
  }
  /**
   * Detects files downloaded from several URLs although their content is
   * byte-for-byte identical (same md5 hash of `weightCheck.bodyBuffer`).
   *
   * Images smaller than 80 bytes are skipped, to try to exclude tracking
   * pixels.
   *
   * @param {Object[]} requests Requests that each have a `weightCheck`
   *     object with `bodySize` and `bodyBuffer`.
   * @returns {Object} `{avoidableRequests, count, list}` where `list` holds
   *     one `{weight, urls}` per content seen on more than one URL, `count`
   *     is the length of `list`, and `avoidableRequests` is the number of
   *     URLs beyond the first for each such content.
   */
  function listIdenticalFiles(requests) {
    var hashes = {};
    var list = [];
    var avoidableRequestsCount = 0;

    requests.forEach(function(req) {
      // Try to exclude tracking pixels. Checked first so that no hash is
      // computed for an entry that is going to be discarded anyway.
      if (req.weightCheck.bodySize < 80 && req.type === 'image') {
        return;
      }

      var requestHash = md5(req.weightCheck.bodyBuffer);

      if (!hashes[requestHash]) {
        hashes[requestHash] = {
          weight: req.weightCheck.bodySize,
          urls: []
        };
      }

      // The same URL requested twice is not a duplicated file
      if (hashes[requestHash].urls.indexOf(req.url) === -1) {
        hashes[requestHash].urls.push(req.url);
      }
    });

    Object.keys(hashes).forEach(function(hash) {
      if (hashes[hash].urls.length > 1) {
        list.push(hashes[hash]);
        avoidableRequestsCount += hashes[hash].urls.length - 1;
      }
    });

    return {
      avoidableRequests: avoidableRequestsCount,
      count: list.length,
      list: list
    };
  }
  /**
   * Lists the requests flagged as web fonts.
   *
   * @param {Object[]} requests Requests that each have a `weightCheck` object.
   * @returns {Object} `{count, list}` where `list` holds `{url, size}` and
   *     `size` is the downloaded body size.
   */
  function listFonts(requests) {
    var fonts = requests
      .filter(function(req) {
        return req.isWebFont;
      })
      .map(function(req) {
        return {
          url: req.url,
          size: req.weightCheck.bodySize
        };
      });

    return {
      count: fonts.length,
      list: fonts
    };
  }
  /**
   * Lists the fonts that would shrink by more than 200 bytes if converted
   * to woff2.
   *
   * Only requests not flagged `isWoff2` and having a truthy
   * `weightCheck.sizeAsWoff2` are considered.
   *
   * @param {Object[]} requests Requests that each have a `weightCheck` object.
   * @returns {Object} `{totalGain, fonts}` where each font is
   *     `{url, originalSize, type, woff2Size, gain}` and `type` is
   *     `'woff'`, `'ttf'` or `null`.
   */
  function listNonWoff2Fonts(requests) {
    var report = {
      totalGain: 0,
      fonts: []
    };

    requests.forEach(function(req) {
      if (req.isWoff2 || !req.weightCheck.sizeAsWoff2) {
        return;
      }

      var originalSize = req.weightCheck.bodySize;
      var woff2Size = req.weightCheck.sizeAsWoff2;
      var gain = originalSize - woff2Size;
      if (!(gain > 200)) {
        return;
      }

      var format = req.isWoff ? 'woff' : (req.isTTF ? 'ttf' : null);

      report.totalGain += gain;
      report.fonts.push({
        url: req.url,
        originalSize: originalSize,
        type: format,
        woff2Size: woff2Size,
        gain: gain
      });
    });

    return report;
  }
  /**
   * Lists the analyzed web fonts and measures how much those above 40 KB
   * exceed that limit.
   *
   * Only requests flagged `isWebFont` that carry `fontMetrics` are listed.
   *
   * @param {Object[]} requests Requests that each have a `weightCheck` object.
   * @returns {Object} `{count, fonts, totalGain}`: `fonts` lists every
   *     analyzed font, `count` is the number of fonts above the limit and
   *     `totalGain` the sum of the bytes above the limit.
   */
  function listHeavyFonts(requests) {
    var MAX_FONT_WEIGHT = 40 * 1024;

    var analyzedFonts = requests.filter(function(req) {
      return req.isWebFont && req.fontMetrics;
    });

    var fonts = analyzedFonts.map(function(req) {
      return {
        url: req.url,
        weight: req.weightCheck.bodySize,
        numGlyphs: req.fontMetrics.numGlyphs,
        averageGlyphComplexity: req.fontMetrics.averageGlyphComplexity
      };
    });

    var overweight = analyzedFonts.filter(function(req) {
      return req.weightCheck.bodySize > MAX_FONT_WEIGHT;
    });

    var totalGain = overweight.reduce(function(sum, req) {
      return sum + (req.weightCheck.bodySize - MAX_FONT_WEIGHT);
    }, 0);

    return {
      count: overweight.length,
      fonts: fonts,
      totalGain: totalGain
    };
  }
  /**
   * Reports, for each analyzed web font, the Unicode ranges it declares and
   * counts those that look unused by the page.
   *
   * A named range is flagged `underused` (the range object is mutated) when
   * its coverage is above 0.25 and it shares no glyph with the page content;
   * the "Others" range when it holds more than 50 code points and shares no
   * glyph. A font whose "Others" range holds more than 90% of its glyphs is
   * reported as an icon font. For other fonts, more than 25 glyphs not
   * accounted for by any range add one to the count.
   *
   * @param {Object[]} requests Requests that each have a `weightCheck` object.
   * @returns {Object} `{count, fonts}`.
   */
  function listUnusedUnicodeRanges(requests) {
    var fonts = [];
    var totalUnused = 0;

    requests.forEach(function(req) {
      if (!(req.isWebFont && req.fontMetrics && req.fontMetrics.unicodeRanges)) {
        return;
      }

      var metrics = req.fontMetrics;
      var namedRanges = [];
      var othersRange = null;
      var codePointsCount = 0;
      var underusedInFont = 0;

      Object.keys(metrics.unicodeRanges).forEach(function(rangeName) {
        var range = metrics.unicodeRanges[rangeName];
        var isOthers = (rangeName === 'Others');

        // Named ranges without any code point are ignored
        if (!isOthers && !(range.charset.length > 0)) {
          return;
        }

        // Reminder: range.coverage = glyphs declared in this range, divided by the range size
        var isLargeEnough = isOthers ? (range.charset.length > 50) : (range.coverage > 0.25);
        if (isLargeEnough && range.numGlyphsInCommonWithPageContent === 0) {
          range.underused = true;
          underusedInFont ++;
        }

        codePointsCount += range.charset.length;

        if (isOthers) {
          othersRange = range;
        } else {
          namedRanges.push(range);
        }
      });

      // More than 90% of the glyphs in "Others": it looks like an icons font
      var isIconFont = Boolean(othersRange) && (othersRange.charset.length / metrics.numGlyphs > 0.9);

      if (isIconFont) {
        fonts.push({
          url: req.url,
          weight: req.weightCheck.bodySize,
          isIconFont: true,
          glyphs: metrics.numGlyphs,
          numGlyphsInCommonWithPageContent: metrics.numGlyphsInCommonWithPageContent
        });

        // Less than 5% of the icons are used: report the font as underused
        if (othersRange.numGlyphsInCommonWithPageContent / othersRange.charset.length <= 0.05) {
          underusedInFont = 1;
        }
      } else {
        // "Others" goes back at the end of the list
        if (othersRange) {
          namedRanges.push(othersRange);
        }

        var ligaturesOrHiddenChars = metrics.numGlyphs - codePointsCount;
        if (ligaturesOrHiddenChars > 25) {
          totalUnused ++;
        }

        fonts.push({
          url: req.url,
          weight: req.weightCheck.bodySize,
          isIconFont: false,
          unicodeRanges: namedRanges,
          ligaturesOrHiddenChars: ligaturesOrHiddenChars
        });
      }

      totalUnused += underusedInFont;
    });

    return {
      count: totalUnused,
      fonts: fonts
    };
  }
  /**
   * Tells whether the page looks like a WordPress site, by searching the
   * first downloaded HTML file for the `/wp-content/` or `/wp-includes/`
   * path fragments.
   *
   * @param {Object[]} requests
   * @returns {boolean} `true` when one of the fragments is found; `false`
   *     otherwise, or when no HTML request has a downloaded body.
   */
  function detectWordPress(requests) {
    // Only the first HTML file is checked
    var firstHtml = requests.find(function(candidate) {
      return candidate.isHTML && candidate.weightCheck && candidate.weightCheck.bodyBuffer;
    });

    if (!firstHtml) {
      return false;
    }

    debug('Checking if it is WordPress on file %s', firstHtml.url);

    var body = firstHtml.weightCheck.bodyBuffer;
    var isWordPress = ['/wp-content/', '/wp-includes/'].some(function(marker) {
      return body.indexOf(marker) >= 0;
    });

    debug(isWordPress ? 'Response: yes.' : 'Response: no.');
    return isWordPress;
  }
  /**
   * Downloads one request again and attaches the measurements to it as
   * `entry.weightCheck`.
   *
   * The returned promise is never rejected: a failed download resolves
   * with `entry.weightCheck = {message: …}`. POST requests are not
   * downloaded and get the same kind of `{message}` object. 404s,
   * redirects and `about:blank` resolve with the entry left untouched.
   *
   * @param {Object} entry Request to download (`url`, `method`,
   *     `contentType`, and the `post`/`notFound`/`redirect` flags).
   * @param {?Object} httpAuth `{username, password}` for HTTP
   *     authentication, or a falsy value.
   * @param {?string} proxy Proxy URL, or a falsy value.
   * @returns {Promise} Q promise resolved with `entry`.
   */
  function redownloadEntry(entry, httpAuth, proxy) {
    var deferred = Q.defer();

    function downloadError(message) {
      debug('Could not download %s Error: %s', entry.url, message);
      entry.weightCheck = {
        message: message
      };
      deferred.resolve(entry);
    }

    // Not downloaded again but will be counted in totalWeight
    function notDownloadableFile(message) {
      entry.weightCheck = {
        message: message
      };
      deferred.resolve(entry);
    }

    // Not counted in totalWeight
    function unwantedFile(message) {
      debug(message);
      deferred.resolve(entry);
    }

    if (entry.post) {
      notDownloadableFile('only downloading GET');
      // ... at least trying to
      return deferred.promise;
    }

    if (entry.notFound || entry.redirect) {
      unwantedFile('only downloading requests with status code 200');
      // ...at least trying to
      return deferred.promise;
    }

    if (entry.url === 'about:blank') {
      unwantedFile('not downloading about:blank');
      return deferred.promise;
    }

    debug('Downloading %s', entry.url);

    // Always add compression and webp/avif headers before sending, in case the server listens to them.
    // A plain object is used: headers are a name/value dictionary, not a list.
    var reqHeaders = {
      'Accept': '*/*,image/webp,image/avif',
      'Accept-Encoding': 'gzip, deflate, br',
      'Connection': 'keep-alive',
      'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.182 Safari/537.36'
    };

    var requestOptions = {
      method: entry.method,
      url: entry.url,
      headers: reqHeaders,
      timeout: REQUEST_TIMEOUT,
      proxy: proxy
    };

    // Basic auth
    if (httpAuth) {
      requestOptions.auth = {
        user: httpAuth.username,
        pass: httpAuth.password,
        sendImmediately: false // Tries a first time without auth, wait for a 401 error before resending
      };
    }

    download(requestOptions, entry.contentType, function(error, result) {
      if (error) {
        if (error.code === 'ETIMEDOUT') {
          downloadError('timeout after ' + REQUEST_TIMEOUT + 'ms');
        } else {
          downloadError('error while downloading: ' + error.code);
        }
        return;
      }

      debug('%s downloaded correctly', entry.url);

      entry.weightCheck = result;
      deferred.resolve(entry);
    });

    return deferred.promise;
  }
  694. // Inspired by https://github.com/cvan/fastHAR-api/blob/10cec585/app.js
  /**
   * Downloads one resource and measures it.
   *
   * The response is decompressed according to its `content-encoding`
   * header (`gzip`, `deflate` or `br`; anything else is read as is), so
   * that both the size over the wire and the uncompressed size are known.
   *
   * @param {Object} requestOptions Options handed to `request()`.
   * @param {string} contentType Not used by this function.
   * @param {Function} callback Called once, either as `callback(err)` or as
   *     `callback(null, result)`. On a status other than 200, `err` is
   *     `{code: <status>}`. `result` holds `bodyBuffer` (uncompressed
   *     body), `headersSize`, `headers`, `bodySize` (bytes over the wire),
   *     `isCompressed`, `compressionTool` (`'gzip'`, `'deflate'`,
   *     `'brotli'` or `''`) and `uncompressedSize`.
   */
  function download(requestOptions, contentType, callback) {
    var finished = false;

    // Several streams can report a failure for the same download; the
    // callback must fire only once.
    function finish(err, result) {
      if (finished) {
        return;
      }
      finished = true;
      if (err) {
        callback(err);
      } else {
        callback(null, result);
      }
    }

    function fail(message, err) {
      debug(message, requestOptions.url);
      debug(err);
      finish(err);
    }

    // Builds the decompression stream matching a content-encoding header
    // value, or returns null when the body does not need decoding.
    function createDecoder(encoding) {
      switch (encoding) {
        case 'gzip':
          return {tool: 'gzip', stream: zlib.createGunzip()};
        case 'deflate':
          return {tool: 'deflate', stream: zlib.createInflate()};
        case 'br':
          return {tool: 'brotli', stream: zlib.createBrotliDecompress()};
        default:
          return null;
      }
    }

    try {

      request(requestOptions)

      .on('response', function(res) {

        // Raw headers were added in NodeJS v0.12
        // (https://github.com/joyent/node/issues/4844), but let's
        // reconstruct them for backwards compatibility.
        var rawHeaders = ('HTTP/' + res.httpVersion + ' ' + res.statusCode +
                          ' ' + http.STATUS_CODES[res.statusCode] + '\r\n');
        Object.keys(res.headers).forEach(function(headerKey) {
          rawHeaders += headerKey + ': ' + res.headers[headerKey] + '\r\n';
        });
        rawHeaders += '\r\n';

        var uncompressedSize = 0;  // size after uncompression
        var bodySize = 0;          // bytes size over the wire
        var bodyChunks = [];       // an array of buffers
        var decoder = createDecoder(res.headers['content-encoding']);

        function tally() {
          if (res.statusCode !== 200) {
            finish({code: res.statusCode});
            return;
          }

          finish(null, {
            bodyBuffer: Buffer.concat(bodyChunks),
            headersSize: Buffer.byteLength(rawHeaders, 'utf8'),
            headers: res.headers,
            bodySize: bodySize,
            isCompressed: Boolean(decoder),
            compressionTool: decoder ? decoder.tool : '',
            uncompressedSize: uncompressedSize
          });
        }

        // The response must stay in binary mode (no setEncoding): decoding
        // compressed bytes as text corrupts them before they reach the
        // decompressor, and chunk lengths would count characters, not bytes.
        res.on('data', function(chunk) {
          bodySize += chunk.length;
        });

        var bodyStream = res;
        if (decoder) {
          decoder.stream.on('error', function(err) {
            fail('Error while decoding %s', err);
          });
          bodyStream = res.pipe(decoder.stream);
        }

        bodyStream.on('data', function(chunk) {
          bodyChunks.push(chunk);
          uncompressedSize += chunk.length;
        }).on('end', tally);
      })

      .on('error', function(err) {
        fail('Error while downloading %s', err);
      });

    } catch(err) {
      fail('Error while downloading %s', err);
    }
  }
  812. // Clean all the pollution this module added to the results
  /**
   * Removes the `weightCheck` property this module attached to the given
   * requests (the objects are modified in place).
   *
   * @param {Object[]} requests
   */
  function cleanResults(requests) {
    for (var i = 0; i < requests.length; i++) {
      delete requests[i].weightCheck;
    }
  }
  818. return {
  819. recheckAllFiles: recheckAllFiles,
  820. listRequestWeight: listRequestWeight,
  821. redownloadEntry: redownloadEntry,
  822. download: download
  823. };
  824. };
  825. module.exports = new Redownload();