redownload.js 35 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021
  1. /*
  2. * Redownloads every file after Phantomas has finished
  3. * Checks weight and every kind of compression
  4. *
  5. */
  6. /*jshint -W069 */
  7. var debug = require('debug')('ylt:redownload');
  8. var Q = require('q');
  9. var http = require('http');
  10. var zlib = require('zlib');
  11. var async = require('async');
  12. var request = require('request');
  13. var md5 = require('md5');
  14. var imageOptimizer = require('./imageOptimizer');
  15. var fileMinifier = require('./fileMinifier');
  16. var gzipCompressor = require('./gzipCompressor');
  17. var brotliCompressor = require('./brotliCompressor');
  18. var contentTypeChecker = require('./contentTypeChecker');
  19. var fontAnalyzer = require('./fontAnalyzer');
  20. var imageDimensions = require('./imageDimensions');
  21. var Redownload = function() {
  22. var MAX_PARALLEL_DOWNLOADS = 5;
  23. var REQUEST_TIMEOUT = 30000; // 30 seconds
  /**
   * Re-downloads every asset requested by the page and checks whether it could be optimized.
   *
   * Each request listed by Phantomas is downloaded again (at most MAX_PARALLEL_DOWNLOADS
   * at a time) and then passed through the content-type checker, the image optimizer,
   * the image dimensions reader, the minifier, the gzip and brotli compressors and the
   * font analyzer. The aggregated metrics and offenders are stored in
   * `data.toolsResults.redownload`.
   *
   * @param {Object} data - Run data. Reads `data.toolsResults.phantomas.offenders` and the
   *   optional `data.params.options` (authUser, authPass, proxy, device).
   * @returns {Promise} Q promise resolved with the same `data` object once every file was
   *   checked, or rejected with the first error raised by the pipeline. It is notified
   *   with the ratio doneCount / totalCount after each processed file.
   */
  function recheckAllFiles(data) {
    var startTime = Date.now();
    debug('Redownload started');
    var deferred = Q.defer();

    var requestsOffenders = data.toolsResults.phantomas.offenders.requests;
    var gzipOffenders = data.toolsResults.phantomas.offenders.gzipRequests;
    var postOffenders = data.toolsResults.phantomas.offenders.postRequests;
    var notFoundOffenders = data.toolsResults.phantomas.offenders.notFound;
    var redirectOffenders = data.toolsResults.phantomas.offenders.redirects;

    var requestsList = mergeOffenders(requestsOffenders, gzipOffenders, postOffenders, notFoundOffenders, redirectOffenders);

    var totalCount = requestsList.length;
    var doneCount = 0;

    // Params and options are optional: an empty object keeps every lookup below safe
    var options = (data.params && data.params.options) || {};

    var httpAuth = null;
    if (options.authUser && options.authPass) {
      httpAuth = {
        username: options.authUser,
        password: options.authPass
      };
    }

    var proxy = null;
    if (options.proxy) {
      proxy = options.proxy;

      // Only prepend a scheme when the proxy has none, so that "https://..." is kept as is
      if (!/^[a-z][a-z0-9+.-]*:\/\//i.test(proxy)) {
        proxy = 'http://' + proxy;
      }
    }

    // Prevent a bug with the font analyzer on empty pages
    var differentCharacters = '';
    if (data.toolsResults.phantomas.offenders.charactersCount && data.toolsResults.phantomas.offenders.charactersCount.length > 0) {
      differentCharacters = data.toolsResults.phantomas.offenders.charactersCount[0];
    }

    // Transform every request into a download function with a callback when done
    var redownloadList = requestsList.map(function(entry) {
      return function(callback) {

        redownloadEntry(entry, httpAuth, proxy)

        .then(contentTypeChecker.checkContentType)

        .then(imageOptimizer.optimizeImage)

        .then(imageDimensions.getDimensions)

        .then(fileMinifier.minifyFile)

        .then(gzipCompressor.compressFile)

        .then(brotliCompressor.compressFile)

        .then(function(entry) {
          return fontAnalyzer.analyzeFont(entry, differentCharacters);
        })

        .then(function(newEntry) {
          debug('File %s - Redownloaded, optimized, minified, compressed, analyzed: done', entry.url);

          // For the progress bar
          doneCount ++;
          deferred.notify(doneCount/totalCount);

          callback(null, newEntry);
        })

        .fail(function(err) {
          callback(err);
        });
      };
    });

    // Launch all redownload functions and wait for completion
    async.parallelLimit(redownloadList, MAX_PARALLEL_DOWNLOADS, function(err, results) {

      if (err) {
        debug(err);
        deferred.reject(err);
      } else {

        debug('All files checked');
        // Declared locally: the bare assignment used to create an implicit global
        var endTime = Date.now();
        debug('Redownload took %d ms', endTime - startTime);

        var metrics = {};
        var offenders = {};

        // Count requests
        offenders.totalRequests = listRequestsByType(results);
        metrics.totalRequests = offenders.totalRequests.total;

        // Remove unwanted responses (redownload failed, about:blank)
        results = results.filter(function(result) {
          return (result !== null && result.weightCheck && result.url.indexOf('about:blank') !== 0);
        });

        // Wrong contentType
        offenders.incorrectContentTypes = listIncorrectContentTypes(results);
        metrics.incorrectContentTypes = offenders.incorrectContentTypes.length;

        // Total weight
        offenders.totalWeight = listRequestWeight(results);
        metrics.totalWeight = offenders.totalWeight.totalWeight;

        // Empty files
        offenders.emptyRequests = listEmptyRequests(results);
        metrics.emptyRequests = offenders.emptyRequests.length;

        // Remove some more unwanted responses (redirections and empty files)
        results = results.filter(function(result) {
          return (/* (result.status < 300 || result.status >= 400) && */ result.weightCheck.bodySize > 0);
        });

        // Image compression
        offenders.imageOptimization = listImagesNotOptimized(results);
        metrics.imageOptimization = offenders.imageOptimization.totalGain;

        // Image width
        offenders.imagesTooLarge = listImagesTooLarge(results, options.device);
        metrics.imagesTooLarge = offenders.imagesTooLarge.length;

        // File minification
        offenders.fileMinification = listFilesNotMinified(results);
        metrics.fileMinification = offenders.fileMinification.totalGain;

        // Gzip/Brotli compression
        offenders.compression = listFilesNotBrotlified(results);
        metrics.compression = offenders.compression.totalGain;

        // Detect identical files
        offenders.identicalFiles = listIdenticalFiles(results);
        metrics.identicalFiles = offenders.identicalFiles.avoidableRequests;

        // Fonts count
        offenders.fontsCount = listFonts(results);
        metrics.fontsCount = offenders.fontsCount.count;

        // Conversion to woff2
        offenders.nonWoff2Fonts = listNonWoff2Fonts(results);
        metrics.nonWoff2Fonts = offenders.nonWoff2Fonts.totalGain;

        // Heavy fonts
        offenders.heavyFonts = listHeavyFonts(results);
        metrics.heavyFonts = offenders.heavyFonts.totalGain;

        // Unused Unicode ranges
        offenders.unusedUnicodeRanges = listUnusedUnicodeRanges(results);
        metrics.unusedUnicodeRanges = offenders.unusedUnicodeRanges.count;

        // Detect WordPress
        metrics.isWordPress = detectWordPress(results);

        data.toolsResults.redownload = {
          metrics: metrics,
          offenders: offenders
        };

        deferred.resolve(data);
      }
    });

    return deferred.promise;
  }
  /**
   * Flags each request with what the other Phantomas offender lists say about its URL.
   *
   * The request objects are modified in place:
   *  - `compressed`, `bodySize` and `transferedSize` are copied from the first entry of
   *    compressedOffenders having the same url;
   *  - `post` is set when the url is listed in postOffenders;
   *  - `notFound` is set when the url is listed in notFoundOffenders;
   *  - `redirect` is set when the url is the first space-separated word of a message
   *    in redirectOffenders.
   * Any of the four offender lists may be missing (falsy).
   *
   * @param {Array} requests - Request objects, each with a `url`.
   * @param {Array} [compressedOffenders] - Objects with `url`, `bodySize` and `transferedSize`.
   * @param {Array} [postOffenders] - URLs.
   * @param {Array} [notFoundOffenders] - URLs.
   * @param {Array} [redirectOffenders] - Messages starting with a URL.
   * @returns {Array} The same `requests` array.
   */
  function mergeOffenders(requests, compressedOffenders, postOffenders, notFoundOffenders, redirectOffenders) {

    requests.forEach(function(request) {

      // Compression details come from the first matching entry only
      if (compressedOffenders) {
        for (var i = 0; i < compressedOffenders.length; i++) {
          var candidate = compressedOffenders[i];
          if (candidate.url === request.url) {
            request.compressed = true;
            request.bodySize = candidate.bodySize;
            request.transferedSize = candidate.transferedSize;
            break;
          }
        }
      }

      // POST request?
      if (postOffenders && postOffenders.indexOf(request.url) !== -1) {
        request.post = true;
      }

      // 404?
      if (notFoundOffenders && notFoundOffenders.indexOf(request.url) !== -1) {
        request.notFound = true;
      }

      // Redirection? The URL is the first word of the offender message
      if (redirectOffenders) {
        var isRedirect = redirectOffenders.some(function(message) {
          return message.split(' ')[0] === request.url;
        });
        if (isRedirect) {
          request.redirect = true;
        }
      }
    });

    return requests;
  }
  /**
   * Lists the requests carrying an `oldContentType` property that is either truthy
   * or exactly null.
   *
   * @param {Array} requests - Request objects.
   * @returns {Array} One `{url, current, correct}` object per selected request, where
   *   `current` is the request's `oldContentType` and `correct` its `contentType`.
   */
  function listIncorrectContentTypes(requests) {
    return requests
      .filter(function(req) {
        return req.oldContentType === null || Boolean(req.oldContentType);
      })
      .map(function(req) {
        return {
          url: req.url,
          current: req.oldContentType,
          correct: req.contentType
        };
      });
  }
  /**
   * Lists the URLs of GET requests answered with a 200 status and an empty body.
   *
   * The weight is the redownloaded `weightCheck.bodySize` when it is a number, otherwise
   * the request's `contentLength`. A `bodySize` of 0 is a valid measure and must not fall
   * back to `contentLength` (which `bodySize || contentLength` used to do).
   *
   * @param {Array} requests - Request objects having a `weightCheck` object.
   * @returns {Array} URLs of the empty requests.
   */
  function listEmptyRequests(requests) {
    var results = [];

    requests.forEach(function(req) {
      var hasMeasuredSize = (typeof req.weightCheck.bodySize === 'number');
      var weight = hasMeasuredSize ? req.weightCheck.bodySize : req.contentLength;

      if (weight === 0 && req.method === 'GET' && req.status === 200) {
        results.push(req.url);
      }
    });

    return results;
  }
  /**
   * Sums the weight of the requests, globally and per type.
   *
   * The weight of a request is `weightCheck.bodySize + weightCheck.headersSize` when
   * `bodySize` is a number, otherwise `contentLength`; any falsy outcome counts as 0.
   * Requests without a type, or with a type that has no bucket, go into `other`.
   *
   * @param {Array} requests - Request objects having a `weightCheck` object.
   * @returns {Object} `{totalWeight, byType}` where each `byType` bucket (html, css, js,
   *   json, image, video, webfont, other) is `{totalWeight, requests: [{url, weight}]}`.
   */
  function listRequestWeight(requests) {
    var bucketNames = ['html', 'css', 'js', 'json', 'image', 'video', 'webfont', 'other'];

    var summary = {
      totalWeight: 0,
      byType: {}
    };
    bucketNames.forEach(function(name) {
      summary.byType[name] = {
        totalWeight: 0,
        requests: []
      };
    });

    requests.forEach(function(req) {
      var weight;
      if (typeof req.weightCheck.bodySize === 'number') {
        weight = req.weightCheck.bodySize + req.weightCheck.headersSize;
      } else {
        weight = req.contentLength;
      }
      weight = weight || 0;

      var wantedType = req.type || 'other';
      var bucket = summary.byType[wantedType] || summary.byType.other;

      summary.totalWeight += weight;
      bucket.totalWeight += weight;
      bucket.requests.push({
        url: req.url,
        weight: weight
      });
    });

    return summary;
  }
  /**
   * Lists the images that the optimizer managed to shrink by more than 200 bytes.
   *
   * Only non-empty requests accepted by `imageOptimizer.entryTypeCanBeOptimized` and
   * whose `weightCheck.isOptimized` is exactly false are considered. The sizes compared
   * depend on the compression state of the resource:
   *  - already compressed: body size vs. optimized + same compression tool;
   *  - not compressed but compressible (`afterBrotliCompression` is truthy): brotli size
   *    vs. optimized + brotli size;
   *  - otherwise: body size vs. optimized size.
   *
   * @param {Array} requests - Request objects having a `weightCheck` object.
   * @returns {Object} `{totalGain, images}`.
   */
  function listImagesNotOptimized(requests) {
    var report = {
      totalGain: 0,
      images: []
    };

    requests.forEach(function(req) {
      var check = req.weightCheck;

      if (!(check.bodySize > 0) || !imageOptimizer.entryTypeCanBeOptimized(req) || check.isOptimized !== false) {
        return;
      }

      // Default case: not compressed and not subject to compression
      var before = check.bodySize;
      var after = check.optimized;

      if (check.isCompressed === true) {
        // Compressed resource: compare with the same compression tool
        after = (check.compressionTool === 'brotli') ?
          check.afterOptimizationAndBrotliCompression :
          check.afterOptimizationAndGzipCompression;
      } else if (check.afterBrotliCompression) {
        // Not compressed but should be
        before = check.afterBrotliCompression;
        after = check.afterOptimizationAndBrotliCompression;
      }

      var gain = before - after;
      if (gain <= 200) {
        return;
      }

      report.totalGain += gain;
      report.images.push({
        url: req.url,
        originalWeigth: check.bodySize,
        isCompressible: (check.afterBrotliCompression > 0),
        isCompressed: check.isCompressed,
        originalCompressedWeight: before,
        afterOptimizationAndCompression: after,
        lossless: check.lossless,
        lossy: check.lossy,
        gain: gain
      });
    });

    return report;
  }
  /**
   * Lists the non-empty images whose width exceeds the threshold of the given device.
   *
   * Thresholds in pixels: phone 1200, tablet 1800, desktop 2400, desktop-hd 3200.
   * With any other device value there is no threshold to compare to and nothing is listed.
   *
   * @param {Array} requests - Request objects having a `weightCheck` object and,
   *   for images, an `imageDimensions` object.
   * @param {string} device - One of the device names above.
   * @returns {Array} `{url, weight, width, height}` objects.
   */
  function listImagesTooLarge(requests, device) {
    const maxWidthByDevice = {
      'phone': 1200,
      'tablet': 1800,
      'desktop': 2400,
      'desktop-hd': 3200
    };
    const maxWidth = maxWidthByDevice[device];

    return requests
      .filter(function(req) {
        return req.weightCheck.bodySize > 0 &&
          Boolean(req.imageDimensions) &&
          req.imageDimensions.width > maxWidth;
      })
      .map(function(req) {
        return {
          url: req.url,
          weight: req.weightCheck.bodySize,
          width: req.imageDimensions.width,
          height: req.imageDimensions.height
        };
      });
  }
  /**
   * Lists the files that the minifier managed to shrink by more than 200 bytes.
   *
   * Only non-empty requests accepted by `fileMinifier.entryTypeCanBeMinified` and whose
   * `weightCheck.isOptimized` is exactly false are considered. The sizes compared depend
   * on the compression state of the resource:
   *  - already compressed: body size vs. minified + same compression tool;
   *  - not compressed but compressible (`afterBrotliCompression` is truthy): brotli size
   *    vs. minified + brotli size;
   *  - otherwise: body size vs. minified size.
   *
   * @param {Array} requests - Request objects having a `weightCheck` object.
   * @returns {Object} `{totalGain, files}`.
   */
  function listFilesNotMinified(requests) {
    var report = {
      totalGain: 0,
      files: []
    };

    requests.forEach(function(req) {
      var check = req.weightCheck;

      if (!(check.bodySize > 0) || !fileMinifier.entryTypeCanBeMinified(req) || check.isOptimized !== false) {
        return;
      }

      // Default case: not compressed and not subject to compression
      var before = check.bodySize;
      var after = check.optimized;

      if (check.isCompressed === true) {
        // Compressed resource: compare with the same compression tool
        after = (check.compressionTool === 'brotli') ?
          check.afterOptimizationAndBrotliCompression :
          check.afterOptimizationAndGzipCompression;
      } else if (check.afterBrotliCompression) {
        // Not compressed but should be
        before = check.afterBrotliCompression;
        after = check.afterOptimizationAndBrotliCompression;
      }

      var gain = before - after;
      if (gain <= 200) {
        return;
      }

      report.totalGain += gain;
      report.files.push({
        url: req.url,
        originalWeigth: check.bodySize,
        isCompressible: (check.afterBrotliCompression > 0),
        isCompressed: check.isCompressed,
        originalCompressedWeight: before,
        afterOptimizationAndCompression: after,
        optimized: check.optimized,
        gain: gain
      });
    });

    return report;
  }
  /**
   * Lists the files that brotli compression would shrink by more than 200 bytes.
   *
   * Files already served with brotli are ignored. For a file served with another
   * compression, the gain is measured against its body size; for a file served
   * uncompressed, against its uncompressed size (the gzipped size is reported too).
   *
   * @param {Array} requests - Request objects having a `weightCheck` object.
   * @returns {Object} `{totalGain, files}`.
   */
  function listFilesNotBrotlified(requests) {
    var report = {
      totalGain: 0,
      files: []
    };

    requests.forEach(function(req) {
      var check = req.weightCheck;

      if (check.compressionTool === 'brotli') {
        return;
      }

      var file = {
        url: req.url,
        wasCompressed: check.isCompressed,
        brotlified: check.afterBrotliCompression
      };

      if (!check.isCompressed) {
        // Served without any compression
        file.originalSize = check.uncompressedSize;
        file.gzipped = check.afterGzipCompression;
        file.gain = check.uncompressedSize - check.afterBrotliCompression;
      } else {
        // Served gzipped (or deflated)
        file.originalSize = check.bodySize;
        file.gain = check.bodySize - check.afterBrotliCompression;
      }

      // Small or unknown gains are not worth reporting
      if (file.gain > 200) {
        report.totalGain += file.gain;
        report.files.push(file);
      }
    });

    return report;
  }
  /**
   * Counts the requests and groups their URLs by type.
   *
   * Requests whose url is exactly 'about:blank' are ignored. Requests without a type,
   * or with a type that has no bucket, go into `other`.
   *
   * @param {Array} requests - Request objects with `url` and optional `type`.
   * @returns {Object} `{total, byType}` where `byType` maps html, css, js, json, image,
   *   video, webfont and other to arrays of URLs.
   */
  function listRequestsByType(requests) {
    var summary = {
      total: 0,
      byType: {}
    };
    ['html', 'css', 'js', 'json', 'image', 'video', 'webfont', 'other'].forEach(function(name) {
      summary.byType[name] = [];
    });

    requests.forEach(function(req) {
      if (req.url === 'about:blank') {
        return;
      }

      var wantedType = req.type || 'other';
      var bucket = summary.byType[wantedType] || summary.byType.other;

      bucket.push(req.url);
      summary.total ++;
    });

    return summary;
  }
  /**
   * Detects files that are served several times under different URLs.
   *
   * Files are grouped by the MD5 hash of their body (`weightCheck.bodyBuffer`). Images
   * lighter than 80 bytes are skipped, as an attempt to exclude tracking pixels; they
   * are skipped before hashing so that no hash is computed for nothing.
   *
   * @param {Array} requests - Request objects having a `weightCheck` object.
   * @returns {Object} `{avoidableRequests, count, list}` where `list` holds one
   *   `{weight, urls}` object per body found under more than one URL, `count` is the
   *   length of that list and `avoidableRequests` the number of URLs in excess.
   */
  function listIdenticalFiles(requests) {
    var hashes = {};
    var list = [];
    var avoidableRequestsCount = 0;

    requests.forEach(function(req) {
      // Try to exclude tracking pixels
      if (req.weightCheck.bodySize < 80 && req.type === 'image') {
        return;
      }

      var requestHash = md5(req.weightCheck.bodyBuffer);

      if (!hashes[requestHash]) {
        hashes[requestHash] = {
          weight: req.weightCheck.bodySize,
          urls: []
        };
      }

      // The same URL requested twice is not a duplicated file
      if (hashes[requestHash].urls.indexOf(req.url) === -1) {
        hashes[requestHash].urls.push(req.url);
      }
    });

    Object.keys(hashes).forEach(function(hash) {
      if (hashes[hash].urls.length > 1) {
        list.push(hashes[hash]);
        avoidableRequestsCount += hashes[hash].urls.length - 1;
      }
    });

    return {
      avoidableRequests: avoidableRequestsCount,
      count: list.length,
      list: list
    };
  }
  /**
   * Lists the web fonts found among the requests.
   *
   * @param {Array} requests - Request objects; fonts are the ones with a truthy `isWebFont`.
   * @returns {Object} `{count, list}` where `list` holds `{url, size}` objects, `size`
   *   being the font's `weightCheck.bodySize`.
   */
  function listFonts(requests) {
    var fonts = requests
      .filter(function(req) {
        return req.isWebFont;
      })
      .map(function(req) {
        return {
          url: req.url,
          size: req.weightCheck.bodySize
        };
      });

    return {
      count: fonts.length,
      list: fonts
    };
  }
  /**
   * Lists the fonts that would be more than 200 bytes lighter once converted to woff2.
   *
   * A request is considered when it is not flagged `isWoff2` and has a truthy
   * `weightCheck.sizeAsWoff2`.
   *
   * @param {Array} requests - Request objects having a `weightCheck` object.
   * @returns {Object} `{totalGain, fonts}` where `fonts` holds
   *   `{url, originalSize, type, woff2Size, gain}` objects; `type` is 'woff', 'ttf' or null.
   */
  function listNonWoff2Fonts(requests) {
    var report = {
      totalGain: 0,
      fonts: []
    };

    requests.forEach(function(req) {
      if (req.isWoff2 || !req.weightCheck.sizeAsWoff2) {
        return;
      }

      var originalSize = req.weightCheck.bodySize;
      var woff2Size = req.weightCheck.sizeAsWoff2;
      var gain = originalSize - woff2Size;

      if (gain <= 200) {
        return;
      }

      var format = req.isWoff ? 'woff' : (req.isTTF ? 'ttf' : null);

      report.totalGain += gain;
      report.fonts.push({
        url: req.url,
        originalSize: originalSize,
        type: format,
        woff2Size: woff2Size,
        gain: gain
      });
    });

    return report;
  }
  /**
   * Lists the analyzed web fonts and measures how much the heavy ones exceed 40 KB.
   *
   * Every request flagged `isWebFont` and carrying `fontMetrics` is listed. Those whose
   * body is heavier than 40 * 1024 bytes are counted as heavy, and the excess weight is
   * added to the total gain.
   *
   * @param {Array} requests - Request objects having a `weightCheck` object.
   * @returns {Object} `{count, fonts, totalGain}` where `fonts` holds
   *   `{url, weight, numGlyphs, averageGlyphComplexity}` objects.
   */
  function listHeavyFonts(requests) {
    var MAX_FONT_WEIGHT = 40 * 1024;

    var report = {
      count: 0,
      fonts: [],
      totalGain: 0
    };

    requests.forEach(function(req) {
      if (!req.isWebFont || !req.fontMetrics) {
        return;
      }

      var weight = req.weightCheck.bodySize;

      report.fonts.push({
        url: req.url,
        weight: weight,
        numGlyphs: req.fontMetrics.numGlyphs,
        averageGlyphComplexity: req.fontMetrics.averageGlyphComplexity
      });

      if (weight > MAX_FONT_WEIGHT) {
        report.totalGain += weight - MAX_FONT_WEIGHT;
        report.count ++;
      }
    });

    return report;
  }
  /**
   * Counts the Unicode ranges that fonts declare but that the page does not use.
   *
   * Only requests flagged `isWebFont` with `fontMetrics.unicodeRanges` are analyzed.
   * For each font:
   *  - the "Others" range is flagged `underused` when it shares no glyph with the page
   *    content and holds more than 50 characters;
   *  - any other non-empty range is flagged `underused` when its coverage is above 0.25
   *    and it shares no glyph with the page content;
   *  - when "Others" holds more than 90% of the font's glyphs, the font is reported as an
   *    icon font, and counts for exactly one unused range if at most 5% of the "Others"
   *    characters are used;
   *  - otherwise the glyphs not attached to any listed character (ligatures or hidden
   *    chars) count for one more unused range when there are more than 25 of them.
   * The `underused` flag is written on the range objects of the input.
   *
   * @param {Array} requests - Request objects having a `weightCheck` object.
   * @returns {Object} `{count, fonts}`.
   */
  function listUnusedUnicodeRanges(requests) {
    var fonts = [];
    var totalUnused = 0;

    requests.forEach(function(req) {
      if (!(req.isWebFont && req.fontMetrics && req.fontMetrics.unicodeRanges)) {
        return;
      }

      var metrics = req.fontMetrics;
      var namedRanges = [];
      var othersRange = null;
      var listedCharsCount = 0;
      var unusedInThisFont = 0;

      for (const rangeName of Object.keys(metrics.unicodeRanges)) {
        const range = metrics.unicodeRanges[rangeName];
        const isOthers = (rangeName === 'Others');

        // Empty named ranges are ignored altogether
        if (!isOthers && !(range.charset.length > 0)) {
          continue;
        }

        // Reminder: range.coverage = glyphs declared in this range, divided by the range size
        const looksDeclared = isOthers ? (range.charset.length > 50) : (range.coverage > 0.25);
        if (range.numGlyphsInCommonWithPageContent === 0 && looksDeclared) {
          range.underused = true;
          unusedInThisFont ++;
        }

        listedCharsCount += range.charset.length;

        if (isOthers) {
          othersRange = range;
        } else {
          namedRanges.push(range);
        }
      }

      // More than 90% of the glyphs in "Others": it looks like an icons font
      var isIconFont = Boolean(othersRange) && (othersRange.charset.length / metrics.numGlyphs > 0.9);

      if (isIconFont) {
        fonts.push({
          url: req.url,
          weight: req.weightCheck.bodySize,
          isIconFont: true,
          glyphs: metrics.numGlyphs,
          numGlyphsInCommonWithPageContent: metrics.numGlyphsInCommonWithPageContent
        });

        // Less than 5% of the icons in use: report the font as underused, once
        if (othersRange.numGlyphsInCommonWithPageContent / othersRange.charset.length <= 0.05) {
          unusedInThisFont = 1;
        }
      } else {
        if (othersRange) {
          // "Others" goes back at the end of the list
          namedRanges.push(othersRange);
        }

        var ligaturesOrHiddenChars = metrics.numGlyphs - listedCharsCount;
        if (ligaturesOrHiddenChars > 25) {
          totalUnused ++;
        }

        fonts.push({
          url: req.url,
          weight: req.weightCheck.bodySize,
          isIconFont: false,
          unicodeRanges: namedRanges,
          ligaturesOrHiddenChars: ligaturesOrHiddenChars
        });
      }

      totalUnused += unusedInThisFont;
    });

    return {
      count: totalUnused,
      fonts: fonts
    };
  }
  /**
   * Tells whether the page looks like it is served by WordPress.
   *
   * Only the first HTML request having a downloaded body is inspected: the answer is
   * true when that body contains '/wp-content/' or '/wp-includes/'.
   *
   * @param {Array} requests - Request objects.
   * @returns {boolean} true when a WordPress path is found, false otherwise (including
   *   when no HTML body is available).
   */
  function detectWordPress(requests) {
    var firstHtml = requests.find(function(candidate) {
      return candidate.isHTML && candidate.weightCheck && candidate.weightCheck.bodyBuffer;
    });

    if (!firstHtml) {
      return false;
    }

    debug('Checking if it is WordPress on file %s', firstHtml.url);

    var body = firstHtml.weightCheck.bodyBuffer;
    var looksLikeWordPress = ['/wp-content/', '/wp-includes/'].some(function(marker) {
      return body.indexOf(marker) >= 0;
    });

    debug(looksLikeWordPress ? 'Response: yes.' : 'Response: no.');
    return looksLikeWordPress;
  }
  /**
   * Downloads a request again and stores the measures in `entry.weightCheck`.
   *
   * Nothing is downloaded for POST requests (`weightCheck` only gets a message), nor for
   * 404s, redirections and about:blank (the entry is left untouched). When the download
   * fails, `weightCheck` only gets an error message.
   *
   * @param {Object} entry - The request; reads `url`, `method`, `contentType`, `post`,
   *   `notFound` and `redirect`.
   * @param {Object} [httpAuth] - `{username, password}` for HTTP authentication.
   * @param {string} [proxy] - Proxy URL handed to the request library.
   * @returns {Promise} Q promise resolved with `entry` in every case handled here,
   *   download failures included.
   */
  function redownloadEntry(entry, httpAuth, proxy) {
    var deferred = Q.defer();

    function downloadError(message) {
      debug('Could not download %s Error: %s', entry.url, message);
      entry.weightCheck = {
        message: message
      };
      deferred.resolve(entry);
    }

    // Not downloaded again but will be counted in totalWeight
    function notDownloadableFile(message) {
      entry.weightCheck = {
        message: message
      };
      deferred.resolve(entry);
    }

    // Not counted in totalWeight
    function unwantedFile(message) {
      debug(message);
      deferred.resolve(entry);
    }

    if (entry.post) {
      notDownloadableFile('only downloading GET');
      // ... at least trying to
      return deferred.promise;
    }

    if (entry.notFound || entry.redirect) {
      unwantedFile('only downloading requests with status code 200');
      // ...at least trying to
      return deferred.promise;
    }

    if (entry.url === 'about:blank') {
      unwantedFile('not downloading about:blank');
      return deferred.promise;
    }

    debug('Downloading %s', entry.url);

    // Always add compression and webp headers before sending, in case the server listens to them.
    // Headers are a name -> value dictionary, hence a plain object and not an array.
    var reqHeaders = {
      'Accept': '*/*,image/webp',
      'Accept-Encoding': 'gzip, deflate, br',
      'Connection': 'keep-alive',
      'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.182 Safari/537.36'
    };

    var requestOptions = {
      method: entry.method,
      url: entry.url,
      headers: reqHeaders,
      timeout: REQUEST_TIMEOUT,
      proxy: proxy
    };

    // Basic auth
    if (httpAuth) {
      requestOptions.auth = {
        user: httpAuth.username,
        pass: httpAuth.password,
        sendImmediately: false // Tries a first time without auth, wait for a 401 error before resending
      };
    }

    download(requestOptions, entry.contentType, function(error, result) {
      if (error) {
        if (error.code === 'ETIMEDOUT') {
          downloadError('timeout after ' + REQUEST_TIMEOUT + 'ms');
        } else {
          downloadError('error while downloading: ' + error.code);
        }
        return;
      }

      debug('%s downloaded correctly', entry.url);

      entry.weightCheck = result;
      deferred.resolve(entry);
    });

    return deferred.promise;
  }
  // Inspired by https://github.com/cvan/fastHAR-api/blob/10cec585/app.js
  /**
   * Downloads a resource and measures its size, over the wire and once decoded.
   *
   * The body is decoded according to the `content-encoding` response header (gzip,
   * deflate or br); any other value is handled as an uncompressed body.
   *
   * @param {Object} requestOptions - Options handed as is to `request()`.
   * @param {string} contentType - Not used by this function.
   * @param {Function} callback - Called once, either with an error (a download or
   *   decoding error, or `{code: statusCode}` when the status is not 200), or with
   *   `(null, result)` where result is `{bodyBuffer, headersSize, headers, bodySize,
   *   isCompressed, compressionTool, uncompressedSize}`.
   */
  function download(requestOptions, contentType, callback) {
    var callbackCalled = false;

    // Several failure events can follow each other (e.g. a decoding error, then a
    // connection error): only the first outcome is reported to the caller.
    function finish(err, result) {
      if (callbackCalled) {
        return;
      }
      callbackCalled = true;

      if (err) {
        callback(err);
      } else {
        callback(null, result);
      }
    }

    function downloadFailed(err) {
      debug('Error while downloading %s', requestOptions.url);
      debug(err);
      finish(err);
    }

    function decodingFailed(err) {
      debug('Error while decoding %s', requestOptions.url);
      debug(err);
      finish(err);
    }

    try {

      request(requestOptions)

      .on('response', function(res) {

        // Raw headers were added in NodeJS v0.12
        // (https://github.com/joyent/node/issues/4844), but let's
        // reconstruct them for backwards compatibility.
        var rawHeaders = ('HTTP/' + res.httpVersion + ' ' + res.statusCode +
                  ' ' + http.STATUS_CODES[res.statusCode] + '\r\n');
        Object.keys(res.headers).forEach(function(headerKey) {
          rawHeaders += headerKey + ': ' + res.headers[headerKey] + '\r\n';
        });
        rawHeaders += '\r\n';

        var uncompressedSize = 0;  // size after uncompression
        var bodySize = 0;          // bytes size over the wire
        var bodyChunks = [];       // an array of buffers

        function tally(isCompressed, compressionTool) {
          if (res.statusCode !== 200) {
            finish({code: res.statusCode});
            return;
          }

          finish(null, {
            bodyBuffer: Buffer.concat(bodyChunks),
            headersSize: Buffer.byteLength(rawHeaders, 'utf8'),
            headers: res.headers,
            bodySize: bodySize,
            isCompressed: isCompressed,
            compressionTool: compressionTool,
            uncompressedSize: uncompressedSize
          });
        }

        // Pick the decoder matching the encoding announced by the server
        var decoder = null;
        var compressionTool = '';

        switch (res.headers['content-encoding']) {
          case 'gzip':
            decoder = zlib.createGunzip();
            compressionTool = 'gzip';
            break;
          case 'deflate':
            decoder = zlib.createInflate();
            compressionTool = 'deflate';
            break;
          case 'br':
            decoder = zlib.createBrotliDecompress();
            compressionTool = 'brotli';
            break;
        }

        if (decoder) {

          decoder.on('data', function(chunk) {
            bodyChunks.push(chunk);
            uncompressedSize += chunk.length;
          }).on('end', function() {
            tally(true, compressionTool);
          }).on('error', decodingFailed);

          // No setEncoding() on the response: the compressed stream is binary, decoding it
          // as text would corrupt it and bodySize would count characters instead of bytes.
          res.on('data', function(chunk) {
            bodySize += chunk.length;
          }).pipe(decoder);

        } else {

          res.on('data', function(chunk) {
            bodyChunks.push(chunk);
            uncompressedSize += chunk.length;
            bodySize += chunk.length;
          }).on('end', function() {
            tally(false, '');
          });
        }
      })

      .on('error', downloadFailed);

    } catch(err) {
      downloadFailed(err);
    }
  }
  812. return {
  813. recheckAllFiles: recheckAllFiles,
  814. listRequestWeight: listRequestWeight,
  815. redownloadEntry: redownloadEntry,
  816. download: download
  817. };
  818. };
  819. module.exports = new Redownload();