FileSignatures.mjs 35 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333
  1. /**
  2. * File signatures and extractor functions
  3. *
  4. * @author n1474335 [n1474335@gmail.com]
  5. * @copyright Crown Copyright 2018
  6. * @license Apache-2.0
  7. *
  8. */
  9. import Stream from "./Stream";
  /**
   * A categorised table of file types, including signatures to identify them and
   * functions to extract them where possible.
   *
   * Each category name maps to a list of entries with the fields:
   *   - name, extension, mime, description: descriptive strings (extension and
   *     mime may hold several comma-separated values).
   *   - signature: an object keyed by byte offset. Each value is either the byte
   *     expected at that offset or an array of accepted bytes. Where the
   *     signature is an array of such objects, each one describes an alternative
   *     layout (presumably any single match suffices - confirm against the
   *     code that consumes this table).
   *   - extractor: one of the extractor functions in this file, or null when no
   *     extractor exists for the type.
   *
   * Several entries carry "Place before ..." notes: entry order within a category
   * is significant, so do not re-sort the lists.
   */
  export const FILE_SIGNATURES = {
      // ---------------------------------------------------------------- Images
      "Images": [
          {
              name: "Joint Photographic Experts Group image",
              extension: "jpg,jpeg,jpe,thm,mpo",
              mime: "image/jpeg",
              description: "",
              signature: {
                  0: 0xff, 1: 0xd8, 2: 0xff,
                  3: [0xc0, 0xc4, 0xdb, 0xdd, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe7, 0xe8, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xfe]
              },
              extractor: extractJPEG
          },
          {
              name: "Graphics Interchange Format image",
              extension: "gif",
              mime: "image/gif",
              description: "",
              // "GIF8", then "7" or "9", then "a"
              signature: { 0: 0x47, 1: 0x49, 2: 0x46, 3: 0x38, 4: [0x37, 0x39], 5: 0x61 },
              extractor: null
          },
          {
              name: "Portable Network Graphics image",
              extension: "png",
              mime: "image/png",
              description: "",
              // 0x89 "PNG" CR LF 0x1a LF
              signature: { 0: 0x89, 1: 0x50, 2: 0x4e, 3: 0x47, 4: 0x0d, 5: 0x0a, 6: 0x1a, 7: 0x0a },
              extractor: extractPNG
          },
          {
              name: "WEBP Image",
              extension: "webp",
              mime: "image/webp",
              description: "",
              signature: { 8: 0x57, 9: 0x45, 10: 0x42, 11: 0x50 },
              extractor: null
          },
          {
              name: "Camera Image File Format",
              extension: "crw",
              mime: "image/x-canon-crw",
              description: "",
              // HEAPCCDR
              signature: { 6: 0x48, 7: 0x45, 8: 0x41, 9: 0x50, 10: 0x43, 11: 0x43, 12: 0x44, 13: 0x52 },
              extractor: null
          },
          { // Place before tiff check
              name: "Canon CR2 raw image",
              extension: "cr2",
              mime: "image/x-canon-cr2",
              description: "",
              signature: [
                  { 0: 0x49, 1: 0x49, 2: 0x2a, 3: 0x0, 8: 0x43, 9: 0x52 },
                  { 0: 0x4d, 1: 0x4d, 2: 0x0, 3: 0x2a, 8: 0x43, 9: 0x52 }
              ],
              extractor: null
          },
          {
              name: "Tagged Image File Format image",
              extension: "tif",
              mime: "image/tiff",
              description: "",
              signature: [
                  { 0: 0x49, 1: 0x49, 2: 0x2a, 3: 0x0 },
                  { 0: 0x4d, 1: 0x4d, 2: 0x0, 3: 0x2a }
              ],
              extractor: null
          },
          {
              name: "Bitmap image",
              extension: "bmp",
              mime: "image/bmp",
              description: "",
              signature: {
                  0: 0x42, 1: 0x4d,
                  7: 0x0,
                  9: 0x0,
                  14: [0x0c, 0x28, 0x38, 0x40, 0x6c, 0x7c],
                  15: 0x0, 16: 0x0, 17: 0x0
              },
              extractor: extractBMP
          },
          {
              name: "JPEG Extended Range image",
              extension: "jxr",
              mime: "image/vnd.ms-photo",
              description: "",
              signature: { 0: 0x49, 1: 0x49, 2: 0xbc },
              extractor: null
          },
          {
              name: "Photoshop image",
              extension: "psd",
              mime: "image/vnd.adobe.photoshop",
              description: "",
              signature: {
                  0: 0x38, 1: 0x42, 2: 0x50, 3: 0x53,
                  4: 0x0, 5: 0x1,
                  6: 0x0, 7: 0x0, 8: 0x0, 9: 0x0, 10: 0x0, 11: 0x0
              },
              extractor: null
          },
          {
              name: "Paint Shop Pro image",
              extension: "psp",
              mime: "image/psp",
              description: "",
              signature: [
                  { // Paint Shop Pro Im
                      0: 0x50, 1: 0x61, 2: 0x69, 3: 0x6e, 4: 0x74, 5: 0x20,
                      6: 0x53, 7: 0x68, 8: 0x6f, 9: 0x70, 10: 0x20,
                      11: 0x50, 12: 0x72, 13: 0x6f, 14: 0x20,
                      15: 0x49, 16: 0x6d
                  },
                  { 0: 0x7e, 1: 0x42, 2: 0x4b, 3: 0x0 }
              ],
              extractor: null
          },
          {
              name: "Icon image",
              extension: "ico",
              mime: "image/x-icon",
              description: "",
              signature: {
                  0: 0x0, 1: 0x0, 2: 0x1, 3: 0x0,
                  4: [0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15],
                  5: 0x0,
                  6: [0x10, 0x20, 0x30, 0x40, 0x80],
                  7: [0x10, 0x20, 0x30, 0x40, 0x80],
                  9: 0x00,
                  10: [0x0, 0x1]
              },
              extractor: null
          }
      ],

      // ----------------------------------------------------------------- Video
      "Video": [
          { // Place before webm
              name: "Matroska Multimedia Container",
              extension: "mkv",
              mime: "video/x-matroska",
              description: "",
              signature: { 31: 0x6d, 32: 0x61, 33: 0x74, 34: 0x72, 35: 0x6f, 36: 0x73, 37: 0x6b, 38: 0x61 },
              extractor: null
          },
          {
              name: "WEBM video",
              extension: "webm",
              mime: "video/webm",
              description: "",
              signature: { 0: 0x1a, 1: 0x45, 2: 0xdf, 3: 0xa3 },
              extractor: null
          },
          {
              name: "MPEG-4 video",
              extension: "mp4",
              mime: "video/mp4",
              description: "",
              signature: [
                  { 0: 0x0, 1: 0x0, 2: 0x0, 3: [0x18, 0x20], 4: 0x66, 5: 0x74, 6: 0x79, 7: 0x70 },
                  { 0: 0x33, 1: 0x67, 2: 0x70, 3: 0x35 }, // 3gp5
                  {
                      0: 0x0, 1: 0x0, 2: 0x0, 3: 0x1c,
                      4: 0x66, 5: 0x74, 6: 0x79, 7: 0x70,
                      8: 0x6d, 9: 0x70, 10: 0x34, 11: 0x32,
                      // mp41mp42isom
                      16: 0x6d, 17: 0x70, 18: 0x34, 19: 0x31,
                      20: 0x6d, 21: 0x70, 22: 0x34, 23: 0x32,
                      24: 0x69, 25: 0x73, 26: 0x6f, 27: 0x6d
                  }
              ],
              extractor: null
          },
          {
              name: "M4V video",
              extension: "m4v",
              mime: "video/x-m4v",
              description: "",
              signature: {
                  0: 0x0, 1: 0x0, 2: 0x0, 3: 0x1c,
                  4: 0x66, 5: 0x74, 6: 0x79, 7: 0x70,
                  8: 0x4d, 9: 0x34, 10: 0x56
              },
              extractor: null
          },
          {
              name: "Quicktime video",
              extension: "mov",
              mime: "video/quicktime",
              description: "",
              signature: { 0: 0x0, 1: 0x0, 2: 0x0, 3: 0x14, 4: 0x66, 5: 0x74, 6: 0x79, 7: 0x70 },
              extractor: null
          },
          {
              name: "Audio Video Interleave",
              extension: "avi",
              mime: "video/x-msvideo",
              description: "",
              signature: { 0: 0x52, 1: 0x49, 2: 0x46, 3: 0x46, 8: 0x41, 9: 0x56, 10: 0x49 },
              extractor: null
          },
          {
              name: "Windows Media Video",
              extension: "wmv",
              mime: "video/x-ms-wmv",
              description: "",
              signature: {
                  0: 0x30, 1: 0x26, 2: 0xb2, 3: 0x75, 4: 0x8e,
                  5: 0x66, 6: 0xcf, 7: 0x11, 8: 0xa6, 9: 0xd9
              },
              extractor: null
          },
          {
              name: "MPEG video",
              extension: "mpg",
              mime: "video/mpeg",
              description: "",
              signature: { 0: 0x0, 1: 0x0, 2: 0x1, 3: 0xba },
              extractor: null
          },
          {
              name: "Flash Video",
              extension: "flv",
              mime: "video/x-flv",
              description: "",
              signature: { 0: 0x46, 1: 0x4c, 2: 0x56, 3: 0x1 },
              extractor: extractFLV
          }
      ],

      // ----------------------------------------------------------------- Audio
      "Audio": [
          {
              name: "Waveform Audio",
              extension: "wav",
              mime: "audio/x-wav",
              description: "",
              signature: { 0: 0x52, 1: 0x49, 2: 0x46, 3: 0x46, 8: 0x57, 9: 0x41, 10: 0x56, 11: 0x45 },
              extractor: null
          },
          {
              name: "OGG audio",
              extension: "ogg",
              mime: "audio/ogg",
              description: "",
              signature: { 0: 0x4f, 1: 0x67, 2: 0x67, 3: 0x53 },
              extractor: null
          },
          {
              name: "Musical Instrument Digital Interface audio",
              extension: "midi",
              mime: "audio/midi",
              description: "",
              signature: { 0: 0x4d, 1: 0x54, 2: 0x68, 3: 0x64 },
              extractor: null
          },
          {
              name: "MPEG-3 audio",
              extension: "mp3",
              mime: "audio/mpeg",
              description: "",
              signature: [
                  { 0: 0x49, 1: 0x44, 2: 0x33 },
                  { 0: 0xff, 1: 0xfb }
              ],
              extractor: null
          },
          {
              name: "MPEG-4 Part 14 audio",
              extension: "m4a",
              mime: "audio/m4a",
              description: "",
              signature: [
                  { 4: 0x66, 5: 0x74, 6: 0x79, 7: 0x70, 8: 0x4d, 9: 0x34, 10: 0x41 },
                  { 0: 0x4d, 1: 0x34, 2: 0x41, 3: 0x20 }
              ],
              extractor: null
          },
          {
              name: "Free Lossless Audio Codec",
              extension: "flac",
              mime: "audio/x-flac",
              description: "",
              signature: { 0: 0x66, 1: 0x4c, 2: 0x61, 3: 0x43 },
              extractor: null
          },
          {
              name: "Adaptive Multi-Rate audio codec",
              extension: "amr",
              mime: "audio/amr",
              description: "",
              signature: { 0: 0x23, 1: 0x21, 2: 0x41, 3: 0x4d, 4: 0x52, 5: 0x0a },
              extractor: null
          }
      ],

      // ------------------------------------------------------------- Documents
      "Documents": [
          {
              name: "Portable Document Format",
              extension: "pdf",
              mime: "application/pdf",
              description: "",
              signature: { 0: 0x25, 1: 0x50, 2: 0x44, 3: 0x46 },
              extractor: extractPDF
          },
          {
              name: "PostScript",
              extension: "ps",
              mime: "application/postscript",
              description: "",
              signature: { 0: 0x25, 1: 0x21 },
              extractor: null
          },
          {
              name: "Rich Text Format",
              extension: "rtf",
              mime: "application/rtf",
              description: "",
              signature: { 0: 0x7b, 1: 0x5c, 2: 0x72, 3: 0x74, 4: 0x66 },
              extractor: null
          },
          {
              name: "Microsoft Office documents/OLE2",
              extension: "ole2,doc,xls,dot,ppt,xla,ppa,pps,pot,msi,sdw,db,vsd,msg",
              mime: "application/msword,application/vnd.ms-excel,application/vnd.ms-powerpoint",
              description: "Microsoft Office documents",
              signature: { 0: 0xd0, 1: 0xcf, 2: 0x11, 3: 0xe0, 4: 0xa1, 5: 0xb1, 6: 0x1a, 7: 0xe1 },
              extractor: null
          },
          {
              name: "Microsoft Office 2007+ documents",
              extension: "docx,xlsx,pptx",
              mime: "application/vnd.openxmlformats-officedocument.wordprocessingml.document,application/vnd.openxmlformats-officedocument.spreadsheetml.sheet,application/vnd.openxmlformats-officedocument.presentationml.presentation",
              description: "",
              // _Types].xml
              signature: {
                  38: 0x5f, 39: 0x54, 40: 0x79, 41: 0x70, 42: 0x65, 43: 0x73,
                  44: 0x5d, 45: 0x2e, 46: 0x78, 47: 0x6d, 48: 0x6c
              },
              extractor: null
          },
          {
              name: "EPUB e-book",
              extension: "epub",
              mime: "application/epub+zip",
              description: "",
              signature: {
                  0: 0x50, 1: 0x4b, 2: 0x3, 3: 0x4,
                  // mimetypeapplication/epub_zip
                  30: 0x6d, 31: 0x69, 32: 0x6d, 33: 0x65, 34: 0x74, 35: 0x79, 36: 0x70, 37: 0x65,
                  38: 0x61, 39: 0x70, 40: 0x70, 41: 0x6c, 42: 0x69, 43: 0x63, 44: 0x61, 45: 0x74,
                  46: 0x69, 47: 0x6f, 48: 0x6e, 49: 0x2f,
                  50: 0x65, 51: 0x70, 52: 0x75, 53: 0x62, 54: 0x2b,
                  55: 0x7a, 56: 0x69, 57: 0x70
              },
              extractor: null
          }
      ],

      // ---------------------------------------------------------- Applications
      "Applications": [
          {
              name: "Windows Portable Executable",
              extension: "exe,dll,drv,vxd,sys,ocx,vbx,com,fon,scr",
              mime: "application/x-msdownload",
              description: "",
              signature: { 0: 0x4d, 1: 0x5a, 3: [0x0, 0x1, 0x2], 5: [0x0, 0x1, 0x2] },
              extractor: extractMZPE
          },
          {
              name: "Executable and Linkable Format file",
              extension: "elf,bin,axf,o,prx,so",
              mime: "application/x-executable",
              description: "Executable and Linkable Format file. No standard file extension.",
              signature: { 0: 0x7f, 1: 0x45, 2: 0x4c, 3: 0x46 },
              extractor: null
          },
          {
              name: "Adobe Flash",
              extension: "swf",
              mime: "application/x-shockwave-flash",
              description: "",
              signature: { 0: [0x43, 0x46], 1: 0x57, 2: 0x53 },
              extractor: null
          },
          {
              name: "Java Class",
              extension: "class",
              mime: "application/java-vm",
              description: "",
              signature: { 0: 0xca, 1: 0xfe, 2: 0xba, 3: 0xbe },
              extractor: null
          },
          {
              name: "Dalvik Executable",
              extension: "dex",
              mime: "application/octet-stream",
              description: "Dalvik Executable as used by Android",
              signature: { 0: 0x64, 1: 0x65, 2: 0x78, 3: 0x0a, 4: 0x30, 5: 0x33, 6: 0x35, 7: 0x0 },
              extractor: null
          },
          {
              name: "Google Chrome Extension",
              extension: "crx",
              mime: "application/crx",
              description: "Google Chrome extension or packaged app",
              signature: { 0: 0x43, 1: 0x72, 2: 0x32, 3: 0x34 },
              extractor: null
          }
      ],

      // -------------------------------------------------------------- Archives
      "Archives": [
          {
              name: "PKZIP archive",
              extension: "zip",
              mime: "application/zip",
              description: "",
              signature: { 0: 0x50, 1: 0x4b, 2: [0x3, 0x5, 0x7], 3: [0x4, 0x6, 0x8] },
              extractor: extractZIP
          },
          {
              name: "TAR archive",
              extension: "tar",
              mime: "application/x-tar",
              description: "",
              signature: { 257: 0x75, 258: 0x73, 259: 0x74, 260: 0x61, 261: 0x72 },
              extractor: null
          },
          {
              name: "Roshal Archive",
              extension: "rar",
              mime: "application/x-rar-compressed",
              description: "",
              signature: { 0: 0x52, 1: 0x61, 2: 0x72, 3: 0x21, 4: 0x1a, 5: 0x7, 6: [0x0, 0x1] },
              extractor: null
          },
          {
              name: "Gzip",
              extension: "gz",
              mime: "application/gzip",
              description: "",
              signature: { 0: 0x1f, 1: 0x8b, 2: 0x8 },
              extractor: null
          },
          {
              name: "Bzip2",
              extension: "bz2",
              mime: "application/x-bzip2",
              description: "",
              signature: { 0: 0x42, 1: 0x5a, 2: 0x68 },
              extractor: null
          },
          {
              name: "7zip",
              extension: "7z",
              mime: "application/x-7z-compressed",
              description: "",
              signature: { 0: 0x37, 1: 0x7a, 2: 0xbc, 3: 0xaf, 4: 0x27, 5: 0x1c },
              extractor: null
          },
          {
              name: "Zlib Deflate",
              extension: "zlib",
              mime: "application/x-deflate",
              description: "",
              signature: { 0: 0x78, 1: [0x1, 0x9c, 0xda, 0x5e] },
              extractor: null
          },
          {
              name: "xz compression",
              extension: "xz",
              mime: "application/x-xz",
              description: "",
              signature: { 0: 0xfd, 1: 0x37, 2: 0x7a, 3: 0x58, 4: 0x5a, 5: 0x0 },
              extractor: null
          },
          {
              name: "Tarball",
              extension: "tar.z",
              mime: "application/x-gtar",
              description: "",
              signature: { 0: 0x1f, 1: [0x9d, 0xa0] },
              extractor: null
          },
          {
              name: "ISO disk image",
              extension: "iso",
              mime: "application/octet-stream",
              description: "ISO 9660 CD/DVD image file",
              // The same five bytes, looked for at three different offsets
              signature: [
                  { 0x8001: 0x43, 0x8002: 0x44, 0x8003: 0x30, 0x8004: 0x30, 0x8005: 0x31 },
                  { 0x8801: 0x43, 0x8802: 0x44, 0x8803: 0x30, 0x8804: 0x30, 0x8805: 0x31 },
                  { 0x9001: 0x43, 0x9002: 0x44, 0x9003: 0x30, 0x9004: 0x30, 0x9005: 0x31 }
              ],
              extractor: null
          },
          {
              name: "Virtual Machine Disk",
              extension: "vmdk",
              mime: "application/vmdk,application/x-virtualbox-vmdk",
              description: "",
              signature: { 0: 0x4b, 1: 0x44, 2: 0x4d },
              extractor: null
          }
      ],

      // --------------------------------------------------------- Miscellaneous
      "Miscellaneous": [
          {
              name: "UTF-8 text file",
              extension: "txt",
              mime: "text/plain",
              description: "UTF-8 encoded Unicode byte order mark, commonly but not exclusively seen in text files.",
              signature: { 0: 0xef, 1: 0xbb, 2: 0xbf },
              extractor: null
          },
          { // Place before UTF-16 LE file
              name: "UTF-32 LE file",
              extension: "utf32le",
              mime: "charset/utf32le",
              description: "Little-endian UTF-32 encoded Unicode byte order mark.",
              signature: { 0: 0xff, 1: 0xfe, 2: 0x00, 3: 0x00 },
              extractor: null
          },
          {
              name: "UTF-16 LE file",
              extension: "utf16le",
              mime: "charset/utf16le",
              description: "Little-endian UTF-16 encoded Unicode byte order mark.",
              signature: { 0: 0xff, 1: 0xfe },
              extractor: null
          },
          {
              name: "Web Open Font Format",
              extension: "woff",
              mime: "application/font-woff",
              description: "",
              signature: { 0: 0x77, 1: 0x4f, 2: 0x46, 3: 0x46, 4: 0x0, 5: 0x1, 6: 0x0, 7: 0x0 },
              extractor: null
          },
          {
              name: "Web Open Font Format 2",
              extension: "woff2",
              mime: "application/font-woff",
              description: "",
              signature: { 0: 0x77, 1: 0x4f, 2: 0x46, 3: 0x32, 4: 0x0, 5: 0x1, 6: 0x0, 7: 0x0 },
              extractor: null
          },
          {
              name: "Embedded OpenType font",
              extension: "eot",
              mime: "application/octet-stream",
              description: "",
              signature: [
                  { 8: 0x2, 9: 0x0, 10: 0x1, 34: 0x4c, 35: 0x50 },
                  { 8: 0x1, 9: 0x0, 10: 0x0, 34: 0x4c, 35: 0x50 },
                  { 8: 0x2, 9: 0x0, 10: 0x2, 34: 0x4c, 35: 0x50 }
              ],
              extractor: null
          },
          {
              name: "TrueType Font",
              extension: "ttf",
              mime: "application/font-sfnt",
              description: "",
              signature: { 0: 0x0, 1: 0x1, 2: 0x0, 3: 0x0, 4: 0x0 },
              extractor: null
          },
          {
              name: "OpenType Font",
              extension: "otf",
              mime: "application/font-sfnt",
              description: "",
              signature: { 0: 0x4f, 1: 0x54, 2: 0x54, 3: 0x4f, 4: 0x0 },
              extractor: null
          },
          {
              name: "SQLite",
              extension: "sqlite",
              mime: "application/x-sqlite3",
              description: "",
              signature: { 0: 0x53, 1: 0x51, 2: 0x4c, 3: 0x69 },
              extractor: null
          }
      ]
  };
  /**
   * JPEG extractor.
   *
   * Walks the marker segments of a JPEG beginning at `offset` and carves the
   * stream once the End of Image marker (0xffd9) has been consumed.
   *
   * @param {Uint8Array} bytes - Data containing the JPEG
   * @param {number} offset - Index in `bytes` at which the JPEG starts
   * @returns {Uint8Array} The result of `stream.carve()` taken immediately after
   *     the End of Image marker
   * @throws {Error} If a marker does not begin with 0xff, or if the data runs
   *     out before an End of Image marker is found
   */
  export function extractJPEG(bytes, offset) {
      const stream = new Stream(bytes.slice(offset));

      while (stream.hasMore()) {
          const marker = stream.getBytes(2);
          if (marker[0] !== 0xff) throw new Error("Invalid JPEG marker: " + marker);

          let segmentSize = 0;
          switch (marker[1]) {
              // Standalone markers: no length field, no payload
              case 0xd8: // Start of Image
              case 0x01: // For temporary use in arithmetic coding
                  break;

              case 0xd9: // End of Image
                  return stream.carve();

              // Length-prefixed segments: a 2-byte big-endian length (which
              // counts itself) followed by length - 2 bytes of payload
              case 0xc0: // Start of frame (Baseline DCT)
              case 0xc1: // Start of frame (Extended sequential DCT)
              case 0xc2: // Start of frame (Progressive DCT)
              case 0xc3: // Start of frame (Lossless sequential)
              case 0xc4: // Define Huffman Table
              case 0xc5: // Start of frame (Differential sequential DCT)
              case 0xc6: // Start of frame (Differential progressive DCT)
              case 0xc7: // Start of frame (Differential lossless)
              case 0xc8: // Reserved for JPEG extensions
              case 0xc9: // Start of frame (Extended sequential DCT)
              case 0xca: // Start of frame (Progressive DCT)
              case 0xcb: // Start of frame (Lossless sequential)
              case 0xcc: // Define arithmetic conditioning table
              case 0xcd: // Start of frame (Differential sequential DCT)
              case 0xce: // Start of frame (Differential progressive DCT)
              case 0xcf: // Start of frame (Differential lossless)
              case 0xdb: // Define Quantization Table
              // DNL, DRI and EXP also start with a length field (4, 4 and 3
              // respectively). Skipping a fixed 2/2/1 bytes, as was done
              // previously, left the stream inside the segment and caused an
              // "Invalid JPEG marker" error on any file with a restart interval.
              case 0xdc: // Define number of lines
              case 0xdd: // Define restart interval
              case 0xde: // Define hierarchical progression
              case 0xdf: // Expand reference image
              case 0xe0: // Application-specific
              case 0xe1: // Application-specific
              case 0xe2: // Application-specific
              case 0xe3: // Application-specific
              case 0xe4: // Application-specific
              case 0xe5: // Application-specific
              case 0xe6: // Application-specific
              case 0xe7: // Application-specific
              case 0xe8: // Application-specific
              case 0xe9: // Application-specific
              case 0xea: // Application-specific
              case 0xeb: // Application-specific
              case 0xec: // Application-specific
              case 0xed: // Application-specific
              case 0xee: // Application-specific
              case 0xef: // Application-specific
              case 0xfe: // Comment
                  segmentSize = stream.readInt(2, "be");
                  stream.position += segmentSize - 2;
                  break;

              // Start of scan: a length-prefixed header followed by entropy-coded
              // data of unknown size, so scan ahead to the next 0xff
              case 0xda:
                  segmentSize = stream.readInt(2, "be");
                  stream.position += segmentSize - 2;
                  stream.continueUntil(0xff);
                  break;

              // Markers met inside entropy-coded data (0x00 byte stuffing,
              // 0xd0-0xd7 restart) and anything unrecognised: keep scanning
              // for the next 0xff
              default:
                  stream.continueUntil(0xff);
                  break;
          }
      }

      throw new Error("Unable to parse JPEG successfully");
  }
  /**
   * Portable executable extractor.
   * Assumes that the offset refers to an MZ header.
   *
   * Follows the PE header pointer stored at 0x3c, reads the section count and
   * optional header size, then uses the final section header's raw data size
   * and address to find where the file ends.
   *
   * @param {Uint8Array} bytes - Data containing the executable
   * @param {number} offset - Index in `bytes` of the MZ header
   * @returns {Uint8Array} The result of `stream.carve()` taken at the end of the
   *     final section's raw data
   */
  export function extractMZPE(bytes, offset) {
      const PE_POINTER_ADDRESS = 0x3c;
      const SECTION_HEADER_SIZE = 0x28;

      const stream = new Stream(bytes.slice(offset));

      // Follow the pointer to the PE header
      stream.moveTo(PE_POINTER_ADDRESS);
      const peHeaderStart = stream.readInt(4, "le");
      stream.moveTo(peHeaderStart);

      // The section count sits 6 bytes into the PE header
      stream.moveForwardsBy(6);
      const sectionCount = stream.readInt(2, "le");

      // The optional header size follows 12 bytes later
      stream.moveForwardsBy(12);
      const optionalHeaderLength = stream.readInt(2, "le");

      // Step over the remaining 2 header bytes and the optional header, which
      // lands on the first section header
      stream.moveForwardsBy(2 + optionalHeaderLength);

      // Jump to the last section header
      stream.moveForwardsBy((sectionCount - 1) * SECTION_HEADER_SIZE);

      // Raw data size and address are 16 bytes into a section header
      stream.moveForwardsBy(16);
      const lastSectionSize = stream.readInt(4, "le");
      const lastSectionStart = stream.readInt(4, "le");

      // The file is taken to end where the last section's raw data ends
      stream.moveTo(lastSectionStart + lastSectionSize);

      return stream.carve();
  }
  /**
   * PDF extractor.
   *
   * Carves from the start of the PDF through the first end-of-file marker
   * ("%%EOF"), plus one carriage return and/or one line feed if they follow
   * the marker directly.
   *
   * @param {Uint8Array} bytes - Data containing the PDF
   * @param {number} offset - Index in `bytes` at which the PDF starts
   * @returns {Uint8Array} The result of `stream.carve()` taken after the marker
   *     and any consumed line ending
   */
  export function extractPDF(bytes, offset) {
      const EOF_MARKER = [0x25, 0x25, 0x45, 0x4f, 0x46]; // %%EOF
      const LINE_ENDING = [0x0d, 0x0a]; // CR, LF

      const stream = new Stream(bytes.slice(offset));

      // Locate the end-of-file marker, then step over it
      stream.continueUntil(EOF_MARKER);
      stream.moveForwardsBy(EOF_MARKER.length);

      // Swallow an optional trailing CR and/or LF
      for (const byte of LINE_ENDING) {
          stream.consumeIf(byte);
      }

      return stream.carve();
  }
  /**
   * ZIP extractor.
   *
   * Searches for the End of central directory record ("PK" 0x05 0x06), reads
   * the comment length stored 20 bytes into that record and carves through the
   * end of the comment.
   *
   * @param {Uint8Array} bytes - Data containing the archive
   * @param {number} offset - Index in `bytes` at which the archive starts
   * @returns {Uint8Array} The result of `stream.carve()` taken after the archive
   *     comment
   */
  export function extractZIP(bytes, offset) {
      const END_OF_CENTRAL_DIRECTORY = [0x50, 0x4b, 0x05, 0x06];
      const COMMENT_LENGTH_OFFSET = 20;

      const stream = new Stream(bytes.slice(offset));

      // Jump to the End of central directory record
      stream.continueUntil(END_OF_CENTRAL_DIRECTORY);

      // Skip the fixed fields that precede the comment length
      stream.moveForwardsBy(COMMENT_LENGTH_OFFSET);

      // The trailing comment is the last thing in the archive
      const trailingCommentSize = stream.readInt(2, "le");
      stream.moveForwardsBy(trailingCommentSize);

      return stream.carve();
  }
  /**
   * PNG extractor.
   *
   * Skips the 8-byte signature and then hops from chunk to chunk, using each
   * chunk's length field, until the "IEND" chunk has been passed.
   *
   * @param {Uint8Array} bytes - Data containing the PNG
   * @param {number} offset - Index in `bytes` at which the PNG starts
   * @returns {Uint8Array} The result of `stream.carve()` taken after the IEND chunk
   */
  export function extractPNG(bytes, offset) {
      const SIGNATURE_LENGTH = 8;
      const CRC_LENGTH = 4;
      const FINAL_CHUNK = "IEND";

      const stream = new Stream(bytes.slice(offset));
      stream.moveForwardsBy(SIGNATURE_LENGTH);

      for (;;) {
          // Chunk layout: 4-byte big-endian data length, 4-character type,
          // the data itself, then a CRC checksum
          const dataLength = stream.readInt(4, "be");
          const type = stream.readString(4);
          stream.moveForwardsBy(dataLength + CRC_LENGTH);

          if (type === FINAL_CHUNK) break;
      }

      return stream.carve();
  }
  /**
   * BMP extractor.
   *
   * Reads the 4-byte little-endian file size that follows the 2-byte header
   * and carves exactly that many bytes from the start of the bitmap.
   *
   * @param {Uint8Array} bytes - Data containing the bitmap
   * @param {number} offset - Index in `bytes` at which the bitmap starts
   * @returns {Uint8Array} The result of `stream.carve()` taken at the declared
   *     end of the file
   */
  export function extractBMP(bytes, offset) {
      const MAGIC_LENGTH = 2;
      const SIZE_FIELD_LENGTH = 4;

      const stream = new Stream(bytes.slice(offset));

      // Step over the header bytes to reach the size field
      stream.moveForwardsBy(MAGIC_LENGTH);

      // Total size of the file, as declared by the file itself
      const declaredSize = stream.readInt(SIZE_FIELD_LENGTH, "le");

      // The header and size field have been consumed already, so advance by
      // whatever remains of the declared size
      stream.moveForwardsBy(declaredSize - (MAGIC_LENGTH + SIZE_FIELD_LENGTH));

      return stream.carve();
  }
  /**
   * FLV extractor.
   *
   * Skips the file header, then walks the body, which alternates 4-byte
   * "previous tag size" fields with tags. Stops at the first tag whose type is
   * not 8, 9 or 18, or at the first "previous tag size" field that does not
   * match the tag just read, and carves up to the last point known to be valid.
   *
   * @param {Uint8Array} bytes - Data containing the FLV
   * @param {number} offset - Index in `bytes` at which the FLV starts
   * @returns {Uint8Array} The result of `stream.carve()` taken at the end of the
   *     last valid tag's trailing size field
   */
  export function extractFLV(bytes, offset) {
      const TAG_HEADER_SIZE = 11;
      // Bytes read from the next record before a mismatch can be detected:
      // 4-byte previous tag size + 1-byte tag type
      const LOOKAHEAD_SIZE = 5;

      const stream = new Stream(bytes.slice(offset));

      // Move past signature, version and flags
      stream.moveForwardsBy(5);

      // Read header size
      const headerSize = stream.readInt(4, "be");

      // Skip through the rest of the header (9 bytes consumed so far)
      stream.moveForwardsBy(headerSize - 9);

      // Fake payload size, chosen so the first "previous tag size" is expected to be 0
      let tagSize = -TAG_HEADER_SIZE;

      while (stream.position < stream.length) {
          const prevTagSize = stream.readInt(4, "be");
          const tagType = stream.readInt(1, "be");

          if (![8, 9, 18].includes(tagType)) {
              // This tag is not valid: step back over its type byte so the
              // carve ends just after the size field that closes the last tag
              stream.moveBackwardsBy(1);
              break;
          }

          if (prevTagSize !== tagSize + TAG_HEADER_SIZE) {
              // Previous tag was not valid: reverse over the bytes just read
              // from this record AND over the previous tag's header and
              // payload. Omitting the lookahead bytes (as was done before)
              // left the carve ending 5 bytes inside the invalid tag.
              stream.moveBackwardsBy(tagSize + TAG_HEADER_SIZE + LOOKAHEAD_SIZE);
              break;
          }

          tagSize = stream.readInt(3, "be");

          // Move past the rest of the tag header and payload
          stream.moveForwardsBy(7 + tagSize);
      }

      return stream.carve();
  }