Protobuf.mjs 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562
  1. import Utils from "../Utils.mjs";
  2. import protobuf from "protobufjs";
  /**
   * Protobuf lib. Contains functions to decode protobuf serialised
   * data without a schema or .proto file.
   *
   * Provides utility functions to encode and decode variable length
   * integers (varint).
   *
   * Instances wrap one byte buffer and walk it with a moving offset (the
   * schema-less "raw" decoder). The static schema-aware helpers keep their
   * state (parsedProto, mainMessageName, showUnknownFields, showTypes) on the
   * class itself, so each call to encode()/decode() overwrites the previous
   * call's state.
   *
   * @author GCHQ Contributor [3]
   * @copyright Crown Copyright 2019
   * @license Apache-2.0
   */
  class Protobuf {

      /**
       * Protobuf constructor
       *
       * @param {byteArray|Uint8Array} data - serialised protobuf bytes
       * @throws {Error} if data is neither an Array nor a Uint8Array
       */
      constructor(data) {
          // Check we have a byteArray or Uint8Array
          if (!(data instanceof Array || data instanceof Uint8Array)) {
              throw new Error("Protobuf input must be a byteArray or Uint8Array");
          }
          this.data = data;

          // Bit masks applied to tag and varint bytes
          this.TYPE = 0x07;   // wire type: lower 3 bits of the first tag byte
          this.NUMBER = 0x78; // field number bits held in the first tag byte
          this.MSB = 0x80;    // continuation flag of a varint byte
          this.VALUE = 0x7f;  // payload bits of a varint byte

          // Read position, total length, and the wire types seen per field number
          this.offset = 0;
          this.LENGTH = data.length;
          this.fieldTypes = {};
      }

      // Public Functions

      /**
       * Encode a varint from a number
       *
       * @param {number} number
       * @returns {byteArray} little-endian base-128 bytes, continuation bit set on all but the last
       */
      static varIntEncode(number) {
          const MSB = 0x80;
          const VALUE = 0x7f;
          const MSBALL = ~VALUE;
          const INT = Math.pow(2, 31);

          const out = [];
          let offset = 0;

          // Bitwise shifts only work on 32 bits, so divide while the value is larger
          while (number >= INT) {
              out[offset++] = (number & 0xff) | MSB;
              number /= 128;
          }
          // Emit 7 bits at a time while anything remains above the low 7 bits
          while (number & MSBALL) {
              out[offset++] = (number & 0xff) | MSB;
              number >>>= 7;
          }
          out[offset] = number | 0;
          return out;
      }

      /**
       * Decode a varint from the byteArray
       *
       * @param {byteArray} input
       * @returns {number}
       */
      static varIntDecode(input) {
          return new Protobuf(input)._varInt();
      }

      /**
       * Encode input JSON according to the given schema
       *
       * @param {Object} input - plain object matching the schema's main message
       * @param {Object []} args - args[0] is the .proto schema text
       * @returns {Object} ArrayBuffer holding the encoded message
       * @throws {Error} if the schema defines no message or the input fails verification
       */
      static encode(input, args) {
          this.updateProtoRoot(args[0]);
          if (!this.mainMessageName) {
              throw new Error("Schema Error: Schema not defined");
          }
          const message = this.parsedProto.root.nested[this.mainMessageName];

          // Convert input into instance of message, and verify instance
          input = message.fromObject(input);
          const error = message.verify(input);
          if (error) {
              throw new Error("Input Error: " + error);
          }

          // Encode input
          const output = message.encode(input).finish();
          return new Uint8Array(output).buffer;
      }

      /**
       * Parse Protobuf data
       *
       * @param {byteArray} input
       * @param {Object []} args - [schema text, show unknown fields, show types]
       * @returns {Object}
       */
      static decode(input, args) {
          this.updateProtoRoot(args[0]);
          this.showUnknownFields = args[1];
          this.showTypes = args[2];
          return this.mergeDecodes(input);
      }

      /**
       * Update the parsedProto, throw parsing errors
       *
       * When the schema declares a package, parsedProto.root is re-pointed at
       * that package's namespace so message types can be found directly in
       * root.nested.
       *
       * @param {string} protoText
       * @throws {Error} "Schema " followed by the underlying error text
       */
      static updateProtoRoot(protoText) {
          try {
              this.parsedProto = protobuf.parse(protoText);
              if (this.parsedProto.package) {
                  // lookup() follows dotted package paths such as "foo.bar";
                  // a plain root.nested["foo.bar"] access would yield undefined
                  const packageRoot = this.parsedProto.root.lookup(this.parsedProto.package);
                  if (packageRoot) {
                      this.parsedProto.root = packageRoot;
                  }
              }
              this.updateMainMessageName();
          } catch (error) {
              throw new Error("Schema " + error);
          }
      }

      /**
       * Set mainMessageName to the first message type defined in the schema,
       * or null when the schema defines none. No attempt is made to skip
       * messages that are only used as submessages.
       */
      static updateMainMessageName() {
          const firstMessage = this.parsedProto.root.nestedArray
              .find(block => block instanceof protobuf.Type);
          this.mainMessageName = firstMessage ? firstMessage.name : null;
      }

      /**
       * Decode input using Protobufjs package and raw methods, compare, and merge results
       *
       * @param {byteArray} input
       * @returns {Object} the schema decode (optionally with an "Unknown Fields"
       *     section), or the raw decode when no schema message is available
       * @throws {Error} "Input " followed by the underlying error when schema decoding fails
       */
      static mergeDecodes(input) {
          const pb = new Protobuf(input);
          let rawDecode = pb._parse();
          let message;

          if (this.showTypes) {
              rawDecode = this.showRawTypes(rawDecode, pb.fieldTypes);
              this.parsedProto.root = this.appendTypesToFieldNames(this.parsedProto.root);
          }

          try {
              // With no usable schema this lookup (or the first use of message)
              // throws, which the catch below turns into the raw-decode fallback
              message = this.parsedProto.root.nested[this.mainMessageName];
              const packageDecode = message.toObject(message.decode(input), {
                  bytes: String,
                  longs: Number,
                  enums: String,
                  defaults: true
              });

              if (!this.showUnknownFields) {
                  return packageDecode;
              }
              const output = {};
              output[message.name] = packageDecode;
              output["Unknown Fields"] = this.compareFields(rawDecode, message);
              return output;
          } catch (error) {
              if (message) {
                  throw new Error("Input " + error);
              }
              return rawDecode;
          }
      }

      /**
       * Replace fieldnames with fieldname and type
       *
       * @param {Object} schemaRoot
       * @returns {Object} the same schemaRoot, modified in place
       */
      static appendTypesToFieldNames(schemaRoot) {
          for (const block of schemaRoot.nestedArray) {
              if (!(block instanceof protobuf.Type)) {
                  continue;
              }
              const messageType = schemaRoot.nested[block.name];
              // Object.entries snapshots the fields, so swapping them mid-loop is safe
              for (const [fieldName, fieldData] of Object.entries(block.fields)) {
                  messageType.remove(block.fields[fieldName]);
                  messageType.add(new protobuf.Field(`${fieldName} (${fieldData.type})`, fieldData.id, fieldData.type, fieldData.rule));
              }
          }
          return schemaRoot;
      }

      /**
       * Add field type to field name for fields in the raw decoded output
       *
       * @param {Object} rawDecode - raw decode keyed by field number; modified in place
       * @param {Object} fieldTypes - wire type per field number; an object marks a submessage
       * @returns {Object} rawDecode with keys renamed to "field #N: <type info>"
       */
      static showRawTypes(rawDecode, fieldTypes) {
          for (const [fieldNum, value] of Object.entries(rawDecode)) {
              const fieldType = fieldTypes[fieldNum];
              let outputFieldValue;
              let outputFieldType;

              if (typeof fieldType !== "number") {
                  // Submessages are length-delimited on the wire
                  outputFieldType = 2;
                  if (Array.isArray(value)) {
                      // Repeated submessages: instances that fell back to a string are kept as-is
                      outputFieldValue = value.map(instance =>
                          typeof instance !== "string" ?
                              this.showRawTypes(instance, fieldType) :
                              instance
                      );
                  } else {
                      // Single submessage
                      outputFieldValue = this.showRawTypes(value, fieldType);
                  }
              } else {
                  // Non-submessage field
                  outputFieldType = fieldType;
                  outputFieldValue = value;
              }

              // Substitute fieldNum with field number and type
              rawDecode[`field #${fieldNum}: ${this.getTypeInfo(outputFieldType)}`] = outputFieldValue;
              delete rawDecode[fieldNum];
          }
          return rawDecode;
      }

      /**
       * Compare raw decode to package decode and return discrepancies
       *
       * Fields known to the schema are removed from rawDecodedMessage; what is
       * left (plus any notes added here) is what the schema does not describe.
       *
       * @param {Object} rawDecodedMessage - raw decode, keyed by field number or "field #N: ..." label; modified in place
       * @param {Object} schemaMessage - message type exposing name and fields
       * @returns {Object} rawDecodedMessage
       */
      static compareFields(rawDecodedMessage, schemaMessage) {
          // Map each schema field id to its field name
          const schemaFieldProperties = {};
          for (const [name, field] of Object.entries(schemaMessage.fields)) {
              schemaFieldProperties[field.id] = name;
          }

          // Iterate a snapshot of the keys: entries are added and deleted below
          for (const fieldName of Object.keys(rawDecodedMessage)) {
              let fieldId = fieldName;
              // Global isNaN on purpose: keys are strings and numeric ones must coerce
              if (isNaN(fieldName)) {
                  const labelled = fieldName.match(/^field #(\d+)/);
                  if (!labelled) {
                      // Not a field key (e.g. a note added earlier) - leave untouched
                      continue;
                  }
                  fieldId = labelled[1];
              }

              // Only fields defined in the schema are inspected and removed
              if (!Object.prototype.hasOwnProperty.call(schemaFieldProperties, fieldId)) {
                  continue;
              }
              const schemaFieldName = schemaFieldProperties[fieldId];
              const rawFieldData = rawDecodedMessage[fieldName];
              const schemaField = schemaMessage.fields[schemaFieldName];

              // Check for repeated fields
              if (Array.isArray(rawFieldData) && !schemaField.repeated) {
                  rawDecodedMessage[`(${schemaMessage.name}) ${schemaFieldName} is a repeated field`] = rawFieldData;
              }

              // Check for submessage fields
              if (schemaField.resolvedType instanceof protobuf.Type) {
                  const subMessageType = schemaField.type;
                  // Use the resolved type rather than a by-name lookup in the root,
                  // which misses qualified or nested type names
                  const schemaSubMessage = schemaField.resolvedType;
                  let rawDecodedSubMessage = {};

                  if (Array.isArray(rawFieldData)) {
                      // Squash multiple submessage instances into one submessage
                      for (const subMessageInstance of rawFieldData) {
                          for (const [subKey, subValue] of Object.entries(subMessageInstance)) {
                              rawDecodedSubMessage[subKey] = subValue;
                          }
                      }
                  } else {
                      rawDecodedSubMessage = rawFieldData;
                  }

                  // Treat submessage as own message and compare its fields
                  rawDecodedSubMessage = Protobuf.compareFields(rawDecodedSubMessage, schemaSubMessage);
                  if (Object.entries(rawDecodedSubMessage).length !== 0) {
                      rawDecodedMessage[`${schemaFieldName} (${subMessageType}) has missing fields`] = rawDecodedSubMessage;
                  }
              }
              delete rawDecodedMessage[fieldName];
          }
          return rawDecodedMessage;
      }

      /**
       * Returns wiretype information for input wiretype number
       *
       * @param {number} wireType
       * @returns {string} description, or undefined for any other wire type
       */
      static getTypeInfo(wireType) {
          switch (wireType) {
              case 0:
                  return "VarInt (e.g. int32, bool)";
              case 1:
                  return "64-Bit (e.g. fixed64, double)";
              case 2:
                  return "L-delim (e.g. string, message)";
              case 5:
                  return "32-Bit (e.g. fixed32, float)";
          }
      }

      // Private Class Functions

      /**
       * Main private parsing function
       *
       * @private
       * @returns {Object} decoded fields keyed by field number
       * @throws {Error} "Exhausted Buffer" if a field ran past the end of the data
       */
      _parse() {
          let object = {};
          // Continue reading whilst we still have data
          while (this.offset < this.LENGTH) {
              const field = this._parseField();
              object = this._addField(field, object);
          }
          // Throw an error if we have gone beyond the end of the data
          if (this.offset > this.LENGTH) {
              throw new Error("Exhausted Buffer");
          }
          return object;
      }

      /**
       * Add a field read from the protobuf data into the Object. As
       * protobuf fields can appear multiple times, if the field already
       * exists we need to add the new field into an array of fields
       * for that key.
       *
       * @private
       * @param {Object} field - { key, value } as returned by _parseField
       * @param {Object} object - accumulator, modified in place
       * @returns {Object} object
       */
      _addField(field, object) {
          const { key, value } = field;
          if (!Object.prototype.hasOwnProperty.call(object, key)) {
              // First occurrence: store the bare value
              object[key] = value;
          } else if (object[key] instanceof Array) {
              // Third or later occurrence: extend the existing list
              object[key] = [...object[key], value];
          } else {
              // Second occurrence: promote to a list
              object[key] = [object[key], value];
          }
          return object;
      }

      /**
       * Parse a field and return the Object read from the record
       *
       * @private
       * @returns {Object} { key, value }
       * @throws {Error} on a wire type other than 0, 1, 2 or 5
       */
      _parseField() {
          // Get the field headers
          const { type, key } = this._fieldHeader();

          // Keep an already-recorded submessage type map; otherwise note the wire type
          if (typeof(this.fieldTypes[key]) !== "object") {
              this.fieldTypes[key] = type;
          }

          switch (type) {
              // varint
              case 0:
                  return { "key": key, "value": this._varInt() };
              // fixed 64
              case 1:
                  return { "key": key, "value": this._uint64() };
              // length delimited
              case 2:
                  return { "key": key, "value": this._lenDelim(key) };
              // fixed 32
              case 5:
                  return { "key": key, "value": this._uint32() };
              // unknown type
              default:
                  throw new Error("Unknown type 0x" + type.toString(16));
          }
      }

      /**
       * Parse the field header and return the type and key
       *
       * @private
       * @returns {Object} { type, key }
       */
      _fieldHeader() {
          // Type must be read first: it peeks at the tag byte, whereas reading
          // the field number consumes it and moves the offset on
          const type = this._fieldType();
          const key = this._fieldNumber();
          return { "type": type, "key": key };
      }

      /**
       * Parse the field type from the field header. Type is stored in the
       * lower 3 bits of the tag byte. This does not move the offset on as
       * we need to read the field number from the tag byte too.
       *
       * @private
       * @returns {number}
       */
      _fieldType() {
          // Field type stored in lower 3 bits of tag byte
          return this.data[this.offset] & this.TYPE;
      }

      /**
       * Parse the field number (i.e. the key) from the field header. The
       * field number is stored in the upper 5 bits of the tag byte - but
       * is also varint encoded so the follow on bytes may need to be read
       * when field numbers are > 15.
       *
       * @private
       * @returns {number}
       */
      _fieldNumber() {
          // First tag byte: the field number bits sit above the 3 wire type bits
          let byte = this.data[this.offset++];
          let fieldNumber = (byte & this.NUMBER) >> 3;

          // Continuation bytes each contribute 7 more bits, starting at bit 4.
          // Multiply rather than shift: a shift by 25 could overflow into the
          // int32 sign bit and produce a negative field number
          let shift = 4;
          while ((byte & this.MSB) === this.MSB) {
              byte = this.data[this.offset++];
              fieldNumber += (byte & this.VALUE) * Math.pow(2, shift);
              shift += 7;
          }
          return fieldNumber;
      }

      // Field Parsing Functions

      /**
       * Read off a varint from the data
       *
       * @private
       * @returns {number}
       */
      _varInt() {
          let value = 0;
          let shift = 0;
          let byte;
          // Keep reading while upper bit set
          do {
              byte = this.data[this.offset++];
              // Multiplication instead of << so groups beyond 32 bits still accumulate
              value += (byte & this.VALUE) * Math.pow(2, shift);
              shift += 7;
          } while ((byte & this.MSB) === this.MSB);
          return value;
      }

      /**
       * Read off a 64 bit unsigned integer from the data
       *
       * @private
       * @returns {number} the value as a JS number (values above 2^53 lose precision)
       */
      _uint64() {
          // Read off a Uint64 with little-endian, as two 32 bit halves
          const readHalf = () => {
              let half = 0;
              for (let i = 0; i < 4; i++) {
                  half += this.data[this.offset++] * Math.pow(0x100, i);
              }
              return half;
          };
          const lowerHalf = readHalf();
          const upperHalf = readHalf();
          return upperHalf * 0x100000000 + lowerHalf;
      }

      /**
       * Read off a length delimited field from the data
       *
       * @private
       * @param {number} fieldNum - field number, used to record nested field types
       * @returns {Object|string} nested message if the bytes parse as protobuf, else the bytes as chars
       */
      _lenDelim(fieldNum) {
          // Read off the field length
          const length = this._varInt();
          const fieldBytes = this.data.slice(this.offset, this.offset + length);
          let field;
          try {
              // Attempt to parse as a new Protobuf Object
              const pbObject = new Protobuf(fieldBytes);
              field = pbObject._parse();
              // Record the nested wire types, merged with any seen in earlier instances
              this.fieldTypes[fieldNum] = {...this.fieldTypes[fieldNum], ...pbObject.fieldTypes};
          } catch (err) {
              // Deliberate best effort: anything that is not valid protobuf is treated as bytes
              field = Utils.byteArrayToChars(fieldBytes);
          }
          // Move the offset and return the field
          this.offset += length;
          return field;
      }

      /**
       * Read a 32 bit unsigned integer from the data
       *
       * @private
       * @returns {number}
       */
      _uint32() {
          // Use a dataview to read off the integer (little-endian)
          const fourBytes = this.data.slice(this.offset, this.offset + 4);
          const dataview = new DataView(new Uint8Array(fourBytes).buffer);
          const value = dataview.getUint32(0, true);
          this.offset += 4;
          return value;
      }

  }
  510. export default Protobuf;