GeneratePnpIDs.cpp 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302
  1. /*
  2. * Copyright (c) 2022, the SerenityOS developers.
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <AK/SourceGenerator.h>
  7. #include <LibCore/ArgsParser.h>
  8. #include <LibCore/Stream.h>
  9. enum class PnpIdColumns {
  10. ManufacturerName,
  11. ManufacturerId,
  12. ApprovalDate,
  13. ColumnCount // Must be last
  14. };
  15. struct ApprovalDate {
  16. unsigned year;
  17. unsigned month;
  18. unsigned day;
  19. };
  20. struct PnpIdData {
  21. String manufacturer_name;
  22. ApprovalDate approval_date;
  23. };
  24. static ErrorOr<String> decode_html_entities(StringView const& str)
  25. {
  26. static constexpr struct {
  27. StringView entity_name;
  28. StringView value;
  29. } s_html_entities[] = {
  30. { "amp"sv, "&"sv },
  31. };
  32. StringBuilder decoded_str;
  33. size_t start = 0;
  34. for (;;) {
  35. auto entity_start = str.find('&', start);
  36. if (!entity_start.has_value()) {
  37. decoded_str.append(str.substring_view(start));
  38. break;
  39. }
  40. auto entity_end = str.find(';', entity_start.value() + 1);
  41. if (!entity_end.has_value() || entity_end.value() == entity_start.value() + 1) {
  42. decoded_str.append(str.substring_view(start, entity_start.value() - start + 1));
  43. start = entity_start.value() + 1;
  44. continue;
  45. }
  46. if (str[entity_start.value() + 1] == '#') {
  47. auto entity_number = str.substring_view(entity_start.value() + 2, entity_end.value() - entity_start.value() - 2).to_uint();
  48. if (!entity_number.has_value()) {
  49. decoded_str.append(str.substring_view(start, entity_end.value() - start + 1));
  50. start = entity_end.value() + 1;
  51. continue;
  52. }
  53. if (entity_start.value() != start)
  54. decoded_str.append(str.substring_view(start, entity_start.value() - start));
  55. decoded_str.append_code_point(entity_number.value());
  56. } else {
  57. auto entity_name = str.substring_view(entity_start.value() + 1, entity_end.value() - entity_start.value() - 1);
  58. bool found_entity = false;
  59. for (auto& html_entity : s_html_entities) {
  60. if (html_entity.entity_name == entity_name) {
  61. found_entity = true;
  62. if (entity_start.value() != start)
  63. decoded_str.append(str.substring_view(start, entity_start.value() - start));
  64. decoded_str.append(html_entity.value);
  65. break;
  66. }
  67. }
  68. if (!found_entity)
  69. return Error::from_string_literal("Failed to decode html entity");
  70. if (entity_start.value() != start)
  71. decoded_str.append(str.substring_view(start, entity_start.value() - start));
  72. }
  73. start = entity_end.value() + 1;
  74. }
  75. return decoded_str.build();
  76. }
  77. static ErrorOr<ApprovalDate> parse_approval_date(StringView const& str)
  78. {
  79. auto parts = str.trim_whitespace().split_view('/', true);
  80. if (parts.size() != 3)
  81. return Error::from_string_literal("Failed to parse approval date parts (mm/dd/yyyy)");
  82. auto month = parts[0].to_uint();
  83. if (!month.has_value())
  84. return Error::from_string_literal("Failed to parse month from approval date");
  85. if (month.value() == 0 || month.value() > 12)
  86. return Error::from_string_literal("Invalid month in approval date");
  87. auto day = parts[1].to_uint();
  88. if (!day.has_value())
  89. return Error::from_string_literal("Failed to parse day from approval date");
  90. if (day.value() == 0 || day.value() > 31)
  91. return Error::from_string_literal("Invalid day in approval date");
  92. auto year = parts[2].to_uint();
  93. if (!year.has_value())
  94. return Error::from_string_literal("Failed to parse year from approval date");
  95. if (year.value() < 1900 || year.value() > 2999)
  96. return Error::from_string_literal("Invalid year approval date");
  97. return ApprovalDate { .year = year.value(), .month = month.value(), .day = day.value() };
  98. }
  99. static ErrorOr<HashMap<String, PnpIdData>> parse_pnp_ids_database(Core::Stream::File& pnp_ids_file)
  100. {
  101. auto pnp_ids_file_bytes = TRY(pnp_ids_file.read_all());
  102. StringView pnp_ids_file_contents(pnp_ids_file_bytes);
  103. HashMap<String, PnpIdData> pnp_id_data;
  104. for (size_t row_content_offset = 0;;) {
  105. static auto const row_start_tag = "<tr class=\""sv;
  106. auto row_start = pnp_ids_file_contents.find(row_start_tag, row_content_offset);
  107. if (!row_start.has_value())
  108. break;
  109. auto row_start_tag_end = pnp_ids_file_contents.find(">"sv, row_start.value() + row_start_tag.length());
  110. if (!row_start_tag_end.has_value())
  111. return Error::from_string_literal("Incomplete row start tag");
  112. static auto const row_end_tag = "</tr>"sv;
  113. auto row_end = pnp_ids_file_contents.find(row_end_tag, row_start.value());
  114. if (!row_end.has_value())
  115. return Error::from_string_literal("No matching row end tag found");
  116. if (row_start_tag_end.value() > row_end.value() + row_end_tag.length())
  117. return Error::from_string_literal("Invalid row start tag");
  118. auto row_string = pnp_ids_file_contents.substring_view(row_start_tag_end.value() + 1, row_end.value() - row_start_tag_end.value() - 1);
  119. Vector<String, (size_t)PnpIdColumns::ColumnCount> columns;
  120. for (size_t column_row_offset = 0;;) {
  121. static auto const column_start_tag = "<td>"sv;
  122. auto column_start = row_string.find(column_start_tag, column_row_offset);
  123. if (!column_start.has_value())
  124. break;
  125. static auto const column_end_tag = "</td>"sv;
  126. auto column_end = row_string.find(column_end_tag, column_start.value() + column_start_tag.length());
  127. if (!column_end.has_value())
  128. return Error::from_string_literal("No matching column end tag found");
  129. auto column_content_row_offset = column_start.value() + column_start_tag.length();
  130. auto column_str = row_string.substring_view(column_content_row_offset, column_end.value() - column_content_row_offset).trim_whitespace();
  131. if (column_str.find('\"').has_value())
  132. return Error::from_string_literal("Found '\"' in column content, escaping not supported!");
  133. columns.append(column_str);
  134. column_row_offset = column_end.value() + column_end_tag.length();
  135. }
  136. if (columns.size() != (size_t)PnpIdColumns::ColumnCount)
  137. return Error::from_string_literal("Unexpected number of columns found");
  138. auto approval_date = TRY(parse_approval_date(columns[(size_t)PnpIdColumns::ApprovalDate]));
  139. auto decoded_manufacturer_name = TRY(decode_html_entities(columns[(size_t)PnpIdColumns::ManufacturerName]));
  140. auto hash_set_result = pnp_id_data.set(columns[(size_t)PnpIdColumns::ManufacturerId], PnpIdData { .manufacturer_name = decoded_manufacturer_name, .approval_date = move(approval_date) });
  141. if (hash_set_result != AK::HashSetResult::InsertedNewEntry)
  142. return Error::from_string_literal("Duplicate manufacturer ID encountered");
  143. row_content_offset = row_end.value() + row_end_tag.length();
  144. }
  145. if (pnp_id_data.size() <= 1)
  146. return Error::from_string_literal("Expected more than one row");
  147. return pnp_id_data;
  148. }
  149. static ErrorOr<void> generate_header(Core::Stream::File& file, HashMap<String, PnpIdData> const& pnp_ids)
  150. {
  151. StringBuilder builder;
  152. SourceGenerator generator { builder };
  153. generator.set("pnp_id_count", String::formatted("{}", pnp_ids.size()));
  154. generator.append(R"~~~(
  155. #pragma once
  156. #include <AK/Function.h>
  157. #include <AK/StringView.h>
  158. #include <AK/Types.h>
  159. namespace PnpIDs {
  160. struct PnpIDData {
  161. StringView manufacturer_id;
  162. StringView manufacturer_name;
  163. struct {
  164. u16 year{};
  165. u8 month{};
  166. u8 day{};
  167. } approval_date;
  168. };
  169. Optional<PnpIDData> find_by_manufacturer_id(StringView);
  170. IterationDecision for_each(Function<IterationDecision(PnpIDData const&)>);
  171. static constexpr size_t count = @pnp_id_count@;
  172. }
  173. )~~~");
  174. TRY(file.write(generator.as_string_view().bytes()));
  175. return {};
  176. }
  177. static ErrorOr<void> generate_source(Core::Stream::File& file, HashMap<String, PnpIdData> const& pnp_ids)
  178. {
  179. StringBuilder builder;
  180. SourceGenerator generator { builder };
  181. generator.append(R"~~~(
  182. #include "PnpIDs.h"
  183. namespace PnpIDs {
  184. static constexpr PnpIDData s_pnp_ids[] = {
  185. )~~~");
  186. for (auto& pnp_id_data : pnp_ids) {
  187. generator.set("manufacturer_id", pnp_id_data.key);
  188. generator.set("manufacturer_name", pnp_id_data.value.manufacturer_name);
  189. generator.set("approval_year", String::formatted("{}", pnp_id_data.value.approval_date.year));
  190. generator.set("approval_month", String::formatted("{}", pnp_id_data.value.approval_date.month));
  191. generator.set("approval_day", String::formatted("{}", pnp_id_data.value.approval_date.day));
  192. generator.append(R"~~~(
  193. { "@manufacturer_id@"sv, "@manufacturer_name@"sv, { @approval_year@, @approval_month@, @approval_day@ } },
  194. )~~~");
  195. }
  196. generator.append(R"~~~(
  197. };
  198. Optional<PnpIDData> find_by_manufacturer_id(StringView manufacturer_id)
  199. {
  200. for (auto& pnp_data : s_pnp_ids) {
  201. if (pnp_data.manufacturer_id == manufacturer_id)
  202. return pnp_data;
  203. }
  204. return {};
  205. }
  206. IterationDecision for_each(Function<IterationDecision(PnpIDData const&)> callback)
  207. {
  208. for (auto& pnp_data : s_pnp_ids) {
  209. auto decision = callback(pnp_data);
  210. if (decision != IterationDecision::Continue)
  211. return decision;
  212. }
  213. return IterationDecision::Continue;
  214. }
  215. }
  216. )~~~");
  217. TRY(file.write(generator.as_string_view().bytes()));
  218. return {};
  219. }
  220. ErrorOr<int> serenity_main(Main::Arguments arguments)
  221. {
  222. StringView generated_header_path;
  223. StringView generated_implementation_path;
  224. StringView pnp_ids_file_path;
  225. Core::ArgsParser args_parser;
  226. args_parser.add_option(generated_header_path, "Path to the header file to generate", "generated-header-path", 'h', "generated-header-path");
  227. args_parser.add_option(generated_implementation_path, "Path to the implementation file to generate", "generated-implementation-path", 'c', "generated-implementation-path");
  228. args_parser.add_option(pnp_ids_file_path, "Path to the input PNP ID database file", "pnp-ids-file", 'p', "pnp-ids-file");
  229. args_parser.parse(arguments);
  230. auto open_file = [&](StringView path, Core::Stream::OpenMode mode = Core::Stream::OpenMode::Read) -> ErrorOr<NonnullOwnPtr<Core::Stream::File>> {
  231. if (path.is_empty()) {
  232. args_parser.print_usage(stderr, arguments.argv[0]);
  233. return Error::from_string_literal("Must provide all command line options");
  234. }
  235. return Core::Stream::File::open(path, mode);
  236. };
  237. auto generated_header_file = TRY(open_file(generated_header_path, Core::Stream::OpenMode::ReadWrite));
  238. auto generated_implementation_file = TRY(open_file(generated_implementation_path, Core::Stream::OpenMode::ReadWrite));
  239. auto pnp_ids_file = TRY(open_file(pnp_ids_file_path));
  240. auto pnp_id_map = TRY(parse_pnp_ids_database(*pnp_ids_file));
  241. TRY(generate_header(*generated_header_file, pnp_id_map));
  242. TRY(generate_source(*generated_implementation_file, pnp_id_map));
  243. return 0;
  244. }