GeneratePnpIDs.cpp 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301
  1. /*
  2. * Copyright (c) 2022, the SerenityOS developers.
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <AK/Debug.h>
  7. #include <AK/SourceGenerator.h>
  8. #include <LibCore/ArgsParser.h>
  9. #include <LibCore/File.h>
  10. enum class PnpIdColumns {
  11. ManufacturerName,
  12. ManufacturerId,
  13. ApprovalDate,
  14. ColumnCount // Must be last
  15. };
  16. struct ApprovalDate {
  17. unsigned year;
  18. unsigned month;
  19. unsigned day;
  20. };
  21. struct PnpIdData {
  22. String manufacturer_name;
  23. ApprovalDate approval_date;
  24. };
  25. static ErrorOr<String> decode_html_entities(StringView const& str)
  26. {
  27. static constexpr struct {
  28. StringView entity_name;
  29. StringView value;
  30. } s_html_entities[] = {
  31. { "amp"sv, "&"sv },
  32. };
  33. StringBuilder decoded_str;
  34. size_t start = 0;
  35. for (;;) {
  36. auto entity_start = str.find('&', start);
  37. if (!entity_start.has_value()) {
  38. decoded_str.append(str.substring_view(start));
  39. break;
  40. }
  41. auto entity_end = str.find(';', entity_start.value() + 1);
  42. if (!entity_end.has_value() || entity_end.value() == entity_start.value() + 1) {
  43. decoded_str.append(str.substring_view(start, entity_start.value() - start + 1));
  44. start = entity_start.value() + 1;
  45. continue;
  46. }
  47. if (str[entity_start.value() + 1] == '#') {
  48. auto entity_number = str.substring_view(entity_start.value() + 2, entity_end.value() - entity_start.value() - 2).to_uint();
  49. if (!entity_number.has_value()) {
  50. decoded_str.append(str.substring_view(start, entity_end.value() - start + 1));
  51. start = entity_end.value() + 1;
  52. continue;
  53. }
  54. if (entity_start.value() != start)
  55. decoded_str.append(str.substring_view(start, entity_start.value() - start));
  56. decoded_str.append_code_point(entity_number.value());
  57. } else {
  58. auto entity_name = str.substring_view(entity_start.value() + 1, entity_end.value() - entity_start.value() - 1);
  59. bool found_entity = false;
  60. for (auto& html_entity : s_html_entities) {
  61. if (html_entity.entity_name == entity_name) {
  62. found_entity = true;
  63. if (entity_start.value() != start)
  64. decoded_str.append(str.substring_view(start, entity_start.value() - start));
  65. decoded_str.append(html_entity.value);
  66. break;
  67. }
  68. }
  69. if (!found_entity)
  70. return Error::from_string_literal("Failed to decode html entity"sv);
  71. if (entity_start.value() != start)
  72. decoded_str.append(str.substring_view(start, entity_start.value() - start));
  73. }
  74. start = entity_end.value() + 1;
  75. }
  76. return decoded_str.build();
  77. }
  78. static ErrorOr<ApprovalDate> parse_approval_date(StringView const& str)
  79. {
  80. auto parts = str.trim_whitespace().split_view('/', true);
  81. if (parts.size() != 3)
  82. return Error::from_string_literal("Failed to parse approval date parts (mm/dd/yyyy)"sv);
  83. auto month = parts[0].to_uint();
  84. if (!month.has_value())
  85. return Error::from_string_literal("Failed to parse month from approval date"sv);
  86. if (month.value() == 0 || month.value() > 12)
  87. return Error::from_string_literal("Invalid month in approval date"sv);
  88. auto day = parts[1].to_uint();
  89. if (!day.has_value())
  90. return Error::from_string_literal("Failed to parse day from approval date"sv);
  91. if (day.value() == 0 || day.value() > 31)
  92. return Error::from_string_literal("Invalid day in approval date"sv);
  93. auto year = parts[2].to_uint();
  94. if (!year.has_value())
  95. return Error::from_string_literal("Failed to parse year from approval date"sv);
  96. if (year.value() < 1900 || year.value() > 2999)
  97. return Error::from_string_literal("Invalid year approval date"sv);
  98. return ApprovalDate { .year = year.value(), .month = month.value(), .day = day.value() };
  99. }
  100. static ErrorOr<HashMap<String, PnpIdData>> parse_pnp_ids_database(Core::File& pnp_ids_file)
  101. {
  102. auto pnp_ids_file_bytes = pnp_ids_file.read_all();
  103. StringView pnp_ids_file_contents(pnp_ids_file_bytes);
  104. HashMap<String, PnpIdData> pnp_id_data;
  105. for (size_t row_content_offset = 0;;) {
  106. static const auto row_start_tag = "<tr class=\""sv;
  107. auto row_start = pnp_ids_file_contents.find(row_start_tag, row_content_offset);
  108. if (!row_start.has_value())
  109. break;
  110. auto row_start_tag_end = pnp_ids_file_contents.find(">"sv, row_start.value() + row_start_tag.length());
  111. if (!row_start_tag_end.has_value())
  112. return Error::from_string_literal("Incomplete row start tag"sv);
  113. static const auto row_end_tag = "</tr>"sv;
  114. auto row_end = pnp_ids_file_contents.find(row_end_tag, row_start.value());
  115. if (!row_end.has_value())
  116. return Error::from_string_literal("No matching row end tag found"sv);
  117. if (row_start_tag_end.value() > row_end.value() + row_end_tag.length())
  118. return Error::from_string_literal("Invalid row start tag"sv);
  119. auto row_string = pnp_ids_file_contents.substring_view(row_start_tag_end.value() + 1, row_end.value() - row_start_tag_end.value() - 1);
  120. Vector<String, (size_t)PnpIdColumns::ColumnCount> columns;
  121. for (size_t column_row_offset = 0;;) {
  122. static const auto column_start_tag = "<td>"sv;
  123. auto column_start = row_string.find(column_start_tag, column_row_offset);
  124. if (!column_start.has_value())
  125. break;
  126. static const auto column_end_tag = "</td>"sv;
  127. auto column_end = row_string.find(column_end_tag, column_start.value() + column_start_tag.length());
  128. if (!column_end.has_value())
  129. return Error::from_string_literal("No matching column end tag found"sv);
  130. auto column_content_row_offset = column_start.value() + column_start_tag.length();
  131. auto column_str = row_string.substring_view(column_content_row_offset, column_end.value() - column_content_row_offset).trim_whitespace();
  132. if (column_str.find('\"').has_value())
  133. return Error::from_string_literal("Found '\"' in column content, escaping not supported!"sv);
  134. columns.append(column_str);
  135. column_row_offset = column_end.value() + column_end_tag.length();
  136. }
  137. if (columns.size() != (size_t)PnpIdColumns::ColumnCount)
  138. return Error::from_string_literal("Unexpected number of columns found"sv);
  139. auto approval_date = TRY(parse_approval_date(columns[(size_t)PnpIdColumns::ApprovalDate]));
  140. auto decoded_manufacturer_name = TRY(decode_html_entities(columns[(size_t)PnpIdColumns::ManufacturerName]));
  141. auto hash_set_result = pnp_id_data.set(columns[(size_t)PnpIdColumns::ManufacturerId], PnpIdData { .manufacturer_name = decoded_manufacturer_name, .approval_date = move(approval_date) });
  142. if (hash_set_result != AK::HashSetResult::InsertedNewEntry)
  143. return Error::from_string_literal("Duplicate manufacturer ID encountered"sv);
  144. row_content_offset = row_end.value() + row_end_tag.length();
  145. }
  146. if (pnp_id_data.size() <= 1)
  147. return Error::from_string_literal("Expected more than one row"sv);
  148. return pnp_id_data;
  149. }
  150. static void generate_header(Core::File& file, HashMap<String, PnpIdData> const& pnp_ids)
  151. {
  152. StringBuilder builder;
  153. SourceGenerator generator { builder };
  154. generator.set("pnp_id_count", String::formatted("{}", pnp_ids.size()));
  155. generator.append(R"~~~(
  156. #pragma once
  157. #include <AK/Function.h>
  158. #include <AK/StringView.h>
  159. #include <AK/Types.h>
  160. namespace PnpIDs {
  161. struct PnpIDData {
  162. StringView manufacturer_id;
  163. StringView manufacturer_name;
  164. struct {
  165. u16 year{};
  166. u8 month{};
  167. u8 day{};
  168. } approval_date;
  169. };
  170. Optional<PnpIDData> find_by_manufacturer_id(StringView);
  171. IterationDecision for_each(Function<IterationDecision(PnpIDData const&)>);
  172. static constexpr size_t count = @pnp_id_count@;
  173. }
  174. )~~~");
  175. VERIFY(file.write(generator.as_string_view()));
  176. }
  177. static void generate_source(Core::File& file, HashMap<String, PnpIdData> const& pnp_ids)
  178. {
  179. StringBuilder builder;
  180. SourceGenerator generator { builder };
  181. generator.append(R"~~~(
  182. #include "PnpIDs.h"
  183. namespace PnpIDs {
  184. static constexpr PnpIDData s_pnp_ids[] = {
  185. )~~~");
  186. for (auto& pnp_id_data : pnp_ids) {
  187. generator.set("manufacturer_id", pnp_id_data.key);
  188. generator.set("manufacturer_name", pnp_id_data.value.manufacturer_name);
  189. generator.set("approval_year", String::formatted("{}", pnp_id_data.value.approval_date.year));
  190. generator.set("approval_month", String::formatted("{}", pnp_id_data.value.approval_date.month));
  191. generator.set("approval_day", String::formatted("{}", pnp_id_data.value.approval_date.day));
  192. generator.append(R"~~~(
  193. { "@manufacturer_id@"sv, "@manufacturer_name@"sv, { @approval_year@, @approval_month@, @approval_day@ } },
  194. )~~~");
  195. }
  196. generator.append(R"~~~(
  197. };
  198. Optional<PnpIDData> find_by_manufacturer_id(StringView manufacturer_id)
  199. {
  200. for (auto& pnp_data : s_pnp_ids) {
  201. if (pnp_data.manufacturer_id == manufacturer_id)
  202. return pnp_data;
  203. }
  204. return {};
  205. }
  206. IterationDecision for_each(Function<IterationDecision(PnpIDData const&)> callback)
  207. {
  208. for (auto& pnp_data : s_pnp_ids) {
  209. auto decision = callback(pnp_data);
  210. if (decision != IterationDecision::Continue)
  211. return decision;
  212. }
  213. return IterationDecision::Continue;
  214. }
  215. }
  216. )~~~");
  217. VERIFY(file.write(generator.as_string_view()));
  218. }
  219. ErrorOr<int> serenity_main(Main::Arguments arguments)
  220. {
  221. StringView generated_header_path;
  222. StringView generated_implementation_path;
  223. StringView pnp_ids_file_path;
  224. Core::ArgsParser args_parser;
  225. args_parser.add_option(generated_header_path, "Path to the header file to generate", "generated-header-path", 'h', "generated-header-path");
  226. args_parser.add_option(generated_implementation_path, "Path to the implementation file to generate", "generated-implementation-path", 'c', "generated-implementation-path");
  227. args_parser.add_option(pnp_ids_file_path, "Path to the input PNP ID database file", "pnp-ids-file", 'p', "pnp-ids-file");
  228. args_parser.parse(arguments);
  229. auto open_file = [&](StringView path, Core::OpenMode mode = Core::OpenMode::ReadOnly) -> ErrorOr<NonnullRefPtr<Core::File>> {
  230. if (path.is_empty()) {
  231. args_parser.print_usage(stderr, arguments.argv[0]);
  232. return Error::from_string_literal("Must provide all command line options"sv);
  233. }
  234. return Core::File::open(path, mode);
  235. };
  236. auto generated_header_file = TRY(open_file(generated_header_path, Core::OpenMode::ReadWrite));
  237. auto generated_implementation_file = TRY(open_file(generated_implementation_path, Core::OpenMode::ReadWrite));
  238. auto pnp_ids_file = TRY(open_file(pnp_ids_file_path));
  239. auto pnp_id_map = TRY(parse_pnp_ids_database(*pnp_ids_file));
  240. generate_header(*generated_header_file, pnp_id_map);
  241. generate_source(*generated_implementation_file, pnp_id_map);
  242. return 0;
  243. }