GeneratorUtil.h 14 KB

/*
 * Copyright (c) 2021-2024, Tim Flynn <trflynn89@serenityos.org>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */
  6. #pragma once
  7. #include <AK/ByteString.h>
  8. #include <AK/Function.h>
  9. #include <AK/HashFunctions.h>
  10. #include <AK/HashMap.h>
  11. #include <AK/LexicalPath.h>
  12. #include <AK/NumericLimits.h>
  13. #include <AK/Optional.h>
  14. #include <AK/QuickSort.h>
  15. #include <AK/SourceGenerator.h>
  16. #include <AK/StringView.h>
  17. #include <AK/Traits.h>
  18. #include <AK/Vector.h>
  19. #include <LibCore/File.h>
  20. #include <LibUnicode/CharacterTypes.h>
  21. template<typename T>
  22. concept IntegralOrEnum = Integral<T> || Enum<T>;
  23. template<IntegralOrEnum T>
  24. struct AK::Traits<Vector<T>> : public DefaultTraits<Vector<T>> {
  25. static unsigned hash(Vector<T> const& list)
  26. {
  27. auto hash = int_hash(static_cast<u32>(list.size()));
  28. for (auto value : list) {
  29. if constexpr (Enum<T>)
  30. hash = pair_int_hash(hash, to_underlying(value));
  31. else
  32. hash = pair_int_hash(hash, value);
  33. }
  34. return hash;
  35. }
  36. };
  37. template<typename StorageType>
  38. class UniqueStorage {
  39. public:
  40. size_t ensure(StorageType value)
  41. {
  42. // We maintain a set of unique values in two structures: a vector which stores the values in
  43. // the order they are added, and a hash map which maps that value to its index in the vector.
  44. // The vector is to ensure the values are generated in an easily known order, and the map is
  45. // to allow quickly deciding if a value is actually unique (otherwise, we'd have to linearly
  46. // search the vector for each value).
  47. //
  48. // Also note that index 0 will be reserved for the default-initialized value, so the index
  49. // returned from this method is actually the real index in the vector + 1.
  50. if (auto index = m_storage_indices.get(value); index.has_value())
  51. return *index;
  52. m_storage.append(move(value));
  53. auto storage_index = m_storage.size();
  54. m_storage_indices.set(m_storage.last(), storage_index);
  55. return storage_index;
  56. }
  57. StorageType const& get(size_t index) const
  58. {
  59. if (index == 0) {
  60. static StorageType empty {};
  61. return empty;
  62. }
  63. VERIFY(index <= m_storage.size());
  64. return m_storage.at(index - 1);
  65. }
  66. StringView type_that_fits() const
  67. {
  68. if (m_storage.size() <= NumericLimits<u8>::max())
  69. return "u8"sv;
  70. if (m_storage.size() <= NumericLimits<u16>::max())
  71. return "u16"sv;
  72. if (m_storage.size() <= NumericLimits<u32>::max())
  73. return "u32"sv;
  74. return "u64"sv;
  75. }
  76. protected:
  77. Vector<StorageType> m_storage;
  78. HashMap<StorageType, size_t> m_storage_indices;
  79. };
  80. class UniqueStringStorage : public UniqueStorage<ByteString> {
  81. using Base = UniqueStorage<ByteString>;
  82. public:
  83. // The goal of the string table generator is to ensure the table is located within the read-only
  84. // section of the shared library. If StringViews are generated directly, the table will be located
  85. // in the initialized data section. So instead, we generate run-length encoded (RLE) arrays to
  86. // represent the strings.
  87. void generate(SourceGenerator& generator) const
  88. {
  89. constexpr size_t max_values_per_row = 300;
  90. size_t values_in_current_row = 0;
  91. auto append_hex_value = [&](auto value) {
  92. if (values_in_current_row++ > 0)
  93. generator.append(", ");
  94. generator.append(ByteString::formatted("{:#x}", value));
  95. if (values_in_current_row == max_values_per_row) {
  96. values_in_current_row = 0;
  97. generator.append(",\n ");
  98. }
  99. };
  100. Vector<u32> string_indices;
  101. string_indices.ensure_capacity(Base::m_storage.size());
  102. u32 next_index { 0 };
  103. for (auto const& string : Base::m_storage) {
  104. // Ensure the string length may be encoded as two u8s.
  105. VERIFY(string.length() <= NumericLimits<u16>::max());
  106. string_indices.unchecked_append(next_index);
  107. next_index += string.length() + 2;
  108. }
  109. generator.set("size", ByteString::number(next_index));
  110. generator.append(R"~~~(
  111. static constexpr Array<u8, @size@> s_encoded_strings { {
  112. )~~~");
  113. for (auto const& string : Base::m_storage) {
  114. auto length = string.length();
  115. append_hex_value((length & 0xff00) >> 8);
  116. append_hex_value(length & 0x00ff);
  117. for (auto ch : string)
  118. append_hex_value(static_cast<u8>(ch));
  119. }
  120. generator.append(R"~~~(
  121. } };
  122. )~~~");
  123. generator.set("size", ByteString::number(string_indices.size()));
  124. generator.append(R"~~~(
  125. static constexpr Array<u32, @size@> s_encoded_string_indices { {
  126. )~~~");
  127. values_in_current_row = 0;
  128. for (auto index : string_indices)
  129. append_hex_value(index);
  130. generator.append(R"~~~(
  131. } };
  132. static constexpr StringView decode_string(size_t index)
  133. {
  134. if (index == 0)
  135. return {};
  136. index = s_encoded_string_indices[index - 1];
  137. auto length_high = s_encoded_strings[index];
  138. auto length_low = s_encoded_strings[index + 1];
  139. size_t length = (length_high << 8) | length_low;
  140. if (length == 0)
  141. return {};
  142. auto const* start = &s_encoded_strings[index + 2];
  143. return { reinterpret_cast<char const*>(start), length };
  144. }
  145. )~~~");
  146. }
  147. };
// Associates an alternative identifier with an existing one. generate_enum() emits each entry as
// an extra enumerator of the form `<alias> = <name>`.
struct Alias {
    // The existing value that the alias refers to.
    ByteString name;
    // The additional identifier to emit for that value.
    ByteString alias;
};
  152. inline ErrorOr<NonnullOwnPtr<Core::InputBufferedFile>> open_file(StringView path, Core::File::OpenMode mode)
  153. {
  154. if (path.is_empty())
  155. return Error::from_string_literal("Provided path is empty, please provide all command line options");
  156. auto file = TRY(Core::File::open(path, mode));
  157. return Core::InputBufferedFile::create(move(file));
  158. }
  159. inline void ensure_from_string_types_are_generated(SourceGenerator& generator)
  160. {
  161. static bool generated_from_string_types = false;
  162. if (generated_from_string_types)
  163. return;
  164. generator.append(R"~~~(
  165. template <typename ValueType>
  166. struct HashValuePair {
  167. unsigned hash { 0 };
  168. ValueType value {};
  169. };
  170. template <typename ValueType>
  171. struct HashValueComparator
  172. {
  173. constexpr int operator()(unsigned hash, HashValuePair<ValueType> const& pair)
  174. {
  175. if (hash > pair.hash)
  176. return 1;
  177. if (hash < pair.hash)
  178. return -1;
  179. return 0;
  180. }
  181. };
  182. )~~~");
  183. generated_from_string_types = true;
  184. }
// Map from an unsigned hash to the value it should resolve to; this is the input consumed by
// generate_value_from_string(). Presumably each key is the hash of the string to be recognized,
// since the generated lookup hashes its argument before searching - confirm against callers.
template<typename ValueType>
using HashValueMap = HashMap<unsigned, ValueType>;
  187. struct ValueFromStringOptions {
  188. Optional<StringView> return_type {};
  189. StringView return_format { "{}"sv };
  190. CaseSensitivity sensitivity { CaseSensitivity::CaseSensitive };
  191. };
  192. template<typename ValueType>
  193. void generate_value_from_string(SourceGenerator& generator, StringView method_name_format, StringView value_type, StringView value_name, HashValueMap<ValueType> hashes, ValueFromStringOptions options = {})
  194. {
  195. ensure_from_string_types_are_generated(generator);
  196. generator.set("method_name", ByteString::formatted(method_name_format, value_name));
  197. generator.set("value_type", value_type);
  198. generator.set("value_name", value_name);
  199. generator.set("return_type", options.return_type.has_value() ? *options.return_type : value_type);
  200. generator.set("size", ByteString::number(hashes.size()));
  201. generator.append(R"~~~(
  202. Optional<@return_type@> @method_name@(StringView key)
  203. {
  204. constexpr Array<HashValuePair<@value_type@>, @size@> hash_pairs { {
  205. )~~~");
  206. auto hash_keys = hashes.keys();
  207. quick_sort(hash_keys);
  208. constexpr size_t max_values_per_row = 10;
  209. size_t values_in_current_row = 0;
  210. for (auto hash_key : hash_keys) {
  211. if (values_in_current_row++ > 0)
  212. generator.append(" ");
  213. if constexpr (IsIntegral<ValueType>)
  214. generator.set("value"sv, ByteString::number(hashes.get(hash_key).value()));
  215. else
  216. generator.set("value"sv, ByteString::formatted("{}::{}", value_type, hashes.get(hash_key).value()));
  217. generator.set("hash"sv, ByteString::number(hash_key));
  218. generator.append("{ @hash@U, @value@ },"sv);
  219. if (values_in_current_row == max_values_per_row) {
  220. generator.append("\n ");
  221. values_in_current_row = 0;
  222. }
  223. }
  224. generator.set("return_statement", ByteString::formatted(options.return_format, "value->value"sv));
  225. generator.append(R"~~~(
  226. } };
  227. )~~~");
  228. if (options.sensitivity == CaseSensitivity::CaseSensitive) {
  229. generator.append(R"~~~(
  230. auto hash = key.hash();
  231. )~~~");
  232. } else {
  233. generator.append(R"~~~(
  234. auto hash = CaseInsensitiveASCIIStringViewTraits::hash(key);
  235. )~~~");
  236. }
  237. generator.append(R"~~~(
  238. if (auto const* value = binary_search(hash_pairs, hash, nullptr, HashValueComparator<@value_type@> {}))
  239. return @return_statement@;
  240. return {};
  241. }
  242. )~~~");
  243. }
  244. template<typename IdentifierFormatter>
  245. void generate_value_to_string(SourceGenerator& generator, StringView method_name_format, StringView value_type, StringView value_name, IdentifierFormatter&& format_identifier, ReadonlySpan<ByteString> values)
  246. {
  247. generator.set("method_name", ByteString::formatted(method_name_format, value_name));
  248. generator.set("value_type", value_type);
  249. generator.set("value_name", value_name);
  250. generator.append(R"~~~(
  251. StringView @method_name@(@value_type@ @value_name@)
  252. {
  253. using enum @value_type@;
  254. switch (@value_name@) {)~~~");
  255. for (auto const& value : values) {
  256. generator.set("enum_value", format_identifier(value_type, value));
  257. generator.set("string_value", value);
  258. generator.append(R"~~~(
  259. case @enum_value@:
  260. return "@string_value@"sv;)~~~");
  261. }
  262. generator.append(R"~~~(
  263. }
  264. VERIFY_NOT_REACHED();
  265. }
  266. )~~~");
  267. }
  268. template<typename IdentifierFormatter>
  269. void generate_enum(SourceGenerator& generator, IdentifierFormatter&& format_identifier, StringView name, StringView default_, Vector<ByteString>& values, Vector<Alias> aliases = {})
  270. {
  271. quick_sort(values, [](auto const& value1, auto const& value2) { return value1.to_lowercase() < value2.to_lowercase(); });
  272. quick_sort(aliases, [](auto const& alias1, auto const& alias2) { return alias1.alias.to_lowercase() < alias2.alias.to_lowercase(); });
  273. generator.set("name", name);
  274. generator.set("underlying", ((values.size() + !default_.is_empty()) < 256) ? "u8"sv : "u16"sv);
  275. generator.append(R"~~~(
  276. enum class @name@ : @underlying@ {)~~~");
  277. if (!default_.is_empty()) {
  278. generator.set("default", default_);
  279. generator.append(R"~~~(
  280. @default@,)~~~");
  281. }
  282. for (auto const& value : values) {
  283. generator.set("value", format_identifier(name, value));
  284. generator.append(R"~~~(
  285. @value@,)~~~");
  286. }
  287. for (auto const& alias : aliases) {
  288. generator.set("alias", format_identifier(name, alias.alias));
  289. generator.set("value", format_identifier(name, alias.name));
  290. generator.append(R"~~~(
  291. @alias@ = @value@,)~~~");
  292. }
  293. generator.append(R"~~~(
  294. };
  295. )~~~");
  296. }
  297. template<typename LocalesType, typename IdentifierFormatter, typename ListFormatter>
  298. void generate_mapping(SourceGenerator& generator, LocalesType const& locales, StringView type, StringView name, StringView format, IdentifierFormatter&& format_identifier, ListFormatter&& format_list)
  299. {
  300. auto format_mapping_name = [&](StringView format, StringView name) {
  301. ByteString mapping_name;
  302. if constexpr (IsNullPointer<IdentifierFormatter>)
  303. mapping_name = name.replace("-"sv, "_"sv, ReplaceMode::All);
  304. else
  305. mapping_name = format_identifier(type, name);
  306. return ByteString::formatted(format, mapping_name.to_lowercase());
  307. };
  308. Vector<ByteString> mapping_names;
  309. for (auto const& locale : locales) {
  310. ByteString mapping_name;
  311. if constexpr (requires { locale.key; }) {
  312. mapping_name = format_mapping_name(format, locale.key);
  313. format_list(mapping_name, locale.value);
  314. } else {
  315. mapping_name = format_mapping_name(format, locale);
  316. format_list(mapping_name, locale);
  317. }
  318. mapping_names.append(move(mapping_name));
  319. }
  320. quick_sort(mapping_names);
  321. generator.set("type", type);
  322. generator.set("name", name);
  323. generator.set("size", ByteString::number(locales.size()));
  324. generator.append(R"~~~(
  325. static constexpr Array<ReadonlySpan<@type@>, @size@> @name@ { {
  326. )~~~");
  327. constexpr size_t max_values_per_row = 10;
  328. size_t values_in_current_row = 0;
  329. for (auto& mapping_name : mapping_names) {
  330. if (values_in_current_row++ > 0)
  331. generator.append(" ");
  332. generator.set("name", move(mapping_name));
  333. generator.append("@name@.span(),");
  334. if (values_in_current_row == max_values_per_row) {
  335. values_in_current_row = 0;
  336. generator.append("\n ");
  337. }
  338. }
  339. generator.append(R"~~~(
  340. } };
  341. )~~~");
  342. }
  343. inline Vector<u32> parse_code_point_list(StringView list)
  344. {
  345. Vector<u32> code_points;
  346. auto segments = list.split_view(' ');
  347. for (auto const& code_point : segments)
  348. code_points.append(AK::StringUtils::convert_to_uint_from_hex<u32>(code_point).value());
  349. return code_points;
  350. }
  351. inline Unicode::CodePointRange parse_code_point_range(StringView list)
  352. {
  353. Unicode::CodePointRange code_point_range {};
  354. if (list.contains(".."sv)) {
  355. auto segments = list.split_view(".."sv);
  356. VERIFY(segments.size() == 2);
  357. auto begin = AK::StringUtils::convert_to_uint_from_hex<u32>(segments[0]).value();
  358. auto end = AK::StringUtils::convert_to_uint_from_hex<u32>(segments[1]).value();
  359. code_point_range = { begin, end };
  360. } else {
  361. auto code_point = AK::StringUtils::convert_to_uint_from_hex<u32>(list).value();
  362. code_point_range = { code_point, code_point };
  363. }
  364. return code_point_range;
  365. }