main.cpp 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454
  1. /*
  2. * Copyright (c) 2021, Daniel Bertalan <dani@danielbertalan.dev>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <AK/ByteString.h>
  7. #include <AK/GenericLexer.h>
  8. #include <AK/HashTable.h>
  9. #include <AK/OwnPtr.h>
  10. #include <AK/SourceGenerator.h>
  11. #include <AK/StringBuilder.h>
  12. #include <AK/Types.h>
  13. #include <LibCore/ArgsParser.h>
  14. #include <LibCore/File.h>
  15. #include <LibMain/Main.h>
  16. #include <ctype.h>
  17. struct Range {
  18. int begin;
  19. int end;
  20. };
  21. struct StateTransition {
  22. Optional<ByteString> new_state;
  23. Optional<ByteString> action;
  24. };
  25. struct MatchedAction {
  26. Range range;
  27. StateTransition action;
  28. };
  29. struct State {
  30. ByteString name;
  31. Vector<MatchedAction> actions;
  32. Optional<ByteString> entry_action;
  33. Optional<ByteString> exit_action;
  34. };
  35. struct StateMachine {
  36. ByteString name;
  37. ByteString initial_state;
  38. Vector<State> states;
  39. Optional<State> anywhere;
  40. Optional<ByteString> namespaces;
  41. };
  42. static OwnPtr<StateMachine>
  43. parse_state_machine(StringView input)
  44. {
  45. auto state_machine = make<StateMachine>();
  46. GenericLexer lexer(input);
  47. auto consume_whitespace = [&] {
  48. bool consumed = true;
  49. while (consumed) {
  50. consumed = lexer.consume_while(isspace).length() > 0;
  51. if (lexer.consume_specific("//")) {
  52. lexer.consume_line();
  53. consumed = true;
  54. }
  55. }
  56. };
  57. auto consume_identifier = [&] {
  58. consume_whitespace();
  59. return lexer.consume_while([](char c) { return isalnum(c) || c == '_'; });
  60. };
  61. auto get_hex_value = [&](char c) {
  62. if (isdigit(c))
  63. return c - '0';
  64. else
  65. return c - 'a' + 10;
  66. };
  67. auto consume_number = [&] {
  68. int num = 0;
  69. consume_whitespace();
  70. if (lexer.consume_specific("0x")) {
  71. auto hex_digits = lexer.consume_while([](char c) {
  72. if (isdigit(c)) return true;
  73. else {
  74. c = tolower(c);
  75. return (c >= 'a' && c <= 'f');
  76. } });
  77. for (auto c : hex_digits)
  78. num = 16 * num + get_hex_value(c);
  79. } else {
  80. lexer.consume_specific('\'');
  81. if (lexer.next_is('\\')) {
  82. num = (int)lexer.consume_escaped_character('\\');
  83. } else {
  84. num = lexer.consume_until('\'').to_number<int>().value();
  85. lexer.ignore();
  86. }
  87. lexer.consume_specific('\'');
  88. }
  89. return num;
  90. };
  91. auto consume_condition = [&] {
  92. Range condition;
  93. consume_whitespace();
  94. if (lexer.consume_specific('[')) {
  95. consume_whitespace();
  96. condition.begin = consume_number();
  97. consume_whitespace();
  98. lexer.consume_specific("..");
  99. consume_whitespace();
  100. condition.end = consume_number();
  101. consume_whitespace();
  102. lexer.consume_specific(']');
  103. } else {
  104. auto num = consume_number();
  105. condition.begin = num;
  106. condition.end = num;
  107. }
  108. return condition;
  109. };
  110. auto consume_action = [&]() {
  111. StateTransition action;
  112. consume_whitespace();
  113. lexer.consume_specific("=>");
  114. consume_whitespace();
  115. lexer.consume_specific('(');
  116. consume_whitespace();
  117. if (!lexer.consume_specific("_"))
  118. action.new_state = consume_identifier();
  119. consume_whitespace();
  120. lexer.consume_specific(',');
  121. consume_whitespace();
  122. if (!lexer.consume_specific("_"))
  123. action.action = consume_identifier();
  124. consume_whitespace();
  125. lexer.consume_specific(')');
  126. return action;
  127. };
  128. auto consume_state_description
  129. = [&] {
  130. State state;
  131. consume_whitespace();
  132. state.name = consume_identifier();
  133. consume_whitespace();
  134. consume_whitespace();
  135. lexer.consume_specific('{');
  136. for (;;) {
  137. consume_whitespace();
  138. if (lexer.consume_specific('}')) {
  139. break;
  140. }
  141. if (lexer.consume_specific("@entry")) {
  142. consume_whitespace();
  143. state.entry_action = consume_identifier();
  144. } else if (lexer.consume_specific("@exit")) {
  145. consume_whitespace();
  146. state.exit_action = consume_identifier();
  147. } else if (lexer.next_is('@')) {
  148. auto directive = consume_identifier().to_byte_string();
  149. fprintf(stderr, "Unimplemented @ directive %s\n", directive.characters());
  150. exit(1);
  151. } else {
  152. MatchedAction matched_action;
  153. matched_action.range = consume_condition();
  154. matched_action.action = consume_action();
  155. state.actions.append(matched_action);
  156. }
  157. }
  158. return state;
  159. };
  160. while (!lexer.is_eof()) {
  161. consume_whitespace();
  162. if (lexer.is_eof())
  163. break;
  164. if (lexer.consume_specific("@namespace")) {
  165. consume_whitespace();
  166. state_machine->namespaces = lexer.consume_while([](char c) { return isalpha(c) || c == ':'; });
  167. } else if (lexer.consume_specific("@begin")) {
  168. consume_whitespace();
  169. state_machine->initial_state = consume_identifier();
  170. } else if (lexer.consume_specific("@name")) {
  171. consume_whitespace();
  172. state_machine->name = consume_identifier();
  173. } else if (lexer.next_is("@anywhere")) {
  174. lexer.consume_specific('@');
  175. state_machine->anywhere = consume_state_description();
  176. } else if (lexer.consume_specific('@')) {
  177. auto directive = consume_identifier().to_byte_string();
  178. fprintf(stderr, "Unimplemented @ directive %s\n", directive.characters());
  179. exit(1);
  180. } else {
  181. auto description = consume_state_description();
  182. state_machine->states.append(description);
  183. }
  184. }
  185. if (state_machine->initial_state.is_empty()) {
  186. fprintf(stderr, "Missing @begin directive\n");
  187. exit(1);
  188. } else if (state_machine->name.is_empty()) {
  189. fprintf(stderr, "Missing @name directive\n");
  190. exit(1);
  191. }
  192. if (state_machine->anywhere.has_value()) {
  193. state_machine->anywhere.value().name = "_Anywhere";
  194. }
  195. return state_machine;
  196. }
  197. void output_header(StateMachine const&, SourceGenerator&);
  198. ErrorOr<int> serenity_main(Main::Arguments arguments)
  199. {
  200. StringView path;
  201. StringView output_file = "-"sv;
  202. Core::ArgsParser parser;
  203. parser.add_positional_argument(path, "Path to parser description", "input", Core::ArgsParser::Required::Yes);
  204. parser.add_option(output_file, "Place to write file", "output", 'o', "output-file");
  205. parser.parse(arguments);
  206. auto output = TRY(Core::File::open_file_or_standard_stream(output_file, Core::File::OpenMode::Write));
  207. auto file = TRY(Core::File::open(path, Core::File::OpenMode::Read));
  208. auto content = TRY(file->read_until_eof());
  209. auto state_machine = parse_state_machine(content);
  210. StringBuilder builder;
  211. SourceGenerator generator { builder };
  212. output_header(*state_machine, generator);
  213. TRY(output->write_until_depleted(generator.as_string_view().bytes()));
  214. return 0;
  215. }
  216. HashTable<ByteString> actions(StateMachine const& machine)
  217. {
  218. HashTable<ByteString> table;
  219. auto do_state = [&](State const& state) {
  220. if (state.entry_action.has_value())
  221. table.set(state.entry_action.value());
  222. if (state.exit_action.has_value())
  223. table.set(state.exit_action.value());
  224. for (auto action : state.actions) {
  225. if (action.action.action.has_value())
  226. table.set(action.action.action.value());
  227. }
  228. };
  229. for (auto state : machine.states) {
  230. do_state(state);
  231. }
  232. if (machine.anywhere.has_value())
  233. do_state(machine.anywhere.value());
  234. return table;
  235. }
  236. void generate_lookup_table(StateMachine const& machine, SourceGenerator& generator)
  237. {
  238. generator.append(R"~~~(
  239. static constexpr StateTransition STATE_TRANSITION_TABLE[][256] = {
  240. )~~~");
  241. auto generate_for_state = [&](State const& s) {
  242. auto table_generator = generator.fork();
  243. table_generator.set("active_state", s.name);
  244. table_generator.append("/* @active_state@ */ { ");
  245. VERIFY(!s.name.is_empty());
  246. Vector<StateTransition> row;
  247. for (int i = 0; i < 256; i++)
  248. row.append({ s.name, "_Ignore" });
  249. for (auto action : s.actions) {
  250. for (int range_element = action.range.begin; range_element <= action.range.end; range_element++) {
  251. row[range_element] = { action.action.new_state, action.action.action };
  252. }
  253. }
  254. for (int i = 0; i < 256; ++i) {
  255. auto cell_generator = table_generator.fork();
  256. cell_generator.set("cell_new_state", row[i].new_state.value_or(s.name));
  257. cell_generator.set("cell_action", row[i].action.value_or("_Ignore"));
  258. cell_generator.append(" {State::@cell_new_state@, Action::@cell_action@}, ");
  259. }
  260. table_generator.append("},\n");
  261. };
  262. if (machine.anywhere.has_value()) {
  263. generate_for_state(machine.anywhere.value());
  264. }
  265. for (auto s : machine.states) {
  266. generate_for_state(s);
  267. }
  268. generator.append(R"~~~(
  269. };
  270. )~~~");
  271. }
  272. void output_header(StateMachine const& machine, SourceGenerator& generator)
  273. {
  274. generator.set("class_name", machine.name);
  275. generator.set("initial_state", machine.initial_state);
  276. generator.set("state_count", ByteString::number(machine.states.size() + 1));
  277. generator.append(R"~~~(
  278. #pragma once
  279. #include <AK/Function.h>
  280. #include <AK/Platform.h>
  281. #include <AK/Types.h>
  282. )~~~");
  283. if (machine.namespaces.has_value()) {
  284. generator.set("namespace", machine.namespaces.value());
  285. generator.append(R"~~~(
  286. namespace @namespace@ {
  287. )~~~");
  288. }
  289. generator.append(R"~~~(
  290. class @class_name@ {
  291. public:
  292. enum class Action : u8 {
  293. _Ignore,
  294. )~~~");
  295. for (auto a : actions(machine)) {
  296. if (a.is_empty())
  297. continue;
  298. auto action_generator = generator.fork();
  299. action_generator.set("action.name", a);
  300. action_generator.append(R"~~~(
  301. @action.name@,
  302. )~~~");
  303. }
  304. generator.append(R"~~~(
  305. }; // end Action
  306. using Handler = Function<void(Action, u8)>;
  307. @class_name@(Handler handler)
  308. : m_handler(move(handler))
  309. {
  310. }
  311. void advance(u8 byte)
  312. {
  313. auto next_state = lookup_state_transition(byte);
  314. bool state_will_change = next_state.new_state != m_state && next_state.new_state != State::_Anywhere;
  315. // only run exit directive if state is being changed
  316. if (state_will_change) {
  317. switch (m_state) {
  318. )~~~");
  319. for (auto s : machine.states) {
  320. auto state_generator = generator.fork();
  321. if (s.exit_action.has_value()) {
  322. state_generator.set("state_name", s.name);
  323. state_generator.set("action", s.exit_action.value());
  324. state_generator.append(R"~~~(
  325. case State::@state_name@:
  326. m_handler(Action::@action@, byte);
  327. break;
  328. )~~~");
  329. }
  330. }
  331. generator.append(R"~~~(
  332. default:
  333. break;
  334. }
  335. }
  336. if (next_state.action != Action::_Ignore)
  337. m_handler(next_state.action, byte);
  338. m_state = next_state.new_state;
  339. // only run entry directive if state is being changed
  340. if (state_will_change)
  341. {
  342. switch (next_state.new_state)
  343. {
  344. )~~~");
  345. for (auto state : machine.states) {
  346. auto state_generator = generator.fork();
  347. if (state.entry_action.has_value()) {
  348. state_generator.set("state_name", state.name);
  349. state_generator.set("action", state.entry_action.value());
  350. state_generator.append(R"~~~(
  351. case State::@state_name@:
  352. m_handler(Action::@action@, byte);
  353. break;
  354. )~~~");
  355. }
  356. }
  357. generator.append(R"~~~(
  358. default:
  359. break;
  360. }
  361. }
  362. }
  363. private:
  364. enum class State : u8 {
  365. _Anywhere,
  366. )~~~");
  367. for (auto s : machine.states) {
  368. auto state_generator = generator.fork();
  369. state_generator.set("state.name", s.name);
  370. state_generator.append(R"~~~(
  371. @state.name@,
  372. )~~~");
  373. }
  374. generator.append(R"~~~(
  375. }; // end State
  376. struct StateTransition {
  377. State new_state;
  378. Action action;
  379. };
  380. State m_state { State::@initial_state@ };
  381. Handler m_handler;
  382. ALWAYS_INLINE StateTransition lookup_state_transition(u8 byte)
  383. {
  384. VERIFY((u8)m_state < @state_count@);
  385. )~~~");
  386. if (machine.anywhere.has_value()) {
  387. generator.append(R"~~~(
  388. auto anywhere_state = STATE_TRANSITION_TABLE[0][byte];
  389. if (anywhere_state.new_state != State::_Anywhere || anywhere_state.action != Action::_Ignore)
  390. return anywhere_state;
  391. else
  392. )~~~");
  393. }
  394. generator.append(R"~~~(
  395. return STATE_TRANSITION_TABLE[(u8)m_state][byte];
  396. }
  397. )~~~");
  398. auto table_generator = generator.fork();
  399. generate_lookup_table(machine, table_generator);
  400. generator.append(R"~~~(
  401. }; // end @class_name@
  402. )~~~");
  403. if (machine.namespaces.has_value()) {
  404. generator.append(R"~~~(
  405. } // end namespace
  406. )~~~");
  407. }
  408. }