main.cpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453
  1. /*
  2. * Copyright (c) 2021, Daniel Bertalan <dani@danielbertalan.dev>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <AK/GenericLexer.h>
  7. #include <AK/HashTable.h>
  8. #include <AK/OwnPtr.h>
  9. #include <AK/SourceGenerator.h>
  10. #include <AK/String.h>
  11. #include <AK/StringBuilder.h>
  12. #include <AK/Types.h>
  13. #include <LibCore/ArgsParser.h>
  14. #include <LibCore/File.h>
  15. #include <ctype.h>
  16. struct Range {
  17. int begin;
  18. int end;
  19. };
  20. struct StateTransition {
  21. Optional<String> new_state;
  22. Optional<String> action;
  23. };
  24. struct MatchedAction {
  25. Range range;
  26. StateTransition action;
  27. };
  28. struct State {
  29. String name;
  30. Vector<MatchedAction> actions;
  31. Optional<String> entry_action;
  32. Optional<String> exit_action;
  33. };
  34. struct StateMachine {
  35. String name;
  36. String initial_state;
  37. Vector<State> states;
  38. Optional<State> anywhere;
  39. Optional<String> namespaces;
  40. };
  41. static OwnPtr<StateMachine>
  42. parse_state_machine(StringView input)
  43. {
  44. auto state_machine = make<StateMachine>();
  45. GenericLexer lexer(input);
  46. auto consume_whitespace = [&] {
  47. bool consumed = true;
  48. while (consumed) {
  49. consumed = lexer.consume_while(isspace).length() > 0;
  50. if (lexer.consume_specific("//")) {
  51. lexer.consume_line();
  52. consumed = true;
  53. }
  54. }
  55. };
  56. auto consume_identifier = [&] {
  57. consume_whitespace();
  58. return lexer.consume_while([](char c) { return isalnum(c) || c == '_'; });
  59. };
  60. auto get_hex_value = [&](char c) {
  61. if (isdigit(c))
  62. return c - '0';
  63. else
  64. return c - 'a' + 10;
  65. };
  66. auto consume_number = [&] {
  67. int num = 0;
  68. consume_whitespace();
  69. if (lexer.consume_specific("0x")) {
  70. auto hex_digits = lexer.consume_while([](char c) {
  71. if (isdigit(c)) return true;
  72. else {
  73. c = tolower(c);
  74. return (c >= 'a' && c <= 'f');
  75. } });
  76. for (auto c : hex_digits)
  77. num = 16 * num + get_hex_value(c);
  78. } else {
  79. lexer.consume_specific('\'');
  80. if (lexer.next_is('\\')) {
  81. num = (int)lexer.consume_escaped_character('\\');
  82. } else {
  83. num = lexer.consume_until('\'').to_int().value();
  84. lexer.ignore();
  85. }
  86. lexer.consume_specific('\'');
  87. }
  88. return num;
  89. };
  90. auto consume_condition = [&] {
  91. Range condition;
  92. consume_whitespace();
  93. if (lexer.consume_specific('[')) {
  94. consume_whitespace();
  95. condition.begin = consume_number();
  96. consume_whitespace();
  97. lexer.consume_specific("..");
  98. consume_whitespace();
  99. condition.end = consume_number();
  100. consume_whitespace();
  101. lexer.consume_specific(']');
  102. } else {
  103. auto num = consume_number();
  104. condition.begin = num;
  105. condition.end = num;
  106. }
  107. return condition;
  108. };
  109. auto consume_action = [&]() {
  110. StateTransition action;
  111. consume_whitespace();
  112. lexer.consume_specific("=>");
  113. consume_whitespace();
  114. lexer.consume_specific('(');
  115. consume_whitespace();
  116. if (!lexer.consume_specific("_"))
  117. action.new_state = consume_identifier();
  118. consume_whitespace();
  119. lexer.consume_specific(',');
  120. consume_whitespace();
  121. if (!lexer.consume_specific("_"))
  122. action.action = consume_identifier();
  123. consume_whitespace();
  124. lexer.consume_specific(')');
  125. return action;
  126. };
  127. auto consume_state_description
  128. = [&] {
  129. State state;
  130. consume_whitespace();
  131. state.name = consume_identifier();
  132. consume_whitespace();
  133. consume_whitespace();
  134. lexer.consume_specific('{');
  135. for (;;) {
  136. consume_whitespace();
  137. if (lexer.consume_specific('}')) {
  138. break;
  139. }
  140. if (lexer.consume_specific("@entry")) {
  141. consume_whitespace();
  142. state.entry_action = consume_identifier();
  143. } else if (lexer.consume_specific("@exit")) {
  144. consume_whitespace();
  145. state.exit_action = consume_identifier();
  146. } else if (lexer.next_is('@')) {
  147. auto directive = consume_identifier().to_string();
  148. fprintf(stderr, "Unimplemented @ directive %s\n", directive.characters());
  149. exit(1);
  150. } else {
  151. MatchedAction matched_action;
  152. matched_action.range = consume_condition();
  153. matched_action.action = consume_action();
  154. state.actions.append(matched_action);
  155. }
  156. }
  157. return state;
  158. };
  159. while (!lexer.is_eof()) {
  160. consume_whitespace();
  161. if (lexer.is_eof())
  162. break;
  163. if (lexer.consume_specific("@namespace")) {
  164. consume_whitespace();
  165. state_machine->namespaces = lexer.consume_while([](char c) { return isalpha(c) || c == ':'; });
  166. } else if (lexer.consume_specific("@begin")) {
  167. consume_whitespace();
  168. state_machine->initial_state = consume_identifier();
  169. } else if (lexer.consume_specific("@name")) {
  170. consume_whitespace();
  171. state_machine->name = consume_identifier();
  172. } else if (lexer.next_is("@anywhere")) {
  173. lexer.consume_specific('@');
  174. state_machine->anywhere = consume_state_description();
  175. } else if (lexer.consume_specific('@')) {
  176. auto directive = consume_identifier().to_string();
  177. fprintf(stderr, "Unimplemented @ directive %s\n", directive.characters());
  178. exit(1);
  179. } else {
  180. auto description = consume_state_description();
  181. state_machine->states.append(description);
  182. }
  183. }
  184. if (state_machine->initial_state.is_empty()) {
  185. fprintf(stderr, "Missing @begin directive\n");
  186. exit(1);
  187. } else if (state_machine->name.is_empty()) {
  188. fprintf(stderr, "Missing @name directive\n");
  189. exit(1);
  190. }
  191. if (state_machine->anywhere.has_value()) {
  192. state_machine->anywhere.value().name = "_Anywhere";
  193. }
  194. return state_machine;
  195. }
  196. void output_header(const StateMachine&, SourceGenerator&);
  197. int main(int argc, char** argv)
  198. {
  199. Core::ArgsParser args_parser;
  200. const char* path = nullptr;
  201. args_parser.add_positional_argument(path, "Path to parser description", "input", Core::ArgsParser::Required::Yes);
  202. args_parser.parse(argc, argv);
  203. auto file_or_error = Core::File::open(path, Core::OpenMode::ReadOnly);
  204. if (file_or_error.is_error()) {
  205. fprintf(stderr, "Cannot open %s\n", path);
  206. }
  207. auto content = file_or_error.value()->read_all();
  208. auto state_machine = parse_state_machine(content);
  209. StringBuilder builder;
  210. SourceGenerator generator { builder };
  211. output_header(*state_machine, generator);
  212. outln("{}", generator.as_string_view());
  213. return 0;
  214. }
  215. HashTable<String> actions(const StateMachine& machine)
  216. {
  217. HashTable<String> table;
  218. auto do_state = [&](const State& state) {
  219. if (state.entry_action.has_value())
  220. table.set(state.entry_action.value());
  221. if (state.exit_action.has_value())
  222. table.set(state.exit_action.value());
  223. for (auto action : state.actions) {
  224. if (action.action.action.has_value())
  225. table.set(action.action.action.value());
  226. }
  227. };
  228. for (auto state : machine.states) {
  229. do_state(state);
  230. }
  231. if (machine.anywhere.has_value())
  232. do_state(machine.anywhere.value());
  233. return table;
  234. }
  235. void generate_lookup_table(const StateMachine& machine, SourceGenerator& generator)
  236. {
  237. generator.append(R"~~~(
  238. static constexpr StateTransition STATE_TRANSITION_TABLE[][256] = {
  239. )~~~");
  240. auto generate_for_state = [&](const State& s) {
  241. auto table_generator = generator.fork();
  242. table_generator.set("active_state", s.name);
  243. table_generator.append("/* @active_state@ */ { ");
  244. VERIFY(!s.name.is_empty());
  245. Vector<StateTransition> row;
  246. for (int i = 0; i < 256; i++)
  247. row.append({ s.name, "_Ignore" });
  248. for (auto action : s.actions) {
  249. for (int range_element = action.range.begin; range_element <= action.range.end; range_element++) {
  250. row[range_element] = { action.action.new_state, action.action.action };
  251. }
  252. }
  253. for (int i = 0; i < 256; ++i) {
  254. auto cell_generator = table_generator.fork();
  255. cell_generator.set("cell_new_state", row[i].new_state.value_or(s.name));
  256. cell_generator.set("cell_action", row[i].action.value_or("_Ignore"));
  257. cell_generator.append(" {State::@cell_new_state@, Action::@cell_action@}, ");
  258. }
  259. table_generator.append("},\n");
  260. };
  261. if (machine.anywhere.has_value()) {
  262. generate_for_state(machine.anywhere.value());
  263. }
  264. for (auto s : machine.states) {
  265. generate_for_state(s);
  266. }
  267. generator.append(R"~~~(
  268. };
  269. )~~~");
  270. }
  271. void output_header(const StateMachine& machine, SourceGenerator& generator)
  272. {
  273. generator.set("class_name", machine.name);
  274. generator.set("initial_state", machine.initial_state);
  275. generator.set("state_count", String::number(machine.states.size() + 1));
  276. generator.append(R"~~~(
  277. #pragma once
  278. #include <AK/Function.h>
  279. #include <AK/Platform.h>
  280. #include <AK/Types.h>
  281. )~~~");
  282. if (machine.namespaces.has_value()) {
  283. generator.set("namespace", machine.namespaces.value());
  284. generator.append(R"~~~(
  285. namespace @namespace@ {
  286. )~~~");
  287. }
  288. generator.append(R"~~~(
  289. class @class_name@ {
  290. public:
  291. enum class Action : u8 {
  292. _Ignore,
  293. )~~~");
  294. for (auto a : actions(machine)) {
  295. if (a.is_empty())
  296. continue;
  297. auto action_generator = generator.fork();
  298. action_generator.set("action.name", a);
  299. action_generator.append(R"~~~(
  300. @action.name@,
  301. )~~~");
  302. }
  303. generator.append(R"~~~(
  304. }; // end Action
  305. using Handler = Function<void(Action, u8)>;
  306. @class_name@(Handler handler)
  307. : m_handler(move(handler))
  308. {
  309. }
  310. void advance(u8 byte)
  311. {
  312. auto next_state = lookup_state_transition(byte);
  313. bool state_will_change = next_state.new_state != m_state && next_state.new_state != State::_Anywhere;
  314. // only run exit directive if state is being changed
  315. if (state_will_change) {
  316. switch (m_state) {
  317. )~~~");
  318. for (auto s : machine.states) {
  319. auto state_generator = generator.fork();
  320. if (s.exit_action.has_value()) {
  321. state_generator.set("state_name", s.name);
  322. state_generator.set("action", s.exit_action.value());
  323. state_generator.append(R"~~~(
  324. case State::@state_name@:
  325. m_handler(Action::@action@, byte);
  326. break;
  327. )~~~");
  328. }
  329. }
  330. generator.append(R"~~~(
  331. default:
  332. break;
  333. }
  334. }
  335. if (next_state.action != Action::_Ignore)
  336. m_handler(next_state.action, byte);
  337. m_state = next_state.new_state;
  338. // only run entry directive if state is being changed
  339. if (state_will_change)
  340. {
  341. switch (next_state.new_state)
  342. {
  343. )~~~");
  344. for (auto state : machine.states) {
  345. auto state_generator = generator.fork();
  346. if (state.entry_action.has_value()) {
  347. state_generator.set("state_name", state.name);
  348. state_generator.set("action", state.entry_action.value());
  349. state_generator.append(R"~~~(
  350. case State::@state_name@:
  351. m_handler(Action::@action@, byte);
  352. break;
  353. )~~~");
  354. }
  355. }
  356. generator.append(R"~~~(
  357. default:
  358. break;
  359. }
  360. }
  361. }
  362. private:
  363. enum class State : u8 {
  364. _Anywhere,
  365. )~~~");
  366. int largest_state_value = 0;
  367. for (auto s : machine.states) {
  368. auto state_generator = generator.fork();
  369. state_generator.set("state.name", s.name);
  370. largest_state_value++;
  371. state_generator.append(R"~~~(
  372. @state.name@,
  373. )~~~");
  374. }
  375. generator.append(R"~~~(
  376. }; // end State
  377. struct StateTransition {
  378. State new_state;
  379. Action action;
  380. };
  381. State m_state { State::@initial_state@ };
  382. Handler m_handler;
  383. ALWAYS_INLINE StateTransition lookup_state_transition(u8 byte)
  384. {
  385. VERIFY((u8)m_state < @state_count@);
  386. )~~~");
  387. if (machine.anywhere.has_value()) {
  388. generator.append(R"~~~(
  389. auto anywhere_state = STATE_TRANSITION_TABLE[0][byte];
  390. if (anywhere_state.new_state != State::_Anywhere || anywhere_state.action != Action::_Ignore)
  391. return anywhere_state;
  392. else
  393. )~~~");
  394. }
  395. generator.append(R"~~~(
  396. return STATE_TRANSITION_TABLE[(u8)m_state][byte];
  397. }
  398. )~~~");
  399. auto table_generator = generator.fork();
  400. generate_lookup_table(machine, table_generator);
  401. generator.append(R"~~~(
  402. }; // end @class_name@
  403. )~~~");
  404. if (machine.namespaces.has_value()) {
  405. generator.append(R"~~~(
  406. } // end namespace
  407. )~~~");
  408. }
  409. }