// CppLexer.h
  1. #pragma once
  2. #include <AK/StringView.h>
  3. #include <AK/Vector.h>
  // X-macro listing every token kind, one __TOKEN(Name) entry per kind.
  //
  // Usage: #define __TOKEN(x) to whatever text each entry should become, expand
  // FOR_EACH_TOKEN_TYPE, then #undef __TOKEN. Entries expand in the order they
  // are written below, so any enum generated from the list starts at Unknown.
  #define FOR_EACH_TOKEN_TYPE            \
      __TOKEN(Unknown)                   \
      __TOKEN(Whitespace)                \
      __TOKEN(PreprocessorStatement)     \
      __TOKEN(LeftParen)                 \
      __TOKEN(RightParen)                \
      __TOKEN(LeftCurly)                 \
      __TOKEN(RightCurly)                \
      __TOKEN(LeftBracket)               \
      __TOKEN(RightBracket)              \
      __TOKEN(Comma)                     \
      __TOKEN(Asterisk)                  \
      __TOKEN(Semicolon)                 \
      __TOKEN(DoubleQuotedString)        \
      __TOKEN(SingleQuotedString)        \
      __TOKEN(Comment)                   \
      __TOKEN(Number)                    \
      __TOKEN(Keyword)                   \
      __TOKEN(Identifier)
  // A line/column pair identifying a location.
  // Both fields default to -1 when no explicit values are supplied.
  // NOTE(review): whether the numbering is 0- or 1-based is not fixed by this
  // struct — confirm against the code that fills it in.
  struct CppPosition {
      int line = -1;
      int column = -1;
  };
  27. struct CppToken {
  28. enum class Type {
  29. #define __TOKEN(x) x,
  30. FOR_EACH_TOKEN_TYPE
  31. #undef __TOKEN
  32. };
  33. const char* to_string() const
  34. {
  35. switch (m_type) {
  36. #define __TOKEN(x) \
  37. case Type::x: \
  38. return #x;
  39. FOR_EACH_TOKEN_TYPE
  40. #undef __TOKEN
  41. }
  42. ASSERT_NOT_REACHED();
  43. }
  44. Type m_type { Type::Unknown };
  45. CppPosition m_start;
  46. CppPosition m_end;
  47. };
  48. class CppLexer {
  49. public:
  50. CppLexer(const StringView&);
  51. Vector<CppToken> lex();
  52. private:
  53. char peek(int offset = 0) const;
  54. char consume();
  55. StringView m_input;
  56. int m_index { 0 };
  57. CppPosition m_previous_position { 0, 0 };
  58. CppPosition m_position { 0, 0 };
  59. };