RegexOptions.h 7.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152
  1. /*
  2. * Copyright (c) 2020, Emanuel Sprung <emanuel.sprung@gmail.com>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #pragma once
  7. #include <AK/Types.h>
  8. #include <stdio.h>
  9. #ifdef AK_OS_SERENITY
  10. # include <bits/regex_defs.h>
  11. #else
  12. # include <LibC/bits/regex_defs.h>
  13. #endif
  14. namespace regex {
  15. using FlagsUnderlyingType = u32;
  16. enum class AllFlags {
  17. Default = 0,
  18. Global = __Regex_Global, // All matches (don't return after first match)
  19. Insensitive = __Regex_Insensitive, // Case insensitive match (ignores case of [a-zA-Z])
  20. Ungreedy = __Regex_Ungreedy, // The match becomes lazy by default. Now a ? following a quantifier makes it greedy
  21. Unicode = __Regex_Unicode, // Enable all unicode features and interpret all unicode escape sequences as such
  22. Extended = __Regex_Extended, // Ignore whitespaces. Spaces and text after a # in the pattern are ignored
  23. Extra = __Regex_Extra, // Disallow meaningless escapes. A \ followed by a letter with no special meaning is faulted
  24. MatchNotBeginOfLine = __Regex_MatchNotBeginOfLine, // Pattern is not forced to ^ -> search in whole string!
  25. MatchNotEndOfLine = __Regex_MatchNotEndOfLine, // Don't Force the dollar sign, $, to always match end of the string, instead of end of the line. This option is ignored if the Multiline-flag is set
  26. SkipSubExprResults = __Regex_SkipSubExprResults, // Do not return sub expressions in the result
  27. StringCopyMatches = __Regex_StringCopyMatches, // Do explicitly copy results into new allocated string instead of StringView to original string.
  28. SingleLine = __Regex_SingleLine, // Dot matches newline characters
  29. Sticky = __Regex_Sticky, // Force the pattern to only match consecutive matches from where the previous match ended.
  30. Multiline = __Regex_Multiline, // Handle newline characters. Match each line, one by one.
  31. SkipTrimEmptyMatches = __Regex_SkipTrimEmptyMatches, // Do not remove empty capture group results.
  32. SingleMatch = __Regex_SingleMatch, // Stop after acquiring a single match.
  33. UnicodeSets = __Regex_UnicodeSets, // Only for ECMA262, Allow set operations in character classes.
  34. Internal_Stateful = __Regex_Internal_Stateful, // Make global matches match one result at a time, and further match() calls on the same instance continue where the previous one left off.
  35. Internal_BrowserExtended = __Regex_Internal_BrowserExtended, // Only for ECMA262, Enable the behaviors defined in section B.1.4. of the ECMA262 spec.
  36. Internal_ConsiderNewline = __Regex_Internal_ConsiderNewline, // Only for ECMA262, Allow multiline matches to consider newlines as line boundaries.
  37. Internal_ECMA262DotSemantics = __Regex_Internal_ECMA262DotSemantics, // Use ECMA262 dot semantics: disallow matching CR/LF/LS/PS instead of just CR.
  38. Last = Internal_BrowserExtended,
  39. };
  40. enum class PosixFlags : FlagsUnderlyingType {
  41. Default = 0,
  42. Global = (FlagsUnderlyingType)AllFlags::Global,
  43. Insensitive = (FlagsUnderlyingType)AllFlags::Insensitive,
  44. Ungreedy = (FlagsUnderlyingType)AllFlags::Ungreedy,
  45. Unicode = (FlagsUnderlyingType)AllFlags::Unicode,
  46. Extended = (FlagsUnderlyingType)AllFlags::Extended,
  47. Extra = (FlagsUnderlyingType)AllFlags::Extra,
  48. MatchNotBeginOfLine = (FlagsUnderlyingType)AllFlags::MatchNotBeginOfLine,
  49. MatchNotEndOfLine = (FlagsUnderlyingType)AllFlags::MatchNotEndOfLine,
  50. SkipSubExprResults = (FlagsUnderlyingType)AllFlags::SkipSubExprResults,
  51. SkipTrimEmptyMatches = (FlagsUnderlyingType)AllFlags::SkipTrimEmptyMatches,
  52. Multiline = (FlagsUnderlyingType)AllFlags::Multiline,
  53. SingleMatch = (FlagsUnderlyingType)AllFlags::SingleMatch,
  54. StringCopyMatches = (FlagsUnderlyingType)AllFlags::StringCopyMatches,
  55. };
  56. enum class ECMAScriptFlags : FlagsUnderlyingType {
  57. Default = (FlagsUnderlyingType)AllFlags::Internal_ECMA262DotSemantics,
  58. Global = (FlagsUnderlyingType)AllFlags::Global | (FlagsUnderlyingType)AllFlags::Internal_Stateful, // Note: ECMAScript "Global" creates a stateful regex.
  59. Insensitive = (FlagsUnderlyingType)AllFlags::Insensitive,
  60. Ungreedy = (FlagsUnderlyingType)AllFlags::Ungreedy,
  61. Unicode = (FlagsUnderlyingType)AllFlags::Unicode,
  62. Extended = (FlagsUnderlyingType)AllFlags::Extended,
  63. Extra = (FlagsUnderlyingType)AllFlags::Extra,
  64. SingleLine = (FlagsUnderlyingType)AllFlags::SingleLine,
  65. Sticky = (FlagsUnderlyingType)AllFlags::Sticky,
  66. Multiline = (FlagsUnderlyingType)AllFlags::Multiline,
  67. StringCopyMatches = (FlagsUnderlyingType)AllFlags::StringCopyMatches,
  68. UnicodeSets = (FlagsUnderlyingType)AllFlags::UnicodeSets,
  69. BrowserExtended = (FlagsUnderlyingType)AllFlags::Internal_BrowserExtended,
  70. };
  71. template<class T>
  72. class RegexOptions {
  73. public:
  74. using FlagsType = T;
  75. RegexOptions() = default;
  76. constexpr RegexOptions(T flags)
  77. : m_flags(static_cast<T>(to_underlying(flags) | to_underlying(T::Default)))
  78. {
  79. }
  80. template<class U>
  81. constexpr RegexOptions(RegexOptions<U> other)
  82. : RegexOptions(static_cast<T>(to_underlying(other.value())))
  83. {
  84. }
  85. operator bool() const { return !!*this; }
  86. bool operator!() const { return (FlagsUnderlyingType)m_flags == 0; }
  87. constexpr RegexOptions<T> operator|(T flag) const { return RegexOptions<T> { (T)((FlagsUnderlyingType)m_flags | (FlagsUnderlyingType)flag) }; }
  88. constexpr RegexOptions<T> operator&(T flag) const { return RegexOptions<T> { (T)((FlagsUnderlyingType)m_flags & (FlagsUnderlyingType)flag) }; }
  89. constexpr RegexOptions<T>& operator|=(T flag)
  90. {
  91. m_flags = (T)((FlagsUnderlyingType)m_flags | (FlagsUnderlyingType)flag);
  92. return *this;
  93. }
  94. constexpr RegexOptions<T>& operator&=(T flag)
  95. {
  96. m_flags = (T)((FlagsUnderlyingType)m_flags & (FlagsUnderlyingType)flag);
  97. return *this;
  98. }
  99. void reset_flags() { m_flags = (T)0; }
  100. void reset_flag(T flag) { m_flags = (T)((FlagsUnderlyingType)m_flags & ~(FlagsUnderlyingType)flag); }
  101. void set_flag(T flag) { *this |= flag; }
  102. bool has_flag_set(T flag) const { return (FlagsUnderlyingType)flag == ((FlagsUnderlyingType)m_flags & (FlagsUnderlyingType)flag); }
  103. constexpr T value() const { return m_flags; }
  104. private:
  105. T m_flags { T::Default };
  106. };
  107. template<class T>
  108. constexpr RegexOptions<T> operator|(T lhs, T rhs)
  109. {
  110. return RegexOptions<T> { lhs } |= rhs;
  111. }
  112. template<class T>
  113. constexpr RegexOptions<T> operator&(T lhs, T rhs)
  114. {
  115. return RegexOptions<T> { lhs } &= rhs;
  116. }
  117. template<class T>
  118. constexpr T operator~(T flag)
  119. {
  120. return (T) ~((FlagsUnderlyingType)flag);
  121. }
  122. using AllOptions = RegexOptions<AllFlags>;
  123. using ECMAScriptOptions = RegexOptions<ECMAScriptFlags>;
  124. using PosixOptions = RegexOptions<PosixFlags>;
  125. }
  126. using regex::ECMAScriptFlags;
  127. using regex::ECMAScriptOptions;
  128. using regex::PosixFlags;
  129. using regex::PosixOptions;