regex.h 6.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104
  1. /*
  2. * Copyright (c) 2020, Emanuel Sprung <emanuel.sprung@gmail.com>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #pragma once
  7. #include <stddef.h>
  8. #include <sys/types.h>
  9. __BEGIN_DECLS
  // Signed offset type (an alias of ssize_t) used for the fields of regmatch_t.
  typedef ssize_t regoff_t;
  // Pattern handle taken by regcomp(), regexec(), regerror() and regfree().
  // Its single member is an untyped pointer; what it points to is not
  // visible from this header.
  typedef struct {
      void* __data;
  } regex_t;
  // Internal error codes. Values are assigned sequentially starting at 0, so
  // the order of the enumerators below must not change.
  enum __Regex_Error {
      // No error.
      __Regex_NoError = 0,
      // Invalid regular expression.
      __Regex_InvalidPattern,
      // Invalid collating element referenced.
      __Regex_InvalidCollationElement,
      // Invalid character class type referenced.
      __Regex_InvalidCharacterClass,
      // Trailing \ in pattern.
      __Regex_InvalidTrailingEscape,
      // Number in \digit invalid or in error.
      __Regex_InvalidNumber,
      // [ ] imbalance.
      __Regex_MismatchingBracket,
      // ( ) imbalance.
      __Regex_MismatchingParen,
      // { } imbalance.
      __Regex_MismatchingBrace,
      // Content of {} invalid: not a number, number too large, more than two
      // numbers, first larger than second.
      __Regex_InvalidBraceContent,
      // Content of [] invalid.
      __Regex_InvalidBracketContent,
      // Invalid endpoint in range expression.
      __Regex_InvalidRange,
      // ?, * or + not preceded by valid regular expression.
      __Regex_InvalidRepetitionMarker,
      // MaximumRecursion has been reached.
      __Regex_ReachedMaxRecursion,
      // Sub expression has empty content.
      __Regex_EmptySubExpression,
      // Content of capture group is invalid.
      __Regex_InvalidCaptureGroup,
      // Name of capture group is invalid.
      __Regex_InvalidNameForCaptureGroup,
  };
  33. enum ReError {
  34. REG_NOERR = __Regex_NoError,
  35. REG_BADPAT = __Regex_InvalidPattern, // Invalid regular expression.
  36. REG_ECOLLATE = __Regex_InvalidCollationElement, // Invalid collating element referenced.
  37. REG_ECTYPE = __Regex_InvalidCharacterClass, // Invalid character class type referenced.
  38. REG_EESCAPE = __Regex_InvalidTrailingEscape, // Trailing \ in pattern.
  39. REG_ESUBREG = __Regex_InvalidNumber, // Number in \digit invalid or in error.
  40. REG_EBRACK = __Regex_MismatchingBracket, // [ ] imbalance.
  41. REG_EPAREN = __Regex_MismatchingParen, // \( \) or ( ) imbalance.
  42. REG_EBRACE = __Regex_MismatchingBrace, // \{ \} imbalance.
  43. REG_BADBR = __Regex_InvalidBraceContent, // Content of \{ \} invalid: not a number, number too large, more than two numbers, first larger than second.
  44. REG_ERANGE = __Regex_InvalidRange, // Invalid endpoint in range expression.
  45. REG_BADRPT = __Regex_InvalidRepetitionMarker, // ?, * or + not preceded by valid regular expression.
  46. REG_EMPTY_EXPR = __Regex_EmptySubExpression, // Empty expression
  47. REG_ENOSYS, // The implementation does not support the function.
  48. REG_ESPACE, // Out of memory.
  49. REG_NOMATCH, // regexec() failed to match.
  50. };
  51. typedef struct {
  52. regoff_t rm_so; // byte offset from start of string to start of substring
  53. regoff_t rm_eo; // byte offset from start of string of the first character after the end of substring
  54. regoff_t rm_cnt; // number of matches
  55. } regmatch_t;
  // Bit flags understood by the regex engine; each flag occupies one bit.
  //
  // NOTE(review): __Regex_Last names bit 13 (__Regex_SkipTrimEmptyMatches),
  // not the highest bit defined here; the two __Regex_Internal_* flags sit
  // above it. Confirm that excluding them from "Last" is intended.
  enum __RegexAllFlags {
      // All matches (don't return after first match).
      __Regex_Global = 1 << 0,
      // Case insensitive match (ignores case of [a-zA-Z]).
      __Regex_Insensitive = 1 << 1,
      // The match becomes lazy by default. Now a ? following a quantifier
      // makes it greedy.
      __Regex_Ungreedy = 1 << 2,
      // Enable all unicode features and interpret all unicode escape
      // sequences as such.
      __Regex_Unicode = 1 << 3,
      // Ignore whitespaces. Spaces and text after a # in the pattern are
      // ignored.
      __Regex_Extended = 1 << 4,
      // Disallow meaningless escapes. A \ followed by a letter with no
      // special meaning is faulted.
      __Regex_Extra = 1 << 5,
      // Pattern is not forced to ^ -> search in whole string!
      __Regex_MatchNotBeginOfLine = 1 << 6,
      // Don't force the dollar sign, $, to always match end of the string,
      // instead of end of the line. This option is ignored if the
      // Multiline-flag is set.
      __Regex_MatchNotEndOfLine = 1 << 7,
      // Do not return sub expressions in the result.
      __Regex_SkipSubExprResults = 1 << 8,
      // Do explicitly copy results into new allocated string instead of
      // StringView to original string.
      __Regex_StringCopyMatches = 1 << 9,
      // Dot matches newline characters.
      __Regex_SingleLine = 1 << 10,
      // Force the pattern to only match consecutive matches from where the
      // previous match ended.
      __Regex_Sticky = 1 << 11,
      // Handle newline characters. Match each line, one by one.
      __Regex_Multiline = 1 << 12,
      // Do not remove empty capture group results.
      __Regex_SkipTrimEmptyMatches = 1 << 13,
      // Internal flag; enables stateful matches.
      __Regex_Internal_Stateful = 1 << 14,
      // Internal flag; enable browser-specific ECMA262 extensions.
      __Regex_Internal_BrowserExtended = 1 << 15,

      __Regex_Last = __Regex_SkipTrimEmptyMatches
  };
  // Values for the cflags parameter to the regcomp() function:
  #define REG_EXTENDED __Regex_Extended                // Use Extended Regular Expressions.
  #define REG_ICASE __Regex_Insensitive                // Ignore case in match.
  #define REG_NOSUB __Regex_SkipSubExprResults         // Report only success or fail in regexec().
  #define REG_GLOBAL __Regex_Global                    // Don't stop searching for more match
  #define REG_NEWLINE (__Regex_Multiline | REG_GLOBAL) // Change the handling of newline.

  // Values for the eflags parameter to the regexec() function:
  #define REG_NOTBOL __Regex_MatchNotBeginOfLine // The circumflex character (^), when taken as a special character, will not match the beginning of string.
  #define REG_NOTEOL __Regex_MatchNotEndOfLine   // The dollar sign ($), when taken as a special character, will not match the end of string.

  //static_assert (sizeof(FlagsUnderlyingType) * 8 >= regex::POSIXFlags::Last << 1), "flags type too small")

  // The bit directly above __Regex_Last. The replacement list is parenthesized
  // so the macro behaves as a single operand inside larger expressions such as
  // `REG_SEARCH + 1` or `~REG_SEARCH`.
  // NOTE(review): with __Regex_Last == __Regex_SkipTrimEmptyMatches (bit 13)
  // this evaluates to bit 14, the same bit as __Regex_Internal_Stateful.
  // Confirm that the overlap is intended.
  #define REG_SEARCH (__Regex_Last << 1)
  86. int regcomp(regex_t*, const char*, int);
  87. int regexec(const regex_t*, const char*, size_t, regmatch_t[], int);
  88. size_t regerror(int, const regex_t*, char*, size_t);
  89. void regfree(regex_t*);
  90. __END_DECLS