StringUtils.cpp 9.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304
  1. /*
  2. * Copyright (c) 2018-2020, Andreas Kling <awesomekling@gmail.com>
  3. * Copyright (c) 2020, Fei Wu <f.eiwu@yahoo.com>
  4. * All rights reserved.
  5. *
  6. * Redistribution and use in source and binary forms, with or without
  7. * modification, are permitted provided that the following conditions are met:
  8. *
  9. * 1. Redistributions of source code must retain the above copyright notice, this
  10. * list of conditions and the following disclaimer.
  11. *
  12. * 2. Redistributions in binary form must reproduce the above copyright notice,
  13. * this list of conditions and the following disclaimer in the documentation
  14. * and/or other materials provided with the distribution.
  15. *
  16. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  17. * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  18. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  19. * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
  20. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  21. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  22. * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  23. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  24. * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  25. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  26. */
  27. #include <AK/Memory.h>
  28. #include <AK/Optional.h>
  29. #include <AK/String.h>
  30. #include <AK/StringUtils.h>
  31. #include <AK/StringView.h>
  32. #include <AK/Vector.h>
  33. namespace AK {
  34. namespace StringUtils {
  35. bool matches(const StringView& str, const StringView& mask, CaseSensitivity case_sensitivity, Vector<MaskSpan>* match_spans)
  36. {
  37. auto record_span = [&match_spans](size_t start, size_t length) {
  38. if (match_spans)
  39. match_spans->append({ start, length });
  40. };
  41. if (str.is_null() || mask.is_null())
  42. return str.is_null() && mask.is_null();
  43. if (mask == "*") {
  44. record_span(0, str.length());
  45. return true;
  46. }
  47. if (case_sensitivity == CaseSensitivity::CaseInsensitive) {
  48. const String str_lower = String(str).to_lowercase();
  49. const String mask_lower = String(mask).to_lowercase();
  50. return matches(str_lower, mask_lower, CaseSensitivity::CaseSensitive, match_spans);
  51. }
  52. const char* string_ptr = str.characters_without_null_termination();
  53. const char* string_start = str.characters_without_null_termination();
  54. const char* string_end = string_ptr + str.length();
  55. const char* mask_ptr = mask.characters_without_null_termination();
  56. const char* mask_end = mask_ptr + mask.length();
  57. auto matches_one = [](char ch, char p) {
  58. if (p == '?')
  59. return true;
  60. return p == ch && ch != 0;
  61. };
  62. while (string_ptr < string_end && mask_ptr < mask_end) {
  63. auto string_start_ptr = string_ptr;
  64. switch (*mask_ptr) {
  65. case '*':
  66. if (mask_ptr[1] == 0) {
  67. record_span(string_ptr - string_start, string_end - string_ptr);
  68. return true;
  69. }
  70. while (string_ptr < string_end && !matches(string_ptr, mask_ptr + 1))
  71. ++string_ptr;
  72. record_span(string_start_ptr - string_start, string_ptr - string_start_ptr);
  73. --string_ptr;
  74. break;
  75. case '?':
  76. record_span(string_ptr - string_start, 1);
  77. break;
  78. default:
  79. if (!matches_one(*string_ptr, *mask_ptr))
  80. return false;
  81. break;
  82. }
  83. ++string_ptr;
  84. ++mask_ptr;
  85. }
  86. return string_ptr == string_end && mask_ptr == mask_end;
  87. }
  88. Optional<int> convert_to_int(const StringView& str)
  89. {
  90. if (str.is_empty())
  91. return {};
  92. bool negative = false;
  93. size_t i = 0;
  94. const auto characters = str.characters_without_null_termination();
  95. if (characters[0] == '-' || characters[0] == '+') {
  96. if (str.length() == 1)
  97. return {};
  98. i++;
  99. negative = (characters[0] == '-');
  100. }
  101. int value = 0;
  102. for (; i < str.length(); i++) {
  103. if (characters[i] < '0' || characters[i] > '9')
  104. return {};
  105. value = value * 10;
  106. value += characters[i] - '0';
  107. }
  108. return negative ? -value : value;
  109. }
  110. Optional<unsigned> convert_to_uint(const StringView& str)
  111. {
  112. if (str.is_empty())
  113. return {};
  114. unsigned value = 0;
  115. const auto characters = str.characters_without_null_termination();
  116. for (size_t i = 0; i < str.length(); i++) {
  117. if (characters[i] < '0' || characters[i] > '9')
  118. return {};
  119. value = value * 10;
  120. value += characters[i] - '0';
  121. }
  122. return value;
  123. }
  124. Optional<unsigned> convert_to_uint_from_hex(const StringView& str)
  125. {
  126. if (str.is_empty())
  127. return {};
  128. unsigned value = 0;
  129. const auto count = str.length();
  130. for (size_t i = 0; i < count; i++) {
  131. char digit = str[i];
  132. u8 digit_val;
  133. if (digit >= '0' && digit <= '9') {
  134. digit_val = digit - '0';
  135. } else if (digit >= 'a' && digit <= 'f') {
  136. digit_val = 10 + (digit - 'a');
  137. } else if (digit >= 'A' && digit <= 'F') {
  138. digit_val = 10 + (digit - 'A');
  139. } else {
  140. return {};
  141. }
  142. value = (value << 4) + digit_val;
  143. }
  144. return value;
  145. }
  // Maps the ASCII letters 'A'..'Z' to 'a'..'z'; any other character is
  // returned unchanged.
  static inline char to_lowercase(char c)
  {
      const bool is_ascii_uppercase = 'A' <= c && c <= 'Z';
      // Setting bit 0x20 turns an ASCII uppercase letter into its lowercase form.
      return is_ascii_uppercase ? static_cast<char>(c | 0x20) : c;
  }
  152. bool equals_ignoring_case(const StringView& a, const StringView& b)
  153. {
  154. if (a.impl() && a.impl() == b.impl())
  155. return true;
  156. if (a.length() != b.length())
  157. return false;
  158. for (size_t i = 0; i < a.length(); ++i) {
  159. if (to_lowercase(a.characters_without_null_termination()[i]) != to_lowercase(b.characters_without_null_termination()[i]))
  160. return false;
  161. }
  162. return true;
  163. }
  164. bool ends_with(const StringView& str, const StringView& end, CaseSensitivity case_sensitivity)
  165. {
  166. if (end.is_empty())
  167. return true;
  168. if (str.is_empty())
  169. return false;
  170. if (end.length() > str.length())
  171. return false;
  172. if (case_sensitivity == CaseSensitivity::CaseSensitive)
  173. return !memcmp(str.characters_without_null_termination() + (str.length() - end.length()), end.characters_without_null_termination(), end.length());
  174. auto str_chars = str.characters_without_null_termination();
  175. auto end_chars = end.characters_without_null_termination();
  176. size_t si = str.length() - end.length();
  177. for (size_t ei = 0; ei < end.length(); ++si, ++ei) {
  178. if (to_lowercase(str_chars[si]) != to_lowercase(end_chars[ei]))
  179. return false;
  180. }
  181. return true;
  182. }
  183. bool starts_with(const StringView& str, const StringView& start, CaseSensitivity case_sensitivity)
  184. {
  185. if (start.is_empty())
  186. return true;
  187. if (str.is_empty())
  188. return false;
  189. if (start.length() > str.length())
  190. return false;
  191. if (str.characters_without_null_termination() == start.characters_without_null_termination())
  192. return true;
  193. if (case_sensitivity == CaseSensitivity::CaseSensitive)
  194. return !memcmp(str.characters_without_null_termination(), start.characters_without_null_termination(), start.length());
  195. auto str_chars = str.characters_without_null_termination();
  196. auto start_chars = start.characters_without_null_termination();
  197. size_t si = 0;
  198. for (size_t starti = 0; starti < start.length(); ++si, ++starti) {
  199. if (to_lowercase(str_chars[si]) != to_lowercase(start_chars[starti]))
  200. return false;
  201. }
  202. return true;
  203. }
  204. bool contains(const StringView& str, const StringView& needle, CaseSensitivity case_sensitivity)
  205. {
  206. if (str.is_null() || needle.is_null() || str.is_empty() || needle.length() > str.length())
  207. return false;
  208. if (needle.is_empty())
  209. return true;
  210. auto str_chars = str.characters_without_null_termination();
  211. auto needle_chars = needle.characters_without_null_termination();
  212. if (case_sensitivity == CaseSensitivity::CaseSensitive)
  213. return memmem(str_chars, str.length(), needle_chars, needle.length()) != nullptr;
  214. auto needle_first = to_lowercase(needle_chars[0]);
  215. for (size_t si = 0; si < str.length(); si++) {
  216. if (to_lowercase(str_chars[si]) != needle_first)
  217. continue;
  218. for (size_t ni = 0; si + ni < str.length(); ni++) {
  219. if (to_lowercase(str_chars[si + ni]) != to_lowercase(needle_chars[ni])) {
  220. si += ni;
  221. break;
  222. }
  223. if (ni + 1 == needle.length())
  224. return true;
  225. }
  226. }
  227. return false;
  228. }
  229. StringView trim_whitespace(const StringView& str, TrimMode mode)
  230. {
  231. auto is_whitespace_character = [](char ch) -> bool {
  232. return ch == '\t'
  233. || ch == '\n'
  234. || ch == '\v'
  235. || ch == '\f'
  236. || ch == '\r'
  237. || ch == ' ';
  238. };
  239. size_t substring_start = 0;
  240. size_t substring_length = str.length();
  241. if (mode == TrimMode::Left || mode == TrimMode::Both) {
  242. for (size_t i = 0; i < str.length(); ++i) {
  243. if (substring_length == 0)
  244. return "";
  245. if (!is_whitespace_character(str[i]))
  246. break;
  247. ++substring_start;
  248. --substring_length;
  249. }
  250. }
  251. if (mode == TrimMode::Right || mode == TrimMode::Both) {
  252. for (size_t i = str.length() - 1; i > 0; --i) {
  253. if (substring_length == 0)
  254. return "";
  255. if (!is_whitespace_character(str[i]))
  256. break;
  257. --substring_length;
  258. }
  259. }
  260. return str.substring_view(substring_start, substring_length);
  261. }
  262. }
  263. }