StringUtils.cpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427
  1. /*
  2. * Copyright (c) 2018-2020, Andreas Kling <awesomekling@gmail.com>
  3. * Copyright (c) 2020, Fei Wu <f.eiwu@yahoo.com>
  4. *
  5. * SPDX-License-Identifier: BSD-2-Clause
  6. */
  7. #include <AK/MemMem.h>
  8. #include <AK/Memory.h>
  9. #include <AK/Optional.h>
  10. #include <AK/String.h>
  11. #include <AK/StringBuilder.h>
  12. #include <AK/StringUtils.h>
  13. #include <AK/StringView.h>
  14. #include <AK/Vector.h>
  15. #include <ctype.h>
  16. namespace AK {
  17. namespace StringUtils {
  18. bool matches(const StringView& str, const StringView& mask, CaseSensitivity case_sensitivity, Vector<MaskSpan>* match_spans)
  19. {
  20. auto record_span = [&match_spans](size_t start, size_t length) {
  21. if (match_spans)
  22. match_spans->append({ start, length });
  23. };
  24. if (str.is_null() || mask.is_null())
  25. return str.is_null() && mask.is_null();
  26. if (mask == "*") {
  27. record_span(0, str.length());
  28. return true;
  29. }
  30. const char* string_ptr = str.characters_without_null_termination();
  31. const char* string_start = str.characters_without_null_termination();
  32. const char* string_end = string_ptr + str.length();
  33. const char* mask_ptr = mask.characters_without_null_termination();
  34. const char* mask_end = mask_ptr + mask.length();
  35. auto matches_one = [](char ch, char p, CaseSensitivity case_sensitivity) {
  36. if (p == '?')
  37. return true;
  38. if (ch == 0)
  39. return false;
  40. if (case_sensitivity == CaseSensitivity::CaseSensitive)
  41. return p == ch;
  42. return tolower(p) == tolower(ch);
  43. };
  44. while (string_ptr < string_end && mask_ptr < mask_end) {
  45. auto string_start_ptr = string_ptr;
  46. switch (*mask_ptr) {
  47. case '*':
  48. if (mask_ptr[1] == 0) {
  49. record_span(string_ptr - string_start, string_end - string_ptr);
  50. return true;
  51. }
  52. while (string_ptr < string_end && !matches(string_ptr, mask_ptr + 1, case_sensitivity))
  53. ++string_ptr;
  54. record_span(string_start_ptr - string_start, string_ptr - string_start_ptr);
  55. --string_ptr;
  56. break;
  57. case '?':
  58. record_span(string_ptr - string_start, 1);
  59. break;
  60. default:
  61. if (!matches_one(*string_ptr, *mask_ptr, case_sensitivity))
  62. return false;
  63. break;
  64. }
  65. ++string_ptr;
  66. ++mask_ptr;
  67. }
  68. if (string_ptr == string_end) {
  69. // Allow ending '*' to contain nothing.
  70. while (mask_ptr != mask_end && *mask_ptr == '*') {
  71. record_span(string_ptr - string_start, 0);
  72. ++mask_ptr;
  73. }
  74. }
  75. return string_ptr == string_end && mask_ptr == mask_end;
  76. }
  77. template<typename T>
  78. Optional<T> convert_to_int(const StringView& str, TrimWhitespace trim_whitespace)
  79. {
  80. auto string = trim_whitespace == TrimWhitespace::Yes
  81. ? str.trim_whitespace()
  82. : str;
  83. if (string.is_empty())
  84. return {};
  85. T sign = 1;
  86. size_t i = 0;
  87. const auto characters = string.characters_without_null_termination();
  88. if (characters[0] == '-' || characters[0] == '+') {
  89. if (string.length() == 1)
  90. return {};
  91. i++;
  92. if (characters[0] == '-')
  93. sign = -1;
  94. }
  95. T value = 0;
  96. for (; i < string.length(); i++) {
  97. if (characters[i] < '0' || characters[i] > '9')
  98. return {};
  99. if (__builtin_mul_overflow(value, 10, &value))
  100. return {};
  101. if (__builtin_add_overflow(value, sign * (characters[i] - '0'), &value))
  102. return {};
  103. }
  104. return value;
  105. }
  106. template Optional<i8> convert_to_int(const StringView& str, TrimWhitespace);
  107. template Optional<i16> convert_to_int(const StringView& str, TrimWhitespace);
  108. template Optional<i32> convert_to_int(const StringView& str, TrimWhitespace);
  109. template Optional<i64> convert_to_int(const StringView& str, TrimWhitespace);
  110. template<typename T>
  111. Optional<T> convert_to_uint(const StringView& str, TrimWhitespace trim_whitespace)
  112. {
  113. auto string = trim_whitespace == TrimWhitespace::Yes
  114. ? str.trim_whitespace()
  115. : str;
  116. if (string.is_empty())
  117. return {};
  118. T value = 0;
  119. const auto characters = string.characters_without_null_termination();
  120. for (size_t i = 0; i < string.length(); i++) {
  121. if (characters[i] < '0' || characters[i] > '9')
  122. return {};
  123. if (__builtin_mul_overflow(value, 10, &value))
  124. return {};
  125. if (__builtin_add_overflow(value, characters[i] - '0', &value))
  126. return {};
  127. }
  128. return value;
  129. }
  130. template Optional<u8> convert_to_uint(const StringView& str, TrimWhitespace);
  131. template Optional<u16> convert_to_uint(const StringView& str, TrimWhitespace);
  132. template Optional<u32> convert_to_uint(const StringView& str, TrimWhitespace);
  133. template Optional<u64> convert_to_uint(const StringView& str, TrimWhitespace);
  134. template Optional<long> convert_to_uint(const StringView& str, TrimWhitespace);
  135. template Optional<long long> convert_to_uint(const StringView& str, TrimWhitespace);
  136. template<typename T>
  137. Optional<T> convert_to_uint_from_hex(const StringView& str, TrimWhitespace trim_whitespace)
  138. {
  139. auto string = trim_whitespace == TrimWhitespace::Yes
  140. ? str.trim_whitespace()
  141. : str;
  142. if (string.is_empty())
  143. return {};
  144. T value = 0;
  145. const auto count = string.length();
  146. const T upper_bound = NumericLimits<T>::max();
  147. for (size_t i = 0; i < count; i++) {
  148. char digit = string[i];
  149. u8 digit_val;
  150. if (value > (upper_bound >> 4))
  151. return {};
  152. if (digit >= '0' && digit <= '9') {
  153. digit_val = digit - '0';
  154. } else if (digit >= 'a' && digit <= 'f') {
  155. digit_val = 10 + (digit - 'a');
  156. } else if (digit >= 'A' && digit <= 'F') {
  157. digit_val = 10 + (digit - 'A');
  158. } else {
  159. return {};
  160. }
  161. value = (value << 4) + digit_val;
  162. }
  163. return value;
  164. }
  165. template Optional<u8> convert_to_uint_from_hex(const StringView& str, TrimWhitespace);
  166. template Optional<u16> convert_to_uint_from_hex(const StringView& str, TrimWhitespace);
  167. template Optional<u32> convert_to_uint_from_hex(const StringView& str, TrimWhitespace);
  168. template Optional<u64> convert_to_uint_from_hex(const StringView& str, TrimWhitespace);
  169. static inline char to_lowercase(char c)
  170. {
  171. if (c >= 'A' && c <= 'Z')
  172. return c | 0x20;
  173. return c;
  174. }
  175. bool equals_ignoring_case(const StringView& a, const StringView& b)
  176. {
  177. if (a.length() != b.length())
  178. return false;
  179. for (size_t i = 0; i < a.length(); ++i) {
  180. if (to_lowercase(a.characters_without_null_termination()[i]) != to_lowercase(b.characters_without_null_termination()[i]))
  181. return false;
  182. }
  183. return true;
  184. }
  185. bool ends_with(const StringView& str, const StringView& end, CaseSensitivity case_sensitivity)
  186. {
  187. if (end.is_empty())
  188. return true;
  189. if (str.is_empty())
  190. return false;
  191. if (end.length() > str.length())
  192. return false;
  193. if (case_sensitivity == CaseSensitivity::CaseSensitive)
  194. return !memcmp(str.characters_without_null_termination() + (str.length() - end.length()), end.characters_without_null_termination(), end.length());
  195. auto str_chars = str.characters_without_null_termination();
  196. auto end_chars = end.characters_without_null_termination();
  197. size_t si = str.length() - end.length();
  198. for (size_t ei = 0; ei < end.length(); ++si, ++ei) {
  199. if (to_lowercase(str_chars[si]) != to_lowercase(end_chars[ei]))
  200. return false;
  201. }
  202. return true;
  203. }
  204. bool starts_with(const StringView& str, const StringView& start, CaseSensitivity case_sensitivity)
  205. {
  206. if (start.is_empty())
  207. return true;
  208. if (str.is_empty())
  209. return false;
  210. if (start.length() > str.length())
  211. return false;
  212. if (str.characters_without_null_termination() == start.characters_without_null_termination())
  213. return true;
  214. if (case_sensitivity == CaseSensitivity::CaseSensitive)
  215. return !memcmp(str.characters_without_null_termination(), start.characters_without_null_termination(), start.length());
  216. auto str_chars = str.characters_without_null_termination();
  217. auto start_chars = start.characters_without_null_termination();
  218. size_t si = 0;
  219. for (size_t starti = 0; starti < start.length(); ++si, ++starti) {
  220. if (to_lowercase(str_chars[si]) != to_lowercase(start_chars[starti]))
  221. return false;
  222. }
  223. return true;
  224. }
  225. bool contains(const StringView& str, const StringView& needle, CaseSensitivity case_sensitivity)
  226. {
  227. if (str.is_null() || needle.is_null() || str.is_empty() || needle.length() > str.length())
  228. return false;
  229. if (needle.is_empty())
  230. return true;
  231. auto str_chars = str.characters_without_null_termination();
  232. auto needle_chars = needle.characters_without_null_termination();
  233. if (case_sensitivity == CaseSensitivity::CaseSensitive)
  234. return memmem(str_chars, str.length(), needle_chars, needle.length()) != nullptr;
  235. auto needle_first = to_lowercase(needle_chars[0]);
  236. for (size_t si = 0; si < str.length(); si++) {
  237. if (to_lowercase(str_chars[si]) != needle_first)
  238. continue;
  239. for (size_t ni = 0; si + ni < str.length(); ni++) {
  240. if (to_lowercase(str_chars[si + ni]) != to_lowercase(needle_chars[ni])) {
  241. si += ni;
  242. break;
  243. }
  244. if (ni + 1 == needle.length())
  245. return true;
  246. }
  247. }
  248. return false;
  249. }
  250. bool is_whitespace(const StringView& str)
  251. {
  252. for (auto ch : str) {
  253. if (!isspace(ch))
  254. return false;
  255. }
  256. return true;
  257. }
  258. StringView trim(const StringView& str, const StringView& characters, TrimMode mode)
  259. {
  260. size_t substring_start = 0;
  261. size_t substring_length = str.length();
  262. if (mode == TrimMode::Left || mode == TrimMode::Both) {
  263. for (size_t i = 0; i < str.length(); ++i) {
  264. if (substring_length == 0)
  265. return "";
  266. if (!characters.contains(str[i]))
  267. break;
  268. ++substring_start;
  269. --substring_length;
  270. }
  271. }
  272. if (mode == TrimMode::Right || mode == TrimMode::Both) {
  273. for (size_t i = str.length() - 1; i > 0; --i) {
  274. if (substring_length == 0)
  275. return "";
  276. if (!characters.contains(str[i]))
  277. break;
  278. --substring_length;
  279. }
  280. }
  281. return str.substring_view(substring_start, substring_length);
  282. }
  283. StringView trim_whitespace(const StringView& str, TrimMode mode)
  284. {
  285. return trim(str, " \n\t\v\f\r", mode);
  286. }
  287. Optional<size_t> find(StringView const& haystack, char needle, size_t start)
  288. {
  289. if (start >= haystack.length())
  290. return {};
  291. for (size_t i = start; i < haystack.length(); ++i) {
  292. if (haystack[i] == needle)
  293. return i;
  294. }
  295. return {};
  296. }
  297. Optional<size_t> find(StringView const& haystack, StringView const& needle, size_t start)
  298. {
  299. if (start > haystack.length())
  300. return {};
  301. auto index = AK::memmem_optional(
  302. haystack.characters_without_null_termination() + start, haystack.length() - start,
  303. needle.characters_without_null_termination(), needle.length());
  304. return index.has_value() ? (*index + start) : index;
  305. }
  306. Optional<size_t> find_last(StringView const& haystack, char needle)
  307. {
  308. for (size_t i = haystack.length(); i > 0; --i) {
  309. if (haystack[i - 1] == needle)
  310. return i - 1;
  311. }
  312. return {};
  313. }
  314. Vector<size_t> find_all(StringView const& haystack, StringView const& needle)
  315. {
  316. Vector<size_t> positions;
  317. size_t current_position = 0;
  318. while (current_position <= haystack.length()) {
  319. auto maybe_position = AK::memmem_optional(
  320. haystack.characters_without_null_termination() + current_position, haystack.length() - current_position,
  321. needle.characters_without_null_termination(), needle.length());
  322. if (!maybe_position.has_value())
  323. break;
  324. positions.append(current_position + *maybe_position);
  325. current_position += *maybe_position + 1;
  326. }
  327. return positions;
  328. }
  329. Optional<size_t> find_any_of(StringView const& haystack, StringView const& needles, SearchDirection direction)
  330. {
  331. if (haystack.is_empty() || needles.is_empty())
  332. return {};
  333. if (direction == SearchDirection::Forward) {
  334. for (size_t i = 0; i < haystack.length(); ++i) {
  335. if (needles.contains(haystack[i]))
  336. return i;
  337. }
  338. } else if (direction == SearchDirection::Backward) {
  339. for (size_t i = haystack.length(); i > 0; --i) {
  340. if (needles.contains(haystack[i - 1]))
  341. return i - 1;
  342. }
  343. }
  344. return {};
  345. }
  346. String to_snakecase(const StringView& str)
  347. {
  348. auto should_insert_underscore = [&](auto i, auto current_char) {
  349. if (i == 0)
  350. return false;
  351. auto previous_ch = str[i - 1];
  352. if (islower(previous_ch) && isupper(current_char))
  353. return true;
  354. if (i >= str.length() - 1)
  355. return false;
  356. auto next_ch = str[i + 1];
  357. if (isupper(current_char) && islower(next_ch))
  358. return true;
  359. return false;
  360. };
  361. StringBuilder builder;
  362. for (size_t i = 0; i < str.length(); ++i) {
  363. auto ch = str[i];
  364. if (should_insert_underscore(i, ch))
  365. builder.append('_');
  366. builder.append(tolower(ch));
  367. }
  368. return builder.to_string();
  369. }
  370. }
  371. }