123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504 |
- /*
- * Copyright (c) 2018-2020, Andreas Kling <awesomekling@gmail.com>
- * Copyright (c) 2020, Fei Wu <f.eiwu@yahoo.com>
- *
- * SPDX-License-Identifier: BSD-2-Clause
- */
- #include <AK/CharacterTypes.h>
- #include <AK/MemMem.h>
- #include <AK/Memory.h>
- #include <AK/Optional.h>
- #include <AK/String.h>
- #include <AK/StringBuilder.h>
- #include <AK/StringUtils.h>
- #include <AK/StringView.h>
- #include <AK/Vector.h>
- namespace AK {
- namespace StringUtils {
- bool matches(StringView str, StringView mask, CaseSensitivity case_sensitivity, Vector<MaskSpan>* match_spans)
- {
- auto record_span = [&match_spans](size_t start, size_t length) {
- if (match_spans)
- match_spans->append({ start, length });
- };
- if (str.is_null() || mask.is_null())
- return str.is_null() && mask.is_null();
- if (mask == "*"sv) {
- record_span(0, str.length());
- return true;
- }
- const char* string_ptr = str.characters_without_null_termination();
- const char* string_start = str.characters_without_null_termination();
- const char* string_end = string_ptr + str.length();
- const char* mask_ptr = mask.characters_without_null_termination();
- const char* mask_end = mask_ptr + mask.length();
- while (string_ptr < string_end && mask_ptr < mask_end) {
- auto string_start_ptr = string_ptr;
- switch (*mask_ptr) {
- case '*':
- if (mask_ptr == mask_end - 1) {
- record_span(string_ptr - string_start, string_end - string_ptr);
- return true;
- }
- while (string_ptr < string_end && !matches({ string_ptr, static_cast<size_t>(string_end - string_ptr) }, { mask_ptr + 1, static_cast<size_t>(mask_end - mask_ptr - 1) }, case_sensitivity))
- ++string_ptr;
- record_span(string_start_ptr - string_start, string_ptr - string_start_ptr);
- --string_ptr;
- break;
- case '?':
- record_span(string_ptr - string_start, 1);
- break;
- default:
- auto p = *mask_ptr;
- auto ch = *string_ptr;
- if (case_sensitivity == CaseSensitivity::CaseSensitive ? p != ch : to_ascii_lowercase(p) != to_ascii_lowercase(ch))
- return false;
- break;
- }
- ++string_ptr;
- ++mask_ptr;
- }
- if (string_ptr == string_end) {
- // Allow ending '*' to contain nothing.
- while (mask_ptr != mask_end && *mask_ptr == '*') {
- record_span(string_ptr - string_start, 0);
- ++mask_ptr;
- }
- }
- return string_ptr == string_end && mask_ptr == mask_end;
- }
- template<typename T>
- Optional<T> convert_to_int(StringView str, TrimWhitespace trim_whitespace)
- {
- auto string = trim_whitespace == TrimWhitespace::Yes
- ? str.trim_whitespace()
- : str;
- if (string.is_empty())
- return {};
- T sign = 1;
- size_t i = 0;
- const auto characters = string.characters_without_null_termination();
- if (characters[0] == '-' || characters[0] == '+') {
- if (string.length() == 1)
- return {};
- i++;
- if (characters[0] == '-')
- sign = -1;
- }
- T value = 0;
- for (; i < string.length(); i++) {
- if (characters[i] < '0' || characters[i] > '9')
- return {};
- if (__builtin_mul_overflow(value, 10, &value))
- return {};
- if (__builtin_add_overflow(value, sign * (characters[i] - '0'), &value))
- return {};
- }
- return value;
- }
- template Optional<i8> convert_to_int(StringView str, TrimWhitespace);
- template Optional<i16> convert_to_int(StringView str, TrimWhitespace);
- template Optional<i32> convert_to_int(StringView str, TrimWhitespace);
- template Optional<long> convert_to_int(StringView str, TrimWhitespace);
- template Optional<long long> convert_to_int(StringView str, TrimWhitespace);
- template<typename T>
- Optional<T> convert_to_uint(StringView str, TrimWhitespace trim_whitespace)
- {
- auto string = trim_whitespace == TrimWhitespace::Yes
- ? str.trim_whitespace()
- : str;
- if (string.is_empty())
- return {};
- T value = 0;
- const auto characters = string.characters_without_null_termination();
- for (size_t i = 0; i < string.length(); i++) {
- if (characters[i] < '0' || characters[i] > '9')
- return {};
- if (__builtin_mul_overflow(value, 10, &value))
- return {};
- if (__builtin_add_overflow(value, characters[i] - '0', &value))
- return {};
- }
- return value;
- }
- template Optional<u8> convert_to_uint(StringView str, TrimWhitespace);
- template Optional<u16> convert_to_uint(StringView str, TrimWhitespace);
- template Optional<u32> convert_to_uint(StringView str, TrimWhitespace);
- template Optional<unsigned long> convert_to_uint(StringView str, TrimWhitespace);
- template Optional<unsigned long long> convert_to_uint(StringView str, TrimWhitespace);
- template Optional<long> convert_to_uint(StringView str, TrimWhitespace);
- template Optional<long long> convert_to_uint(StringView str, TrimWhitespace);
- template<typename T>
- Optional<T> convert_to_uint_from_hex(StringView str, TrimWhitespace trim_whitespace)
- {
- auto string = trim_whitespace == TrimWhitespace::Yes
- ? str.trim_whitespace()
- : str;
- if (string.is_empty())
- return {};
- T value = 0;
- const auto count = string.length();
- const T upper_bound = NumericLimits<T>::max();
- for (size_t i = 0; i < count; i++) {
- char digit = string[i];
- u8 digit_val;
- if (value > (upper_bound >> 4))
- return {};
- if (digit >= '0' && digit <= '9') {
- digit_val = digit - '0';
- } else if (digit >= 'a' && digit <= 'f') {
- digit_val = 10 + (digit - 'a');
- } else if (digit >= 'A' && digit <= 'F') {
- digit_val = 10 + (digit - 'A');
- } else {
- return {};
- }
- value = (value << 4) + digit_val;
- }
- return value;
- }
- template Optional<u8> convert_to_uint_from_hex(StringView str, TrimWhitespace);
- template Optional<u16> convert_to_uint_from_hex(StringView str, TrimWhitespace);
- template Optional<u32> convert_to_uint_from_hex(StringView str, TrimWhitespace);
- template Optional<u64> convert_to_uint_from_hex(StringView str, TrimWhitespace);
- template<typename T>
- Optional<T> convert_to_uint_from_octal(StringView str, TrimWhitespace trim_whitespace)
- {
- auto string = trim_whitespace == TrimWhitespace::Yes
- ? str.trim_whitespace()
- : str;
- if (string.is_empty())
- return {};
- T value = 0;
- const auto count = string.length();
- const T upper_bound = NumericLimits<T>::max();
- for (size_t i = 0; i < count; i++) {
- char digit = string[i];
- u8 digit_val;
- if (value > (upper_bound >> 3))
- return {};
- if (digit >= '0' && digit <= '7') {
- digit_val = digit - '0';
- } else {
- return {};
- }
- value = (value << 3) + digit_val;
- }
- return value;
- }
- template Optional<u8> convert_to_uint_from_octal(StringView str, TrimWhitespace);
- template Optional<u16> convert_to_uint_from_octal(StringView str, TrimWhitespace);
- template Optional<u32> convert_to_uint_from_octal(StringView str, TrimWhitespace);
- template Optional<u64> convert_to_uint_from_octal(StringView str, TrimWhitespace);
- bool equals_ignoring_case(StringView a, StringView b)
- {
- if (a.length() != b.length())
- return false;
- for (size_t i = 0; i < a.length(); ++i) {
- if (to_ascii_lowercase(a.characters_without_null_termination()[i]) != to_ascii_lowercase(b.characters_without_null_termination()[i]))
- return false;
- }
- return true;
- }
- bool ends_with(StringView str, StringView end, CaseSensitivity case_sensitivity)
- {
- if (end.is_empty())
- return true;
- if (str.is_empty())
- return false;
- if (end.length() > str.length())
- return false;
- if (case_sensitivity == CaseSensitivity::CaseSensitive)
- return !memcmp(str.characters_without_null_termination() + (str.length() - end.length()), end.characters_without_null_termination(), end.length());
- auto str_chars = str.characters_without_null_termination();
- auto end_chars = end.characters_without_null_termination();
- size_t si = str.length() - end.length();
- for (size_t ei = 0; ei < end.length(); ++si, ++ei) {
- if (to_ascii_lowercase(str_chars[si]) != to_ascii_lowercase(end_chars[ei]))
- return false;
- }
- return true;
- }
- bool starts_with(StringView str, StringView start, CaseSensitivity case_sensitivity)
- {
- if (start.is_empty())
- return true;
- if (str.is_empty())
- return false;
- if (start.length() > str.length())
- return false;
- if (str.characters_without_null_termination() == start.characters_without_null_termination())
- return true;
- if (case_sensitivity == CaseSensitivity::CaseSensitive)
- return !memcmp(str.characters_without_null_termination(), start.characters_without_null_termination(), start.length());
- auto str_chars = str.characters_without_null_termination();
- auto start_chars = start.characters_without_null_termination();
- size_t si = 0;
- for (size_t starti = 0; starti < start.length(); ++si, ++starti) {
- if (to_ascii_lowercase(str_chars[si]) != to_ascii_lowercase(start_chars[starti]))
- return false;
- }
- return true;
- }
- bool contains(StringView str, StringView needle, CaseSensitivity case_sensitivity)
- {
- if (str.is_null() || needle.is_null() || str.is_empty() || needle.length() > str.length())
- return false;
- if (needle.is_empty())
- return true;
- auto str_chars = str.characters_without_null_termination();
- auto needle_chars = needle.characters_without_null_termination();
- if (case_sensitivity == CaseSensitivity::CaseSensitive)
- return memmem(str_chars, str.length(), needle_chars, needle.length()) != nullptr;
- auto needle_first = to_ascii_lowercase(needle_chars[0]);
- for (size_t si = 0; si < str.length(); si++) {
- if (to_ascii_lowercase(str_chars[si]) != needle_first)
- continue;
- for (size_t ni = 0; si + ni < str.length(); ni++) {
- if (to_ascii_lowercase(str_chars[si + ni]) != to_ascii_lowercase(needle_chars[ni])) {
- si += ni;
- break;
- }
- if (ni + 1 == needle.length())
- return true;
- }
- }
- return false;
- }
- bool is_whitespace(StringView str)
- {
- return all_of(str, is_ascii_space);
- }
- StringView trim(StringView str, StringView characters, TrimMode mode)
- {
- size_t substring_start = 0;
- size_t substring_length = str.length();
- if (mode == TrimMode::Left || mode == TrimMode::Both) {
- for (size_t i = 0; i < str.length(); ++i) {
- if (substring_length == 0)
- return "";
- if (!characters.contains(str[i]))
- break;
- ++substring_start;
- --substring_length;
- }
- }
- if (mode == TrimMode::Right || mode == TrimMode::Both) {
- for (size_t i = str.length() - 1; i > 0; --i) {
- if (substring_length == 0)
- return "";
- if (!characters.contains(str[i]))
- break;
- --substring_length;
- }
- }
- return str.substring_view(substring_start, substring_length);
- }
- StringView trim_whitespace(StringView str, TrimMode mode)
- {
- return trim(str, " \n\t\v\f\r", mode);
- }
- Optional<size_t> find(StringView haystack, char needle, size_t start)
- {
- if (start >= haystack.length())
- return {};
- for (size_t i = start; i < haystack.length(); ++i) {
- if (haystack[i] == needle)
- return i;
- }
- return {};
- }
- Optional<size_t> find(StringView haystack, StringView needle, size_t start)
- {
- if (start > haystack.length())
- return {};
- auto index = AK::memmem_optional(
- haystack.characters_without_null_termination() + start, haystack.length() - start,
- needle.characters_without_null_termination(), needle.length());
- return index.has_value() ? (*index + start) : index;
- }
- Optional<size_t> find_last(StringView haystack, char needle)
- {
- for (size_t i = haystack.length(); i > 0; --i) {
- if (haystack[i - 1] == needle)
- return i - 1;
- }
- return {};
- }
- Vector<size_t> find_all(StringView haystack, StringView needle)
- {
- Vector<size_t> positions;
- size_t current_position = 0;
- while (current_position <= haystack.length()) {
- auto maybe_position = AK::memmem_optional(
- haystack.characters_without_null_termination() + current_position, haystack.length() - current_position,
- needle.characters_without_null_termination(), needle.length());
- if (!maybe_position.has_value())
- break;
- positions.append(current_position + *maybe_position);
- current_position += *maybe_position + 1;
- }
- return positions;
- }
- Optional<size_t> find_any_of(StringView haystack, StringView needles, SearchDirection direction)
- {
- if (haystack.is_empty() || needles.is_empty())
- return {};
- if (direction == SearchDirection::Forward) {
- for (size_t i = 0; i < haystack.length(); ++i) {
- if (needles.contains(haystack[i]))
- return i;
- }
- } else if (direction == SearchDirection::Backward) {
- for (size_t i = haystack.length(); i > 0; --i) {
- if (needles.contains(haystack[i - 1]))
- return i - 1;
- }
- }
- return {};
- }
- String to_snakecase(StringView str)
- {
- auto should_insert_underscore = [&](auto i, auto current_char) {
- if (i == 0)
- return false;
- auto previous_ch = str[i - 1];
- if (is_ascii_lower_alpha(previous_ch) && is_ascii_upper_alpha(current_char))
- return true;
- if (i >= str.length() - 1)
- return false;
- auto next_ch = str[i + 1];
- if (is_ascii_upper_alpha(current_char) && is_ascii_lower_alpha(next_ch))
- return true;
- return false;
- };
- StringBuilder builder;
- for (size_t i = 0; i < str.length(); ++i) {
- auto ch = str[i];
- if (should_insert_underscore(i, ch))
- builder.append('_');
- builder.append_as_lowercase(ch);
- }
- return builder.to_string();
- }
- String to_titlecase(StringView str)
- {
- StringBuilder builder;
- bool next_is_upper = true;
- for (auto ch : str) {
- if (next_is_upper)
- builder.append_code_point(to_ascii_uppercase(ch));
- else
- builder.append_code_point(to_ascii_lowercase(ch));
- next_is_upper = ch == ' ';
- }
- return builder.to_string();
- }
- String replace(StringView str, StringView needle, StringView replacement, bool all_occurrences)
- {
- if (str.is_empty())
- return str;
- Vector<size_t> positions;
- if (all_occurrences) {
- positions = str.find_all(needle);
- if (!positions.size())
- return str;
- } else {
- auto pos = str.find(needle);
- if (!pos.has_value())
- return str;
- positions.append(pos.value());
- }
- StringBuilder replaced_string;
- size_t last_position = 0;
- for (auto& position : positions) {
- replaced_string.append(str.substring_view(last_position, position - last_position));
- replaced_string.append(replacement);
- last_position = position + needle.length();
- }
- replaced_string.append(str.substring_view(last_position, str.length() - last_position));
- return replaced_string.build();
- }
- // TODO: Benchmark against KMP (AK/MemMem.h) and switch over if it's faster for short strings too
- size_t count(StringView str, StringView needle)
- {
- if (needle.is_empty())
- return str.length();
- size_t count = 0;
- for (size_t i = 0; i < str.length() - needle.length() + 1; ++i) {
- if (str.substring_view(i).starts_with(needle))
- count++;
- }
- return count;
- }
- }
- }
|