String.cpp 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462
  1. /*
  2. * Copyright (c) 2018-2020, Andreas Kling <kling@serenityos.org>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <AK/ByteBuffer.h>
  7. #include <AK/FlyString.h>
  8. #include <AK/Format.h>
  9. #include <AK/Memory.h>
  10. #include <AK/StdLibExtras.h>
  11. #include <AK/String.h>
  12. #include <AK/StringView.h>
  13. #include <AK/Vector.h>
  14. namespace AK {
  15. bool String::operator==(const FlyString& fly_string) const
  16. {
  17. return *this == String(fly_string.impl());
  18. }
  19. bool String::operator==(const String& other) const
  20. {
  21. if (!m_impl)
  22. return !other.m_impl;
  23. if (!other.m_impl)
  24. return false;
  25. return *m_impl == *other.m_impl;
  26. }
  27. bool String::operator==(const StringView& other) const
  28. {
  29. if (!m_impl)
  30. return !other.m_characters;
  31. if (!other.m_characters)
  32. return false;
  33. if (length() != other.length())
  34. return false;
  35. return !memcmp(characters(), other.characters_without_null_termination(), length());
  36. }
  37. bool String::operator<(const String& other) const
  38. {
  39. if (!m_impl)
  40. return other.m_impl;
  41. if (!other.m_impl)
  42. return false;
  43. return strcmp(characters(), other.characters()) < 0;
  44. }
  45. bool String::operator>(const String& other) const
  46. {
  47. if (!m_impl)
  48. return other.m_impl;
  49. if (!other.m_impl)
  50. return false;
  51. return strcmp(characters(), other.characters()) > 0;
  52. }
  53. bool String::copy_characters_to_buffer(char* buffer, size_t buffer_size) const
  54. {
  55. // We must fit at least the NUL-terminator.
  56. VERIFY(buffer_size > 0);
  57. size_t characters_to_copy = min(length(), buffer_size - 1);
  58. __builtin_memcpy(buffer, characters(), characters_to_copy);
  59. buffer[characters_to_copy] = 0;
  60. return characters_to_copy == length();
  61. }
  62. String String::isolated_copy() const
  63. {
  64. if (!m_impl)
  65. return {};
  66. if (!m_impl->length())
  67. return empty();
  68. char* buffer;
  69. auto impl = StringImpl::create_uninitialized(length(), buffer);
  70. memcpy(buffer, m_impl->characters(), m_impl->length());
  71. return String(move(*impl));
  72. }
  73. String String::substring(size_t start) const
  74. {
  75. VERIFY(m_impl);
  76. VERIFY(start <= length());
  77. return { characters() + start, length() - start };
  78. }
  79. String String::substring(size_t start, size_t length) const
  80. {
  81. if (!length)
  82. return "";
  83. VERIFY(m_impl);
  84. VERIFY(start + length <= m_impl->length());
  85. // FIXME: This needs some input bounds checking.
  86. return { characters() + start, length };
  87. }
  88. StringView String::substring_view(size_t start, size_t length) const
  89. {
  90. VERIFY(m_impl);
  91. VERIFY(start + length <= m_impl->length());
  92. // FIXME: This needs some input bounds checking.
  93. return { characters() + start, length };
  94. }
  95. StringView String::substring_view(size_t start) const
  96. {
  97. VERIFY(m_impl);
  98. VERIFY(start <= length());
  99. return { characters() + start, length() - start };
  100. }
  101. Vector<String> String::split(char separator, bool keep_empty) const
  102. {
  103. return split_limit(separator, 0, keep_empty);
  104. }
  105. Vector<String> String::split_limit(char separator, size_t limit, bool keep_empty) const
  106. {
  107. if (is_empty())
  108. return {};
  109. Vector<String> v;
  110. size_t substart = 0;
  111. for (size_t i = 0; i < length() && (v.size() + 1) != limit; ++i) {
  112. char ch = characters()[i];
  113. if (ch == separator) {
  114. size_t sublen = i - substart;
  115. if (sublen != 0 || keep_empty)
  116. v.append(substring(substart, sublen));
  117. substart = i + 1;
  118. }
  119. }
  120. size_t taillen = length() - substart;
  121. if (taillen != 0 || keep_empty)
  122. v.append(substring(substart, taillen));
  123. return v;
  124. }
  125. Vector<StringView> String::split_view(const char separator, bool keep_empty) const
  126. {
  127. if (is_empty())
  128. return {};
  129. Vector<StringView> v;
  130. size_t substart = 0;
  131. for (size_t i = 0; i < length(); ++i) {
  132. char ch = characters()[i];
  133. if (ch == separator) {
  134. size_t sublen = i - substart;
  135. if (sublen != 0 || keep_empty)
  136. v.append(substring_view(substart, sublen));
  137. substart = i + 1;
  138. }
  139. }
  140. size_t taillen = length() - substart;
  141. if (taillen != 0 || keep_empty)
  142. v.append(substring_view(substart, taillen));
  143. return v;
  144. }
  145. ByteBuffer String::to_byte_buffer() const
  146. {
  147. if (!m_impl)
  148. return {};
  149. return ByteBuffer::copy(reinterpret_cast<const u8*>(characters()), length());
  150. }
  151. template<typename T>
  152. Optional<T> String::to_int(TrimWhitespace trim_whitespace) const
  153. {
  154. return StringUtils::convert_to_int<T>(view(), trim_whitespace);
  155. }
  156. template Optional<i8> String::to_int(TrimWhitespace) const;
  157. template Optional<i16> String::to_int(TrimWhitespace) const;
  158. template Optional<i32> String::to_int(TrimWhitespace) const;
  159. template Optional<i64> String::to_int(TrimWhitespace) const;
  160. template<typename T>
  161. Optional<T> String::to_uint(TrimWhitespace trim_whitespace) const
  162. {
  163. return StringUtils::convert_to_uint<T>(view(), trim_whitespace);
  164. }
  165. template Optional<u8> String::to_uint(TrimWhitespace) const;
  166. template Optional<u16> String::to_uint(TrimWhitespace) const;
  167. template Optional<u32> String::to_uint(TrimWhitespace) const;
  168. template Optional<u64> String::to_uint(TrimWhitespace) const;
  169. bool String::starts_with(const StringView& str, CaseSensitivity case_sensitivity) const
  170. {
  171. return StringUtils::starts_with(*this, str, case_sensitivity);
  172. }
  173. bool String::starts_with(char ch) const
  174. {
  175. if (is_empty())
  176. return false;
  177. return characters()[0] == ch;
  178. }
  179. bool String::ends_with(const StringView& str, CaseSensitivity case_sensitivity) const
  180. {
  181. return StringUtils::ends_with(*this, str, case_sensitivity);
  182. }
  183. bool String::ends_with(char ch) const
  184. {
  185. if (is_empty())
  186. return false;
  187. return characters()[length() - 1] == ch;
  188. }
  189. String String::repeated(char ch, size_t count)
  190. {
  191. if (!count)
  192. return empty();
  193. char* buffer;
  194. auto impl = StringImpl::create_uninitialized(count, buffer);
  195. memset(buffer, ch, count);
  196. return *impl;
  197. }
  198. String String::repeated(const StringView& string, size_t count)
  199. {
  200. if (!count || string.is_empty())
  201. return empty();
  202. char* buffer;
  203. auto impl = StringImpl::create_uninitialized(count * string.length(), buffer);
  204. for (size_t i = 0; i < count; i++)
  205. __builtin_memcpy(buffer + i * string.length(), string.characters_without_null_termination(), string.length());
  206. return *impl;
  207. }
  208. String String::bijective_base_from(size_t value, unsigned base, StringView map)
  209. {
  210. if (map.is_null())
  211. map = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"sv;
  212. VERIFY(base >= 2 && base <= map.length());
  213. // The '8 bits per byte' assumption may need to go?
  214. Array<char, round_up_to_power_of_two(sizeof(size_t) * 8 + 1, 2)> buffer;
  215. size_t i = 0;
  216. do {
  217. buffer[i++] = map[value % base];
  218. value /= base;
  219. } while (value > 0);
  220. // NOTE: Weird as this may seem, the thing that comes after 'Z' is 'AA', which as a number would be '00'
  221. // to make this work, only the most significant digit has to be in a range of (1..25) as opposed to (0..25),
  222. // but only if it's not the only digit in the string.
  223. if (i > 1)
  224. --buffer[i - 1];
  225. for (size_t j = 0; j < i / 2; ++j)
  226. swap(buffer[j], buffer[i - j - 1]);
  227. return String { ReadonlyBytes(buffer.data(), i) };
  228. }
  229. bool String::matches(const StringView& mask, Vector<MaskSpan>& mask_spans, CaseSensitivity case_sensitivity) const
  230. {
  231. return StringUtils::matches(*this, mask, case_sensitivity, &mask_spans);
  232. }
  233. bool String::matches(const StringView& mask, CaseSensitivity case_sensitivity) const
  234. {
  235. return StringUtils::matches(*this, mask, case_sensitivity);
  236. }
  237. bool String::contains(const StringView& needle, CaseSensitivity case_sensitivity) const
  238. {
  239. return StringUtils::contains(*this, needle, case_sensitivity);
  240. }
  241. bool String::equals_ignoring_case(const StringView& other) const
  242. {
  243. return StringUtils::equals_ignoring_case(view(), other);
  244. }
  245. int String::replace(const String& needle, const String& replacement, bool all_occurrences)
  246. {
  247. if (is_empty())
  248. return 0;
  249. Vector<size_t> positions;
  250. if (all_occurrences) {
  251. positions = find_all(needle);
  252. } else {
  253. auto pos = find(needle);
  254. if (!pos.has_value())
  255. return 0;
  256. positions.append(pos.value());
  257. }
  258. if (!positions.size())
  259. return 0;
  260. StringBuilder b;
  261. size_t lastpos = 0;
  262. for (auto& pos : positions) {
  263. b.append(substring_view(lastpos, pos - lastpos));
  264. b.append(replacement);
  265. lastpos = pos + needle.length();
  266. }
  267. b.append(substring_view(lastpos, length() - lastpos));
  268. m_impl = StringImpl::create(b.build().characters());
  269. return positions.size();
  270. }
  271. size_t String::count(const String& needle) const
  272. {
  273. size_t count = 0;
  274. size_t start = 0, pos;
  275. for (;;) {
  276. const char* ptr = strstr(characters() + start, needle.characters());
  277. if (!ptr)
  278. break;
  279. pos = ptr - characters();
  280. count++;
  281. start = pos + 1;
  282. }
  283. return count;
  284. }
  285. String String::reverse() const
  286. {
  287. StringBuilder reversed_string(length());
  288. for (size_t i = length(); i-- > 0;) {
  289. reversed_string.append(characters()[i]);
  290. }
  291. return reversed_string.to_string();
  292. }
  293. String escape_html_entities(const StringView& html)
  294. {
  295. StringBuilder builder;
  296. for (size_t i = 0; i < html.length(); ++i) {
  297. if (html[i] == '<')
  298. builder.append("&lt;");
  299. else if (html[i] == '>')
  300. builder.append("&gt;");
  301. else if (html[i] == '&')
  302. builder.append("&amp;");
  303. else
  304. builder.append(html[i]);
  305. }
  306. return builder.to_string();
  307. }
  308. String::String(const FlyString& string)
  309. : m_impl(string.impl())
  310. {
  311. }
  312. String String::to_lowercase() const
  313. {
  314. if (!m_impl)
  315. return {};
  316. return m_impl->to_lowercase();
  317. }
  318. String String::to_uppercase() const
  319. {
  320. if (!m_impl)
  321. return {};
  322. return m_impl->to_uppercase();
  323. }
  324. String String::to_snakecase() const
  325. {
  326. return StringUtils::to_snakecase(*this);
  327. }
  328. bool operator<(const char* characters, const String& string)
  329. {
  330. if (!characters)
  331. return !string.is_null();
  332. if (string.is_null())
  333. return false;
  334. return __builtin_strcmp(characters, string.characters()) < 0;
  335. }
  336. bool operator>=(const char* characters, const String& string)
  337. {
  338. return !(characters < string);
  339. }
  340. bool operator>(const char* characters, const String& string)
  341. {
  342. if (!characters)
  343. return !string.is_null();
  344. if (string.is_null())
  345. return false;
  346. return __builtin_strcmp(characters, string.characters()) > 0;
  347. }
  348. bool operator<=(const char* characters, const String& string)
  349. {
  350. return !(characters > string);
  351. }
  352. bool String::operator==(const char* cstring) const
  353. {
  354. if (is_null())
  355. return !cstring;
  356. if (!cstring)
  357. return false;
  358. return !__builtin_strcmp(characters(), cstring);
  359. }
  360. InputStream& operator>>(InputStream& stream, String& string)
  361. {
  362. StringBuilder builder;
  363. for (;;) {
  364. char next_char;
  365. stream >> next_char;
  366. if (stream.has_any_error()) {
  367. stream.set_fatal_error();
  368. string = nullptr;
  369. return stream;
  370. }
  371. if (next_char) {
  372. builder.append(next_char);
  373. } else {
  374. string = builder.to_string();
  375. return stream;
  376. }
  377. }
  378. }
  379. String String::vformatted(StringView fmtstr, TypeErasedFormatParams params)
  380. {
  381. StringBuilder builder;
  382. vformat(builder, fmtstr, params);
  383. return builder.to_string();
  384. }
  385. }