String.cpp 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487
  1. /*
  2. * Copyright (c) 2018-2020, Andreas Kling <kling@serenityos.org>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <AK/ByteBuffer.h>
  7. #include <AK/FlyString.h>
  8. #include <AK/Format.h>
  9. #include <AK/Memory.h>
  10. #include <AK/StdLibExtras.h>
  11. #include <AK/String.h>
  12. #include <AK/StringView.h>
  13. #include <AK/Vector.h>
  14. namespace AK {
  15. bool String::operator==(const FlyString& fly_string) const
  16. {
  17. return *this == String(fly_string.impl());
  18. }
  19. bool String::operator==(const String& other) const
  20. {
  21. if (!m_impl)
  22. return !other.m_impl;
  23. if (!other.m_impl)
  24. return false;
  25. return *m_impl == *other.m_impl;
  26. }
  27. bool String::operator==(StringView other) const
  28. {
  29. if (!m_impl)
  30. return !other.m_characters;
  31. if (!other.m_characters)
  32. return false;
  33. if (length() != other.length())
  34. return false;
  35. return !memcmp(characters(), other.characters_without_null_termination(), length());
  36. }
  37. bool String::operator<(const String& other) const
  38. {
  39. if (!m_impl)
  40. return other.m_impl;
  41. if (!other.m_impl)
  42. return false;
  43. return strcmp(characters(), other.characters()) < 0;
  44. }
  45. bool String::operator>(const String& other) const
  46. {
  47. if (!m_impl)
  48. return other.m_impl;
  49. if (!other.m_impl)
  50. return false;
  51. return strcmp(characters(), other.characters()) > 0;
  52. }
  53. bool String::copy_characters_to_buffer(char* buffer, size_t buffer_size) const
  54. {
  55. // We must fit at least the NUL-terminator.
  56. VERIFY(buffer_size > 0);
  57. size_t characters_to_copy = min(length(), buffer_size - 1);
  58. __builtin_memcpy(buffer, characters(), characters_to_copy);
  59. buffer[characters_to_copy] = 0;
  60. return characters_to_copy == length();
  61. }
  62. String String::isolated_copy() const
  63. {
  64. if (!m_impl)
  65. return {};
  66. if (!m_impl->length())
  67. return empty();
  68. char* buffer;
  69. auto impl = StringImpl::create_uninitialized(length(), buffer);
  70. memcpy(buffer, m_impl->characters(), m_impl->length());
  71. return String(move(*impl));
  72. }
  73. String String::substring(size_t start, size_t length) const
  74. {
  75. if (!length)
  76. return String::empty();
  77. VERIFY(m_impl);
  78. VERIFY(!Checked<size_t>::addition_would_overflow(start, length));
  79. VERIFY(start + length <= m_impl->length());
  80. return { characters() + start, length };
  81. }
  82. String String::substring(size_t start) const
  83. {
  84. VERIFY(m_impl);
  85. VERIFY(start <= length());
  86. return { characters() + start, length() - start };
  87. }
  88. StringView String::substring_view(size_t start, size_t length) const
  89. {
  90. VERIFY(m_impl);
  91. VERIFY(!Checked<size_t>::addition_would_overflow(start, length));
  92. VERIFY(start + length <= m_impl->length());
  93. return { characters() + start, length };
  94. }
  95. StringView String::substring_view(size_t start) const
  96. {
  97. VERIFY(m_impl);
  98. VERIFY(start <= length());
  99. return { characters() + start, length() - start };
  100. }
  101. Vector<String> String::split(char separator, bool keep_empty) const
  102. {
  103. return split_limit(separator, 0, keep_empty);
  104. }
  105. Vector<String> String::split_limit(char separator, size_t limit, bool keep_empty) const
  106. {
  107. if (is_empty())
  108. return {};
  109. Vector<String> v;
  110. size_t substart = 0;
  111. for (size_t i = 0; i < length() && (v.size() + 1) != limit; ++i) {
  112. char ch = characters()[i];
  113. if (ch == separator) {
  114. size_t sublen = i - substart;
  115. if (sublen != 0 || keep_empty)
  116. v.append(substring(substart, sublen));
  117. substart = i + 1;
  118. }
  119. }
  120. size_t taillen = length() - substart;
  121. if (taillen != 0 || keep_empty)
  122. v.append(substring(substart, taillen));
  123. return v;
  124. }
  125. Vector<StringView> String::split_view(const char separator, bool keep_empty) const
  126. {
  127. if (is_empty())
  128. return {};
  129. Vector<StringView> v;
  130. size_t substart = 0;
  131. for (size_t i = 0; i < length(); ++i) {
  132. char ch = characters()[i];
  133. if (ch == separator) {
  134. size_t sublen = i - substart;
  135. if (sublen != 0 || keep_empty)
  136. v.append(substring_view(substart, sublen));
  137. substart = i + 1;
  138. }
  139. }
  140. size_t taillen = length() - substart;
  141. if (taillen != 0 || keep_empty)
  142. v.append(substring_view(substart, taillen));
  143. return v;
  144. }
  145. ByteBuffer String::to_byte_buffer() const
  146. {
  147. if (!m_impl)
  148. return {};
  149. // FIXME: Handle OOM failure.
  150. return ByteBuffer::copy(bytes()).release_value();
  151. }
  152. template<typename T>
  153. Optional<T> String::to_int(TrimWhitespace trim_whitespace) const
  154. {
  155. return StringUtils::convert_to_int<T>(view(), trim_whitespace);
  156. }
  157. template Optional<i8> String::to_int(TrimWhitespace) const;
  158. template Optional<i16> String::to_int(TrimWhitespace) const;
  159. template Optional<i32> String::to_int(TrimWhitespace) const;
  160. template Optional<i64> String::to_int(TrimWhitespace) const;
  161. template<typename T>
  162. Optional<T> String::to_uint(TrimWhitespace trim_whitespace) const
  163. {
  164. return StringUtils::convert_to_uint<T>(view(), trim_whitespace);
  165. }
  166. template Optional<u8> String::to_uint(TrimWhitespace) const;
  167. template Optional<u16> String::to_uint(TrimWhitespace) const;
  168. template Optional<u32> String::to_uint(TrimWhitespace) const;
  169. template Optional<u64> String::to_uint(TrimWhitespace) const;
  170. bool String::starts_with(StringView str, CaseSensitivity case_sensitivity) const
  171. {
  172. return StringUtils::starts_with(*this, str, case_sensitivity);
  173. }
  174. bool String::starts_with(char ch) const
  175. {
  176. if (is_empty())
  177. return false;
  178. return characters()[0] == ch;
  179. }
  180. bool String::ends_with(StringView str, CaseSensitivity case_sensitivity) const
  181. {
  182. return StringUtils::ends_with(*this, str, case_sensitivity);
  183. }
  184. bool String::ends_with(char ch) const
  185. {
  186. if (is_empty())
  187. return false;
  188. return characters()[length() - 1] == ch;
  189. }
  190. String String::repeated(char ch, size_t count)
  191. {
  192. if (!count)
  193. return empty();
  194. char* buffer;
  195. auto impl = StringImpl::create_uninitialized(count, buffer);
  196. memset(buffer, ch, count);
  197. return *impl;
  198. }
  199. String String::repeated(StringView string, size_t count)
  200. {
  201. if (!count || string.is_empty())
  202. return empty();
  203. char* buffer;
  204. auto impl = StringImpl::create_uninitialized(count * string.length(), buffer);
  205. for (size_t i = 0; i < count; i++)
  206. __builtin_memcpy(buffer + i * string.length(), string.characters_without_null_termination(), string.length());
  207. return *impl;
  208. }
  209. String String::bijective_base_from(size_t value, unsigned base, StringView map)
  210. {
  211. if (map.is_null())
  212. map = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"sv;
  213. VERIFY(base >= 2 && base <= map.length());
  214. // The '8 bits per byte' assumption may need to go?
  215. Array<char, round_up_to_power_of_two(sizeof(size_t) * 8 + 1, 2)> buffer;
  216. size_t i = 0;
  217. do {
  218. buffer[i++] = map[value % base];
  219. value /= base;
  220. } while (value > 0);
  221. // NOTE: Weird as this may seem, the thing that comes after 'Z' is 'AA', which as a number would be '00'
  222. // to make this work, only the most significant digit has to be in a range of (1..25) as opposed to (0..25),
  223. // but only if it's not the only digit in the string.
  224. if (i > 1)
  225. --buffer[i - 1];
  226. for (size_t j = 0; j < i / 2; ++j)
  227. swap(buffer[j], buffer[i - j - 1]);
  228. return String { ReadonlyBytes(buffer.data(), i) };
  229. }
  230. String String::roman_number_from(size_t value)
  231. {
  232. if (value > 3999)
  233. return String::number(value);
  234. StringBuilder builder;
  235. while (value > 0) {
  236. if (value >= 1000) {
  237. builder.append('M');
  238. value -= 1000;
  239. } else if (value >= 900) {
  240. builder.append("CM"sv);
  241. value -= 900;
  242. } else if (value >= 500) {
  243. builder.append('D');
  244. value -= 500;
  245. } else if (value >= 400) {
  246. builder.append("CD"sv);
  247. value -= 400;
  248. } else if (value >= 100) {
  249. builder.append('C');
  250. value -= 100;
  251. } else if (value >= 90) {
  252. builder.append("XC"sv);
  253. value -= 90;
  254. } else if (value >= 50) {
  255. builder.append('L');
  256. value -= 50;
  257. } else if (value >= 40) {
  258. builder.append("XL"sv);
  259. value -= 40;
  260. } else if (value >= 10) {
  261. builder.append('X');
  262. value -= 10;
  263. } else if (value == 9) {
  264. builder.append("IX"sv);
  265. value -= 9;
  266. } else if (value >= 5 && value <= 8) {
  267. builder.append('V');
  268. value -= 5;
  269. } else if (value == 4) {
  270. builder.append("IV"sv);
  271. value -= 4;
  272. } else if (value <= 3) {
  273. builder.append('I');
  274. value -= 1;
  275. }
  276. }
  277. return builder.to_string();
  278. }
  279. bool String::matches(StringView mask, Vector<MaskSpan>& mask_spans, CaseSensitivity case_sensitivity) const
  280. {
  281. return StringUtils::matches(*this, mask, case_sensitivity, &mask_spans);
  282. }
  283. bool String::matches(StringView mask, CaseSensitivity case_sensitivity) const
  284. {
  285. return StringUtils::matches(*this, mask, case_sensitivity);
  286. }
  287. bool String::contains(StringView needle, CaseSensitivity case_sensitivity) const
  288. {
  289. return StringUtils::contains(*this, needle, case_sensitivity);
  290. }
  291. bool String::contains(char needle, CaseSensitivity case_sensitivity) const
  292. {
  293. return StringUtils::contains(*this, StringView(&needle, 1), case_sensitivity);
  294. }
  295. bool String::equals_ignoring_case(StringView other) const
  296. {
  297. return StringUtils::equals_ignoring_case(view(), other);
  298. }
  299. String String::reverse() const
  300. {
  301. StringBuilder reversed_string(length());
  302. for (size_t i = length(); i-- > 0;) {
  303. reversed_string.append(characters()[i]);
  304. }
  305. return reversed_string.to_string();
  306. }
  307. String escape_html_entities(StringView html)
  308. {
  309. StringBuilder builder;
  310. for (size_t i = 0; i < html.length(); ++i) {
  311. if (html[i] == '<')
  312. builder.append("&lt;");
  313. else if (html[i] == '>')
  314. builder.append("&gt;");
  315. else if (html[i] == '&')
  316. builder.append("&amp;");
  317. else if (html[i] == '"')
  318. builder.append("&quot;");
  319. else
  320. builder.append(html[i]);
  321. }
  322. return builder.to_string();
  323. }
  324. String::String(const FlyString& string)
  325. : m_impl(string.impl())
  326. {
  327. }
  328. String String::to_lowercase() const
  329. {
  330. if (!m_impl)
  331. return {};
  332. return m_impl->to_lowercase();
  333. }
  334. String String::to_uppercase() const
  335. {
  336. if (!m_impl)
  337. return {};
  338. return m_impl->to_uppercase();
  339. }
  340. String String::to_snakecase() const
  341. {
  342. return StringUtils::to_snakecase(*this);
  343. }
  344. String String::to_titlecase() const
  345. {
  346. return StringUtils::to_titlecase(*this);
  347. }
  348. bool operator<(const char* characters, const String& string)
  349. {
  350. if (!characters)
  351. return !string.is_null();
  352. if (string.is_null())
  353. return false;
  354. return __builtin_strcmp(characters, string.characters()) < 0;
  355. }
  356. bool operator>=(const char* characters, const String& string)
  357. {
  358. return !(characters < string);
  359. }
  360. bool operator>(const char* characters, const String& string)
  361. {
  362. if (!characters)
  363. return !string.is_null();
  364. if (string.is_null())
  365. return false;
  366. return __builtin_strcmp(characters, string.characters()) > 0;
  367. }
  368. bool operator<=(const char* characters, const String& string)
  369. {
  370. return !(characters > string);
  371. }
  372. bool String::operator==(const char* cstring) const
  373. {
  374. if (is_null())
  375. return !cstring;
  376. if (!cstring)
  377. return false;
  378. return !__builtin_strcmp(characters(), cstring);
  379. }
  380. InputStream& operator>>(InputStream& stream, String& string)
  381. {
  382. StringBuilder builder;
  383. for (;;) {
  384. char next_char;
  385. stream >> next_char;
  386. if (stream.has_any_error()) {
  387. stream.set_fatal_error();
  388. string = nullptr;
  389. return stream;
  390. }
  391. if (next_char) {
  392. builder.append(next_char);
  393. } else {
  394. string = builder.to_string();
  395. return stream;
  396. }
  397. }
  398. }
  399. String String::vformatted(StringView fmtstr, TypeErasedFormatParams& params)
  400. {
  401. StringBuilder builder;
  402. MUST(vformat(builder, fmtstr, params));
  403. return builder.to_string();
  404. }
  405. Vector<size_t> String::find_all(StringView needle) const
  406. {
  407. return StringUtils::find_all(*this, needle);
  408. }
  409. }