String.cpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526
  1. /*
  2. * Copyright (c) 2018-2020, Andreas Kling <kling@serenityos.org>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <AK/ByteBuffer.h>
  7. #include <AK/FlyString.h>
  8. #include <AK/Format.h>
  9. #include <AK/Memory.h>
  10. #include <AK/StdLibExtras.h>
  11. #include <AK/String.h>
  12. #include <AK/StringView.h>
  13. #include <AK/Vector.h>
  14. namespace AK {
  15. bool String::operator==(const FlyString& fly_string) const
  16. {
  17. return *this == String(fly_string.impl());
  18. }
  19. bool String::operator==(const String& other) const
  20. {
  21. if (!m_impl)
  22. return !other.m_impl;
  23. if (!other.m_impl)
  24. return false;
  25. return *m_impl == *other.m_impl;
  26. }
  27. bool String::operator==(const StringView& other) const
  28. {
  29. if (!m_impl)
  30. return !other.m_characters;
  31. if (!other.m_characters)
  32. return false;
  33. if (length() != other.length())
  34. return false;
  35. return !memcmp(characters(), other.characters_without_null_termination(), length());
  36. }
  37. bool String::operator<(const String& other) const
  38. {
  39. if (!m_impl)
  40. return other.m_impl;
  41. if (!other.m_impl)
  42. return false;
  43. return strcmp(characters(), other.characters()) < 0;
  44. }
  45. bool String::operator>(const String& other) const
  46. {
  47. if (!m_impl)
  48. return other.m_impl;
  49. if (!other.m_impl)
  50. return false;
  51. return strcmp(characters(), other.characters()) > 0;
  52. }
  53. bool String::copy_characters_to_buffer(char* buffer, size_t buffer_size) const
  54. {
  55. // We must fit at least the NUL-terminator.
  56. VERIFY(buffer_size > 0);
  57. size_t characters_to_copy = min(length(), buffer_size - 1);
  58. __builtin_memcpy(buffer, characters(), characters_to_copy);
  59. buffer[characters_to_copy] = 0;
  60. return characters_to_copy == length();
  61. }
  62. String String::isolated_copy() const
  63. {
  64. if (!m_impl)
  65. return {};
  66. if (!m_impl->length())
  67. return empty();
  68. char* buffer;
  69. auto impl = StringImpl::create_uninitialized(length(), buffer);
  70. memcpy(buffer, m_impl->characters(), m_impl->length());
  71. return String(move(*impl));
  72. }
  73. String String::substring(size_t start, size_t length) const
  74. {
  75. if (!length)
  76. return String::empty();
  77. VERIFY(m_impl);
  78. VERIFY(!Checked<size_t>::addition_would_overflow(start, length));
  79. VERIFY(start + length <= m_impl->length());
  80. return { characters() + start, length };
  81. }
  82. String String::substring(size_t start) const
  83. {
  84. VERIFY(m_impl);
  85. VERIFY(start <= length());
  86. return { characters() + start, length() - start };
  87. }
  88. StringView String::substring_view(size_t start, size_t length) const
  89. {
  90. VERIFY(m_impl);
  91. VERIFY(!Checked<size_t>::addition_would_overflow(start, length));
  92. VERIFY(start + length <= m_impl->length());
  93. return { characters() + start, length };
  94. }
  95. StringView String::substring_view(size_t start) const
  96. {
  97. VERIFY(m_impl);
  98. VERIFY(start <= length());
  99. return { characters() + start, length() - start };
  100. }
  101. Vector<String> String::split(char separator, bool keep_empty) const
  102. {
  103. return split_limit(separator, 0, keep_empty);
  104. }
  105. Vector<String> String::split_limit(char separator, size_t limit, bool keep_empty) const
  106. {
  107. if (is_empty())
  108. return {};
  109. Vector<String> v;
  110. size_t substart = 0;
  111. for (size_t i = 0; i < length() && (v.size() + 1) != limit; ++i) {
  112. char ch = characters()[i];
  113. if (ch == separator) {
  114. size_t sublen = i - substart;
  115. if (sublen != 0 || keep_empty)
  116. v.append(substring(substart, sublen));
  117. substart = i + 1;
  118. }
  119. }
  120. size_t taillen = length() - substart;
  121. if (taillen != 0 || keep_empty)
  122. v.append(substring(substart, taillen));
  123. return v;
  124. }
  125. Vector<StringView> String::split_view(const char separator, bool keep_empty) const
  126. {
  127. if (is_empty())
  128. return {};
  129. Vector<StringView> v;
  130. size_t substart = 0;
  131. for (size_t i = 0; i < length(); ++i) {
  132. char ch = characters()[i];
  133. if (ch == separator) {
  134. size_t sublen = i - substart;
  135. if (sublen != 0 || keep_empty)
  136. v.append(substring_view(substart, sublen));
  137. substart = i + 1;
  138. }
  139. }
  140. size_t taillen = length() - substart;
  141. if (taillen != 0 || keep_empty)
  142. v.append(substring_view(substart, taillen));
  143. return v;
  144. }
  145. ByteBuffer String::to_byte_buffer() const
  146. {
  147. if (!m_impl)
  148. return {};
  149. // FIXME: Handle OOM failure.
  150. return ByteBuffer::copy(bytes()).release_value();
  151. }
  152. template<typename T>
  153. Optional<T> String::to_int(TrimWhitespace trim_whitespace) const
  154. {
  155. return StringUtils::convert_to_int<T>(view(), trim_whitespace);
  156. }
  157. template Optional<i8> String::to_int(TrimWhitespace) const;
  158. template Optional<i16> String::to_int(TrimWhitespace) const;
  159. template Optional<i32> String::to_int(TrimWhitespace) const;
  160. template Optional<i64> String::to_int(TrimWhitespace) const;
  161. template<typename T>
  162. Optional<T> String::to_uint(TrimWhitespace trim_whitespace) const
  163. {
  164. return StringUtils::convert_to_uint<T>(view(), trim_whitespace);
  165. }
  166. template Optional<u8> String::to_uint(TrimWhitespace) const;
  167. template Optional<u16> String::to_uint(TrimWhitespace) const;
  168. template Optional<u32> String::to_uint(TrimWhitespace) const;
  169. template Optional<u64> String::to_uint(TrimWhitespace) const;
  170. bool String::starts_with(const StringView& str, CaseSensitivity case_sensitivity) const
  171. {
  172. return StringUtils::starts_with(*this, str, case_sensitivity);
  173. }
  174. bool String::starts_with(char ch) const
  175. {
  176. if (is_empty())
  177. return false;
  178. return characters()[0] == ch;
  179. }
  180. bool String::ends_with(const StringView& str, CaseSensitivity case_sensitivity) const
  181. {
  182. return StringUtils::ends_with(*this, str, case_sensitivity);
  183. }
  184. bool String::ends_with(char ch) const
  185. {
  186. if (is_empty())
  187. return false;
  188. return characters()[length() - 1] == ch;
  189. }
  190. String String::repeated(char ch, size_t count)
  191. {
  192. if (!count)
  193. return empty();
  194. char* buffer;
  195. auto impl = StringImpl::create_uninitialized(count, buffer);
  196. memset(buffer, ch, count);
  197. return *impl;
  198. }
  199. String String::repeated(const StringView& string, size_t count)
  200. {
  201. if (!count || string.is_empty())
  202. return empty();
  203. char* buffer;
  204. auto impl = StringImpl::create_uninitialized(count * string.length(), buffer);
  205. for (size_t i = 0; i < count; i++)
  206. __builtin_memcpy(buffer + i * string.length(), string.characters_without_null_termination(), string.length());
  207. return *impl;
  208. }
  209. String String::bijective_base_from(size_t value, unsigned base, StringView map)
  210. {
  211. if (map.is_null())
  212. map = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"sv;
  213. VERIFY(base >= 2 && base <= map.length());
  214. // The '8 bits per byte' assumption may need to go?
  215. Array<char, round_up_to_power_of_two(sizeof(size_t) * 8 + 1, 2)> buffer;
  216. size_t i = 0;
  217. do {
  218. buffer[i++] = map[value % base];
  219. value /= base;
  220. } while (value > 0);
  221. // NOTE: Weird as this may seem, the thing that comes after 'Z' is 'AA', which as a number would be '00'
  222. // to make this work, only the most significant digit has to be in a range of (1..25) as opposed to (0..25),
  223. // but only if it's not the only digit in the string.
  224. if (i > 1)
  225. --buffer[i - 1];
  226. for (size_t j = 0; j < i / 2; ++j)
  227. swap(buffer[j], buffer[i - j - 1]);
  228. return String { ReadonlyBytes(buffer.data(), i) };
  229. }
  230. String String::roman_number_from(size_t value)
  231. {
  232. if (value > 3999)
  233. return String::number(value);
  234. StringBuilder builder;
  235. while (value > 0) {
  236. if (value >= 1000) {
  237. builder.append('M');
  238. value -= 1000;
  239. } else if (value >= 900) {
  240. builder.append("CM"sv);
  241. value -= 900;
  242. } else if (value >= 500) {
  243. builder.append('D');
  244. value -= 500;
  245. } else if (value >= 400) {
  246. builder.append("CD"sv);
  247. value -= 400;
  248. } else if (value >= 100) {
  249. builder.append('C');
  250. value -= 100;
  251. } else if (value >= 90) {
  252. builder.append("XC"sv);
  253. value -= 90;
  254. } else if (value >= 50) {
  255. builder.append('L');
  256. value -= 50;
  257. } else if (value >= 40) {
  258. builder.append("XL"sv);
  259. value -= 40;
  260. } else if (value >= 10) {
  261. builder.append('X');
  262. value -= 10;
  263. } else if (value == 9) {
  264. builder.append("IX"sv);
  265. value -= 9;
  266. } else if (value >= 5 && value <= 8) {
  267. builder.append('V');
  268. value -= 5;
  269. } else if (value == 4) {
  270. builder.append("IV"sv);
  271. value -= 4;
  272. } else if (value <= 3) {
  273. builder.append('I');
  274. value -= 1;
  275. }
  276. }
  277. return builder.to_string();
  278. }
  279. bool String::matches(const StringView& mask, Vector<MaskSpan>& mask_spans, CaseSensitivity case_sensitivity) const
  280. {
  281. return StringUtils::matches(*this, mask, case_sensitivity, &mask_spans);
  282. }
  283. bool String::matches(const StringView& mask, CaseSensitivity case_sensitivity) const
  284. {
  285. return StringUtils::matches(*this, mask, case_sensitivity);
  286. }
  287. bool String::contains(const StringView& needle, CaseSensitivity case_sensitivity) const
  288. {
  289. return StringUtils::contains(*this, needle, case_sensitivity);
  290. }
  291. bool String::contains(char needle, CaseSensitivity case_sensitivity) const
  292. {
  293. return StringUtils::contains(*this, StringView(&needle, 1), case_sensitivity);
  294. }
  295. bool String::equals_ignoring_case(const StringView& other) const
  296. {
  297. return StringUtils::equals_ignoring_case(view(), other);
  298. }
  299. int String::replace(const String& needle, const String& replacement, bool all_occurrences)
  300. {
  301. if (is_empty())
  302. return 0;
  303. Vector<size_t> positions;
  304. if (all_occurrences) {
  305. positions = find_all(needle);
  306. } else {
  307. auto pos = find(needle);
  308. if (!pos.has_value())
  309. return 0;
  310. positions.append(pos.value());
  311. }
  312. if (!positions.size())
  313. return 0;
  314. StringBuilder b;
  315. size_t lastpos = 0;
  316. for (auto& pos : positions) {
  317. b.append(substring_view(lastpos, pos - lastpos));
  318. b.append(replacement);
  319. lastpos = pos + needle.length();
  320. }
  321. b.append(substring_view(lastpos, length() - lastpos));
  322. m_impl = StringImpl::create(b.build().characters());
  323. return positions.size();
  324. }
  325. size_t String::count(const String& needle) const
  326. {
  327. size_t count = 0;
  328. size_t start = 0, pos;
  329. for (;;) {
  330. const char* ptr = strstr(characters() + start, needle.characters());
  331. if (!ptr)
  332. break;
  333. pos = ptr - characters();
  334. count++;
  335. start = pos + 1;
  336. }
  337. return count;
  338. }
  339. String String::reverse() const
  340. {
  341. StringBuilder reversed_string(length());
  342. for (size_t i = length(); i-- > 0;) {
  343. reversed_string.append(characters()[i]);
  344. }
  345. return reversed_string.to_string();
  346. }
  347. String escape_html_entities(const StringView& html)
  348. {
  349. StringBuilder builder;
  350. for (size_t i = 0; i < html.length(); ++i) {
  351. if (html[i] == '<')
  352. builder.append("&lt;");
  353. else if (html[i] == '>')
  354. builder.append("&gt;");
  355. else if (html[i] == '&')
  356. builder.append("&amp;");
  357. else
  358. builder.append(html[i]);
  359. }
  360. return builder.to_string();
  361. }
  362. String::String(const FlyString& string)
  363. : m_impl(string.impl())
  364. {
  365. }
  366. String String::to_lowercase() const
  367. {
  368. if (!m_impl)
  369. return {};
  370. return m_impl->to_lowercase();
  371. }
  372. String String::to_uppercase() const
  373. {
  374. if (!m_impl)
  375. return {};
  376. return m_impl->to_uppercase();
  377. }
  378. String String::to_snakecase() const
  379. {
  380. return StringUtils::to_snakecase(*this);
  381. }
  382. String String::to_titlecase() const
  383. {
  384. return StringUtils::to_titlecase(*this);
  385. }
  386. bool operator<(const char* characters, const String& string)
  387. {
  388. if (!characters)
  389. return !string.is_null();
  390. if (string.is_null())
  391. return false;
  392. return __builtin_strcmp(characters, string.characters()) < 0;
  393. }
  394. bool operator>=(const char* characters, const String& string)
  395. {
  396. return !(characters < string);
  397. }
  398. bool operator>(const char* characters, const String& string)
  399. {
  400. if (!characters)
  401. return !string.is_null();
  402. if (string.is_null())
  403. return false;
  404. return __builtin_strcmp(characters, string.characters()) > 0;
  405. }
  406. bool operator<=(const char* characters, const String& string)
  407. {
  408. return !(characters > string);
  409. }
  410. bool String::operator==(const char* cstring) const
  411. {
  412. if (is_null())
  413. return !cstring;
  414. if (!cstring)
  415. return false;
  416. return !__builtin_strcmp(characters(), cstring);
  417. }
  418. InputStream& operator>>(InputStream& stream, String& string)
  419. {
  420. StringBuilder builder;
  421. for (;;) {
  422. char next_char;
  423. stream >> next_char;
  424. if (stream.has_any_error()) {
  425. stream.set_fatal_error();
  426. string = nullptr;
  427. return stream;
  428. }
  429. if (next_char) {
  430. builder.append(next_char);
  431. } else {
  432. string = builder.to_string();
  433. return stream;
  434. }
  435. }
  436. }
  437. String String::vformatted(StringView fmtstr, TypeErasedFormatParams& params)
  438. {
  439. StringBuilder builder;
  440. vformat(builder, fmtstr, params);
  441. return builder.to_string();
  442. }
  443. }