String.cpp 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451
  1. /*
  2. * Copyright (c) 2018-2020, Andreas Kling <kling@serenityos.org>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <AK/ByteBuffer.h>
  7. #include <AK/FlyString.h>
  8. #include <AK/Format.h>
  9. #include <AK/Function.h>
  10. #include <AK/Memory.h>
  11. #include <AK/StdLibExtras.h>
  12. #include <AK/String.h>
  13. #include <AK/StringView.h>
  14. #include <AK/Vector.h>
  15. namespace AK {
  16. bool String::operator==(FlyString const& fly_string) const
  17. {
  18. return m_impl == fly_string.impl() || view() == fly_string.view();
  19. }
  20. bool String::operator==(String const& other) const
  21. {
  22. return m_impl == other.impl() || view() == other.view();
  23. }
  24. bool String::operator==(StringView other) const
  25. {
  26. return view() == other;
  27. }
  28. bool String::operator<(String const& other) const
  29. {
  30. return view() < other.view();
  31. }
  32. bool String::operator>(String const& other) const
  33. {
  34. return view() > other.view();
  35. }
  36. bool String::copy_characters_to_buffer(char* buffer, size_t buffer_size) const
  37. {
  38. // We must fit at least the NUL-terminator.
  39. VERIFY(buffer_size > 0);
  40. size_t characters_to_copy = min(length(), buffer_size - 1);
  41. __builtin_memcpy(buffer, characters(), characters_to_copy);
  42. buffer[characters_to_copy] = 0;
  43. return characters_to_copy == length();
  44. }
  45. String String::isolated_copy() const
  46. {
  47. if (!m_impl)
  48. return {};
  49. if (!m_impl->length())
  50. return empty();
  51. char* buffer;
  52. auto impl = StringImpl::create_uninitialized(length(), buffer);
  53. memcpy(buffer, m_impl->characters(), m_impl->length());
  54. return String(move(*impl));
  55. }
  56. String String::substring(size_t start, size_t length) const
  57. {
  58. if (!length)
  59. return String::empty();
  60. VERIFY(m_impl);
  61. VERIFY(!Checked<size_t>::addition_would_overflow(start, length));
  62. VERIFY(start + length <= m_impl->length());
  63. return { characters() + start, length };
  64. }
  65. String String::substring(size_t start) const
  66. {
  67. VERIFY(m_impl);
  68. VERIFY(start <= length());
  69. return { characters() + start, length() - start };
  70. }
  71. StringView String::substring_view(size_t start, size_t length) const
  72. {
  73. VERIFY(m_impl);
  74. VERIFY(!Checked<size_t>::addition_would_overflow(start, length));
  75. VERIFY(start + length <= m_impl->length());
  76. return { characters() + start, length };
  77. }
  78. StringView String::substring_view(size_t start) const
  79. {
  80. VERIFY(m_impl);
  81. VERIFY(start <= length());
  82. return { characters() + start, length() - start };
  83. }
  84. Vector<String> String::split(char separator, bool keep_empty) const
  85. {
  86. return split_limit(separator, 0, keep_empty);
  87. }
  88. Vector<String> String::split_limit(char separator, size_t limit, bool keep_empty) const
  89. {
  90. if (is_empty())
  91. return {};
  92. Vector<String> v;
  93. size_t substart = 0;
  94. for (size_t i = 0; i < length() && (v.size() + 1) != limit; ++i) {
  95. char ch = characters()[i];
  96. if (ch == separator) {
  97. size_t sublen = i - substart;
  98. if (sublen != 0 || keep_empty)
  99. v.append(substring(substart, sublen));
  100. substart = i + 1;
  101. }
  102. }
  103. size_t taillen = length() - substart;
  104. if (taillen != 0 || keep_empty)
  105. v.append(substring(substart, taillen));
  106. return v;
  107. }
  108. Vector<StringView> String::split_view(Function<bool(char)> separator, bool keep_empty) const
  109. {
  110. if (is_empty())
  111. return {};
  112. Vector<StringView> v;
  113. size_t substart = 0;
  114. for (size_t i = 0; i < length(); ++i) {
  115. char ch = characters()[i];
  116. if (separator(ch)) {
  117. size_t sublen = i - substart;
  118. if (sublen != 0 || keep_empty)
  119. v.append(substring_view(substart, sublen));
  120. substart = i + 1;
  121. }
  122. }
  123. size_t taillen = length() - substart;
  124. if (taillen != 0 || keep_empty)
  125. v.append(substring_view(substart, taillen));
  126. return v;
  127. }
  128. Vector<StringView> String::split_view(char const separator, bool keep_empty) const
  129. {
  130. return split_view([separator](char ch) { return ch == separator; }, keep_empty);
  131. }
  132. ByteBuffer String::to_byte_buffer() const
  133. {
  134. if (!m_impl)
  135. return {};
  136. // FIXME: Handle OOM failure.
  137. return ByteBuffer::copy(bytes()).release_value_but_fixme_should_propagate_errors();
  138. }
  139. template<typename T>
  140. Optional<T> String::to_int(TrimWhitespace trim_whitespace) const
  141. {
  142. return StringUtils::convert_to_int<T>(view(), trim_whitespace);
  143. }
  144. template Optional<i8> String::to_int(TrimWhitespace) const;
  145. template Optional<i16> String::to_int(TrimWhitespace) const;
  146. template Optional<i32> String::to_int(TrimWhitespace) const;
  147. template Optional<i64> String::to_int(TrimWhitespace) const;
  148. template<typename T>
  149. Optional<T> String::to_uint(TrimWhitespace trim_whitespace) const
  150. {
  151. return StringUtils::convert_to_uint<T>(view(), trim_whitespace);
  152. }
  153. template Optional<u8> String::to_uint(TrimWhitespace) const;
  154. template Optional<u16> String::to_uint(TrimWhitespace) const;
  155. template Optional<u32> String::to_uint(TrimWhitespace) const;
  156. template Optional<unsigned long> String::to_uint(TrimWhitespace) const;
  157. template Optional<unsigned long long> String::to_uint(TrimWhitespace) const;
  158. bool String::starts_with(StringView str, CaseSensitivity case_sensitivity) const
  159. {
  160. return StringUtils::starts_with(*this, str, case_sensitivity);
  161. }
  162. bool String::starts_with(char ch) const
  163. {
  164. if (is_empty())
  165. return false;
  166. return characters()[0] == ch;
  167. }
  168. bool String::ends_with(StringView str, CaseSensitivity case_sensitivity) const
  169. {
  170. return StringUtils::ends_with(*this, str, case_sensitivity);
  171. }
  172. bool String::ends_with(char ch) const
  173. {
  174. if (is_empty())
  175. return false;
  176. return characters()[length() - 1] == ch;
  177. }
  178. String String::repeated(char ch, size_t count)
  179. {
  180. if (!count)
  181. return empty();
  182. char* buffer;
  183. auto impl = StringImpl::create_uninitialized(count, buffer);
  184. memset(buffer, ch, count);
  185. return *impl;
  186. }
  187. String String::repeated(StringView string, size_t count)
  188. {
  189. if (!count || string.is_empty())
  190. return empty();
  191. char* buffer;
  192. auto impl = StringImpl::create_uninitialized(count * string.length(), buffer);
  193. for (size_t i = 0; i < count; i++)
  194. __builtin_memcpy(buffer + i * string.length(), string.characters_without_null_termination(), string.length());
  195. return *impl;
  196. }
  197. String String::bijective_base_from(size_t value, unsigned base, StringView map)
  198. {
  199. if (map.is_null())
  200. map = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"sv;
  201. VERIFY(base >= 2 && base <= map.length());
  202. // The '8 bits per byte' assumption may need to go?
  203. Array<char, round_up_to_power_of_two(sizeof(size_t) * 8 + 1, 2)> buffer;
  204. size_t i = 0;
  205. do {
  206. buffer[i++] = map[value % base];
  207. value /= base;
  208. } while (value > 0);
  209. // NOTE: Weird as this may seem, the thing that comes after 'Z' is 'AA', which as a number would be '00'
  210. // to make this work, only the most significant digit has to be in a range of (1..25) as opposed to (0..25),
  211. // but only if it's not the only digit in the string.
  212. if (i > 1)
  213. --buffer[i - 1];
  214. for (size_t j = 0; j < i / 2; ++j)
  215. swap(buffer[j], buffer[i - j - 1]);
  216. return String { ReadonlyBytes(buffer.data(), i) };
  217. }
  218. String String::roman_number_from(size_t value)
  219. {
  220. if (value > 3999)
  221. return String::number(value);
  222. StringBuilder builder;
  223. while (value > 0) {
  224. if (value >= 1000) {
  225. builder.append('M');
  226. value -= 1000;
  227. } else if (value >= 900) {
  228. builder.append("CM"sv);
  229. value -= 900;
  230. } else if (value >= 500) {
  231. builder.append('D');
  232. value -= 500;
  233. } else if (value >= 400) {
  234. builder.append("CD"sv);
  235. value -= 400;
  236. } else if (value >= 100) {
  237. builder.append('C');
  238. value -= 100;
  239. } else if (value >= 90) {
  240. builder.append("XC"sv);
  241. value -= 90;
  242. } else if (value >= 50) {
  243. builder.append('L');
  244. value -= 50;
  245. } else if (value >= 40) {
  246. builder.append("XL"sv);
  247. value -= 40;
  248. } else if (value >= 10) {
  249. builder.append('X');
  250. value -= 10;
  251. } else if (value == 9) {
  252. builder.append("IX"sv);
  253. value -= 9;
  254. } else if (value >= 5 && value <= 8) {
  255. builder.append('V');
  256. value -= 5;
  257. } else if (value == 4) {
  258. builder.append("IV"sv);
  259. value -= 4;
  260. } else if (value <= 3) {
  261. builder.append('I');
  262. value -= 1;
  263. }
  264. }
  265. return builder.to_string();
  266. }
  267. bool String::matches(StringView mask, Vector<MaskSpan>& mask_spans, CaseSensitivity case_sensitivity) const
  268. {
  269. return StringUtils::matches(*this, mask, case_sensitivity, &mask_spans);
  270. }
  271. bool String::matches(StringView mask, CaseSensitivity case_sensitivity) const
  272. {
  273. return StringUtils::matches(*this, mask, case_sensitivity);
  274. }
  275. bool String::contains(StringView needle, CaseSensitivity case_sensitivity) const
  276. {
  277. return StringUtils::contains(*this, needle, case_sensitivity);
  278. }
  279. bool String::contains(char needle, CaseSensitivity case_sensitivity) const
  280. {
  281. return StringUtils::contains(*this, StringView(&needle, 1), case_sensitivity);
  282. }
  283. bool String::equals_ignoring_case(StringView other) const
  284. {
  285. return StringUtils::equals_ignoring_case(view(), other);
  286. }
  287. String String::reverse() const
  288. {
  289. StringBuilder reversed_string(length());
  290. for (size_t i = length(); i-- > 0;) {
  291. reversed_string.append(characters()[i]);
  292. }
  293. return reversed_string.to_string();
  294. }
  295. String escape_html_entities(StringView html)
  296. {
  297. StringBuilder builder;
  298. for (size_t i = 0; i < html.length(); ++i) {
  299. if (html[i] == '<')
  300. builder.append("&lt;");
  301. else if (html[i] == '>')
  302. builder.append("&gt;");
  303. else if (html[i] == '&')
  304. builder.append("&amp;");
  305. else if (html[i] == '"')
  306. builder.append("&quot;");
  307. else
  308. builder.append(html[i]);
  309. }
  310. return builder.to_string();
  311. }
  312. String::String(FlyString const& string)
  313. : m_impl(string.impl())
  314. {
  315. }
  316. String String::to_lowercase() const
  317. {
  318. if (!m_impl)
  319. return {};
  320. return m_impl->to_lowercase();
  321. }
  322. String String::to_uppercase() const
  323. {
  324. if (!m_impl)
  325. return {};
  326. return m_impl->to_uppercase();
  327. }
  328. String String::to_snakecase() const
  329. {
  330. return StringUtils::to_snakecase(*this);
  331. }
  332. String String::to_titlecase() const
  333. {
  334. return StringUtils::to_titlecase(*this);
  335. }
  336. bool operator<(char const* characters, String const& string)
  337. {
  338. return string.view() > characters;
  339. }
  340. bool operator>=(char const* characters, String const& string)
  341. {
  342. return string.view() <= characters;
  343. }
  344. bool operator>(char const* characters, String const& string)
  345. {
  346. return string.view() < characters;
  347. }
  348. bool operator<=(char const* characters, String const& string)
  349. {
  350. return string.view() >= characters;
  351. }
  352. bool String::operator==(char const* cstring) const
  353. {
  354. return view() == cstring;
  355. }
  356. InputStream& operator>>(InputStream& stream, String& string)
  357. {
  358. StringBuilder builder;
  359. for (;;) {
  360. char next_char;
  361. stream >> next_char;
  362. if (stream.has_any_error()) {
  363. stream.set_fatal_error();
  364. string = nullptr;
  365. return stream;
  366. }
  367. if (next_char) {
  368. builder.append(next_char);
  369. } else {
  370. string = builder.to_string();
  371. return stream;
  372. }
  373. }
  374. }
  375. String String::vformatted(StringView fmtstr, TypeErasedFormatParams& params)
  376. {
  377. StringBuilder builder;
  378. MUST(vformat(builder, fmtstr, params));
  379. return builder.to_string();
  380. }
  381. Vector<size_t> String::find_all(StringView needle) const
  382. {
  383. return StringUtils::find_all(*this, needle);
  384. }
  385. }