String.cpp 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491
  1. /*
  2. * Copyright (c) 2018-2020, Andreas Kling <kling@serenityos.org>
  3. * All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions are met:
  7. *
  8. * 1. Redistributions of source code must retain the above copyright notice, this
  9. * list of conditions and the following disclaimer.
  10. *
  11. * 2. Redistributions in binary form must reproduce the above copyright notice,
  12. * this list of conditions and the following disclaimer in the documentation
  13. * and/or other materials provided with the distribution.
  14. *
  15. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  16. * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  17. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  18. * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
  19. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  20. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  21. * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  22. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  23. * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  24. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  25. */
  26. #include <AK/FlyString.h>
  27. #include <AK/Memory.h>
  28. #include <AK/StdLibExtras.h>
  29. #include <AK/String.h>
  30. #include <AK/StringBuilder.h>
  31. #include <AK/StringView.h>
  32. #include <AK/Vector.h>
  33. #ifndef KERNEL
  34. # include <inttypes.h>
  35. #endif
  36. #ifdef KERNEL
  37. extern "C" char* strstr(const char* haystack, const char* needle);
  38. #endif
  39. namespace AK {
  40. String::String(const StringView& view)
  41. {
  42. if (view.m_impl)
  43. m_impl = *view.m_impl;
  44. else
  45. m_impl = StringImpl::create(view.characters_without_null_termination(), view.length());
  46. }
  47. bool String::operator==(const FlyString& fly_string) const
  48. {
  49. return *this == String(fly_string.impl());
  50. }
  51. bool String::operator==(const String& other) const
  52. {
  53. if (!m_impl)
  54. return !other.m_impl;
  55. if (!other.m_impl)
  56. return false;
  57. if (length() != other.length())
  58. return false;
  59. return !memcmp(characters(), other.characters(), length());
  60. }
  61. bool String::operator==(const StringView& other) const
  62. {
  63. if (!m_impl)
  64. return !other.m_characters;
  65. if (!other.m_characters)
  66. return false;
  67. if (length() != other.length())
  68. return false;
  69. return !memcmp(characters(), other.characters_without_null_termination(), length());
  70. }
  71. bool String::operator<(const String& other) const
  72. {
  73. if (!m_impl)
  74. return other.m_impl;
  75. if (!other.m_impl)
  76. return false;
  77. return strcmp(characters(), other.characters()) < 0;
  78. }
  79. bool String::operator>(const String& other) const
  80. {
  81. if (!m_impl)
  82. return other.m_impl;
  83. if (!other.m_impl)
  84. return false;
  85. return strcmp(characters(), other.characters()) > 0;
  86. }
  87. String String::empty()
  88. {
  89. return StringImpl::the_empty_stringimpl();
  90. }
  91. String String::isolated_copy() const
  92. {
  93. if (!m_impl)
  94. return {};
  95. if (!m_impl->length())
  96. return empty();
  97. char* buffer;
  98. auto impl = StringImpl::create_uninitialized(length(), buffer);
  99. memcpy(buffer, m_impl->characters(), m_impl->length());
  100. return String(move(*impl));
  101. }
  102. String String::substring(size_t start, size_t length) const
  103. {
  104. if (!length)
  105. return {};
  106. ASSERT(m_impl);
  107. ASSERT(start + length <= m_impl->length());
  108. // FIXME: This needs some input bounds checking.
  109. return { characters() + start, length };
  110. }
  111. StringView String::substring_view(size_t start, size_t length) const
  112. {
  113. ASSERT(m_impl);
  114. ASSERT(start + length <= m_impl->length());
  115. // FIXME: This needs some input bounds checking.
  116. return { characters() + start, length };
  117. }
  118. Vector<String> String::split(char separator, bool keep_empty) const
  119. {
  120. return split_limit(separator, 0, keep_empty);
  121. }
  122. Vector<String> String::split_limit(char separator, size_t limit, bool keep_empty) const
  123. {
  124. if (is_empty())
  125. return {};
  126. Vector<String> v;
  127. size_t substart = 0;
  128. for (size_t i = 0; i < length() && (v.size() + 1) != limit; ++i) {
  129. char ch = characters()[i];
  130. if (ch == separator) {
  131. size_t sublen = i - substart;
  132. if (sublen != 0 || keep_empty)
  133. v.append(substring(substart, sublen));
  134. substart = i + 1;
  135. }
  136. }
  137. size_t taillen = length() - substart;
  138. if (taillen != 0 || keep_empty)
  139. v.append(substring(substart, taillen));
  140. return v;
  141. }
  142. Vector<StringView> String::split_view(const char separator, bool keep_empty) const
  143. {
  144. if (is_empty())
  145. return {};
  146. Vector<StringView> v;
  147. size_t substart = 0;
  148. for (size_t i = 0; i < length(); ++i) {
  149. char ch = characters()[i];
  150. if (ch == separator) {
  151. size_t sublen = i - substart;
  152. if (sublen != 0 || keep_empty)
  153. v.append(substring_view(substart, sublen));
  154. substart = i + 1;
  155. }
  156. }
  157. size_t taillen = length() - substart;
  158. if (taillen != 0 || keep_empty)
  159. v.append(substring_view(substart, taillen));
  160. return v;
  161. }
  162. ByteBuffer String::to_byte_buffer() const
  163. {
  164. if (!m_impl)
  165. return nullptr;
  166. return ByteBuffer::copy(reinterpret_cast<const u8*>(characters()), length());
  167. }
  168. int String::to_int(bool& ok) const
  169. {
  170. return StringUtils::convert_to_int(this->view(), ok);
  171. }
  172. unsigned String::to_uint(bool& ok) const
  173. {
  174. return StringUtils::convert_to_uint(this->view(), ok);
  175. }
  176. String String::number(unsigned long long value)
  177. {
  178. int size;
  179. char buffer[32];
  180. size = sprintf(buffer, "%llu", value);
  181. return String(buffer, size);
  182. }
  183. String String::number(unsigned long value)
  184. {
  185. int size;
  186. char buffer[32];
  187. size = sprintf(buffer, "%lu", value);
  188. return String(buffer, size);
  189. }
  190. String String::number(unsigned value)
  191. {
  192. char buffer[32];
  193. int size = sprintf(buffer, "%u", value);
  194. return String(buffer, size);
  195. }
  196. String String::number(long long value)
  197. {
  198. char buffer[32];
  199. int size = sprintf(buffer, "%lld", value);
  200. return String(buffer, size);
  201. }
  202. String String::number(long value)
  203. {
  204. char buffer[32];
  205. int size = sprintf(buffer, "%ld", value);
  206. return String(buffer, size);
  207. }
  208. String String::number(int value)
  209. {
  210. char buffer[32];
  211. int size = sprintf(buffer, "%d", value);
  212. return String(buffer, size);
  213. }
  214. String String::format(const char* fmt, ...)
  215. {
  216. StringBuilder builder;
  217. va_list ap;
  218. va_start(ap, fmt);
  219. builder.appendvf(fmt, ap);
  220. va_end(ap);
  221. return builder.to_string();
  222. }
  223. bool String::starts_with(const StringView& str) const
  224. {
  225. if (str.is_empty())
  226. return true;
  227. if (is_empty())
  228. return false;
  229. if (str.length() > length())
  230. return false;
  231. return !memcmp(characters(), str.characters_without_null_termination(), str.length());
  232. }
  233. bool String::starts_with(char ch) const
  234. {
  235. if (is_empty())
  236. return false;
  237. return characters()[0] == ch;
  238. }
  239. bool String::ends_with(const StringView& str) const
  240. {
  241. if (str.is_empty())
  242. return true;
  243. if (is_empty())
  244. return false;
  245. if (str.length() > length())
  246. return false;
  247. return !memcmp(characters() + (length() - str.length()), str.characters_without_null_termination(), str.length());
  248. }
  249. bool String::ends_with(char ch) const
  250. {
  251. if (is_empty())
  252. return false;
  253. return characters()[length() - 1] == ch;
  254. }
  255. String String::repeated(char ch, size_t count)
  256. {
  257. if (!count)
  258. return empty();
  259. char* buffer;
  260. auto impl = StringImpl::create_uninitialized(count, buffer);
  261. memset(buffer, ch, count);
  262. return *impl;
  263. }
  264. bool String::matches(const StringView& mask, CaseSensitivity case_sensitivity) const
  265. {
  266. return StringUtils::matches(*this, mask, case_sensitivity);
  267. }
  268. bool String::contains(const String& needle) const
  269. {
  270. if (is_null() || needle.is_null())
  271. return false;
  272. return strstr(characters(), needle.characters());
  273. }
  274. Optional<size_t> String::index_of(const String& needle) const
  275. {
  276. if (is_null() || needle.is_null())
  277. return {};
  278. const char* self_characters = characters();
  279. const char* result = strstr(self_characters, needle.characters());
  280. if (!result)
  281. return {};
  282. return Optional<size_t> { result - self_characters };
  283. }
  284. bool String::equals_ignoring_case(const StringView& other) const
  285. {
  286. return StringUtils::equals_ignoring_case(view(), other);
  287. }
  288. int String::replace(const String& needle, const String& replacement, bool all_occurences)
  289. {
  290. if (is_empty())
  291. return 0;
  292. Vector<size_t> positions;
  293. size_t start = 0, pos;
  294. for (;;) {
  295. const char* ptr = strstr(characters() + start, needle.characters());
  296. if (!ptr)
  297. break;
  298. pos = ptr - characters();
  299. positions.append(pos);
  300. if (!all_occurences)
  301. break;
  302. start = pos + 1;
  303. }
  304. if (!positions.size())
  305. return 0;
  306. StringBuilder b;
  307. size_t lastpos = 0;
  308. for (auto& pos : positions) {
  309. b.append(substring_view(lastpos, pos - lastpos));
  310. b.append(replacement);
  311. lastpos = pos + needle.length();
  312. }
  313. b.append(substring_view(lastpos, length() - lastpos));
  314. m_impl = StringImpl::create(b.build().characters());
  315. return positions.size();
  316. }
  317. String String::trim_whitespace(TrimMode mode) const
  318. {
  319. auto is_whitespace_character = [](char ch) -> bool {
  320. return ch == '\t'
  321. || ch == '\n'
  322. || ch == '\v'
  323. || ch == '\f'
  324. || ch == '\r'
  325. || ch == ' ';
  326. };
  327. size_t substring_start = 0;
  328. size_t substring_length = length();
  329. if (mode == TrimMode::Left || mode == TrimMode::Both) {
  330. for (size_t i = 0; i < length(); ++i) {
  331. if (substring_length == 0)
  332. return "";
  333. if (!is_whitespace_character(characters()[i]))
  334. break;
  335. ++substring_start;
  336. --substring_length;
  337. }
  338. }
  339. if (mode == TrimMode::Right || mode == TrimMode::Both) {
  340. for (size_t i = length() - 1; i > 0; --i) {
  341. if (substring_length == 0)
  342. return "";
  343. if (!is_whitespace_character(characters()[i]))
  344. break;
  345. --substring_length;
  346. }
  347. }
  348. return substring(substring_start, substring_length);
  349. }
  350. String escape_html_entities(const StringView& html)
  351. {
  352. StringBuilder builder;
  353. for (size_t i = 0; i < html.length(); ++i) {
  354. if (html[i] == '<')
  355. builder.append("&lt;");
  356. else if (html[i] == '>')
  357. builder.append("&gt;");
  358. else if (html[i] == '&')
  359. builder.append("&amp;");
  360. else
  361. builder.append(html[i]);
  362. }
  363. return builder.to_string();
  364. }
  365. String::String(const FlyString& string)
  366. : m_impl(string.impl())
  367. {
  368. }
  369. String String::to_lowercase() const
  370. {
  371. if (!m_impl)
  372. return {};
  373. return m_impl->to_lowercase();
  374. }
  375. String String::to_uppercase() const
  376. {
  377. if (!m_impl)
  378. return {};
  379. return m_impl->to_uppercase();
  380. }
  381. bool operator<(const char* characters, const String& string)
  382. {
  383. if (!characters)
  384. return !string.is_null();
  385. if (string.is_null())
  386. return false;
  387. return __builtin_strcmp(characters, string.characters()) < 0;
  388. }
  389. bool operator>=(const char* characters, const String& string)
  390. {
  391. return !(characters < string);
  392. }
  393. bool operator>(const char* characters, const String& string)
  394. {
  395. if (!characters)
  396. return !string.is_null();
  397. if (string.is_null())
  398. return false;
  399. return __builtin_strcmp(characters, string.characters()) > 0;
  400. }
  401. bool operator<=(const char* characters, const String& string)
  402. {
  403. return !(characters > string);
  404. }
  405. bool String::operator==(const char* cstring) const
  406. {
  407. if (is_null())
  408. return !cstring;
  409. if (!cstring)
  410. return false;
  411. return !__builtin_strcmp(characters(), cstring);
  412. }
  413. StringView String::view() const
  414. {
  415. return { characters(), length() };
  416. }
  417. }