Format.cpp 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638
  1. /*
  2. * Copyright (c) 2020, the SerenityOS developers.
  3. * All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions are met:
  7. *
  8. * 1. Redistributions of source code must retain the above copyright notice, this
  9. * list of conditions and the following disclaimer.
  10. *
  11. * 2. Redistributions in binary form must reproduce the above copyright notice,
  12. * this list of conditions and the following disclaimer in the documentation
  13. * and/or other materials provided with the distribution.
  14. *
  15. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  16. * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  17. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  18. * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
  19. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  20. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  21. * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  22. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  23. * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  24. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  25. */
  26. #include <AK/Format.h>
  27. #include <AK/GenericLexer.h>
  28. #include <AK/String.h>
  29. #include <AK/StringBuilder.h>
  30. #include <ctype.h>
  31. #ifdef KERNEL
  32. # include <Kernel/Process.h>
  33. # include <Kernel/Thread.h>
  34. #else
  35. # include <stdio.h>
  36. # include <unistd.h>
  37. #endif
  38. namespace AK {
  39. namespace {
  40. constexpr size_t use_next_index = NumericLimits<size_t>::max();
  41. // The worst case is that we have the largest 64-bit value formatted as binary number, this would take
  42. // 65 bytes. Choosing a larger power of two won't hurt and is a bit of mitigation against out-of-bounds accesses.
  43. inline size_t convert_unsigned_to_string(u64 value, Array<u8, 128>& buffer, u8 base, bool upper_case)
  44. {
  45. ASSERT(base >= 2 && base <= 16);
  46. static constexpr const char* lowercase_lookup = "0123456789abcdef";
  47. static constexpr const char* uppercase_lookup = "0123456789ABCDEF";
  48. if (value == 0) {
  49. buffer[0] = '0';
  50. return 1;
  51. }
  52. size_t used = 0;
  53. while (value > 0) {
  54. if (upper_case)
  55. buffer[used++] = uppercase_lookup[value % base];
  56. else
  57. buffer[used++] = lowercase_lookup[value % base];
  58. value /= base;
  59. }
  60. for (size_t i = 0; i < used / 2; ++i)
  61. swap(buffer[i], buffer[used - i - 1]);
  62. return used;
  63. }
  64. void vformat_impl(TypeErasedFormatParams& params, FormatBuilder& builder, FormatParser& parser)
  65. {
  66. const auto literal = parser.consume_literal();
  67. builder.put_literal(literal);
  68. FormatParser::FormatSpecifier specifier;
  69. if (!parser.consume_specifier(specifier)) {
  70. ASSERT(parser.is_eof());
  71. return;
  72. }
  73. if (specifier.index == use_next_index)
  74. specifier.index = params.take_next_index();
  75. auto& parameter = params.parameters().at(specifier.index);
  76. FormatParser argparser { specifier.flags };
  77. parameter.formatter(params, builder, argparser, parameter.value);
  78. vformat_impl(params, builder, parser);
  79. }
  80. } // namespace AK::{anonymous}
  81. size_t TypeErasedFormatParams::decode(size_t value, size_t default_value)
  82. {
  83. if (value == StandardFormatter::value_not_set)
  84. return default_value;
  85. if (value == StandardFormatter::value_from_next_arg)
  86. value = StandardFormatter::value_from_arg + take_next_index();
  87. if (value >= StandardFormatter::value_from_arg) {
  88. const auto parameter = parameters().at(value - StandardFormatter::value_from_arg);
  89. Optional<i64> svalue;
  90. if (parameter.type == TypeErasedParameter::Type::UInt8)
  91. value = *reinterpret_cast<const u8*>(parameter.value);
  92. else if (parameter.type == TypeErasedParameter::Type::UInt16)
  93. value = *reinterpret_cast<const u16*>(parameter.value);
  94. else if (parameter.type == TypeErasedParameter::Type::UInt32)
  95. value = *reinterpret_cast<const u32*>(parameter.value);
  96. else if (parameter.type == TypeErasedParameter::Type::UInt64)
  97. value = *reinterpret_cast<const u64*>(parameter.value);
  98. else if (parameter.type == TypeErasedParameter::Type::Int8)
  99. svalue = *reinterpret_cast<const i8*>(parameter.value);
  100. else if (parameter.type == TypeErasedParameter::Type::Int16)
  101. svalue = *reinterpret_cast<const i16*>(parameter.value);
  102. else if (parameter.type == TypeErasedParameter::Type::Int32)
  103. svalue = *reinterpret_cast<const i32*>(parameter.value);
  104. else if (parameter.type == TypeErasedParameter::Type::Int64)
  105. svalue = *reinterpret_cast<const i64*>(parameter.value);
  106. else
  107. ASSERT_NOT_REACHED();
  108. if (svalue.has_value()) {
  109. ASSERT(svalue.value() >= 0);
  110. value = static_cast<size_t>(svalue.value());
  111. }
  112. }
  113. return value;
  114. }
  115. FormatParser::FormatParser(StringView input)
  116. : GenericLexer(input)
  117. {
  118. }
  119. StringView FormatParser::consume_literal()
  120. {
  121. const auto begin = tell();
  122. while (!is_eof()) {
  123. if (consume_specific("{{"))
  124. continue;
  125. if (consume_specific("}}"))
  126. continue;
  127. if (next_is(is_any_of("{}")))
  128. return m_input.substring_view(begin, tell() - begin);
  129. consume();
  130. }
  131. return m_input.substring_view(begin);
  132. }
  133. bool FormatParser::consume_number(size_t& value)
  134. {
  135. value = 0;
  136. bool consumed_at_least_one = false;
  137. while (next_is(isdigit)) {
  138. value *= 10;
  139. value += consume() - '0';
  140. consumed_at_least_one = true;
  141. }
  142. return consumed_at_least_one;
  143. }
  144. bool FormatParser::consume_specifier(FormatSpecifier& specifier)
  145. {
  146. ASSERT(!next_is('}'));
  147. if (!consume_specific('{'))
  148. return false;
  149. if (!consume_number(specifier.index))
  150. specifier.index = use_next_index;
  151. if (consume_specific(':')) {
  152. const auto begin = tell();
  153. size_t level = 1;
  154. while (level > 0) {
  155. ASSERT(!is_eof());
  156. if (consume_specific('{')) {
  157. ++level;
  158. continue;
  159. }
  160. if (consume_specific('}')) {
  161. --level;
  162. continue;
  163. }
  164. consume();
  165. }
  166. specifier.flags = m_input.substring_view(begin, tell() - begin - 1);
  167. } else {
  168. if (!consume_specific('}'))
  169. ASSERT_NOT_REACHED();
  170. specifier.flags = "";
  171. }
  172. return true;
  173. }
  174. bool FormatParser::consume_replacement_field(size_t& index)
  175. {
  176. if (!consume_specific('{'))
  177. return false;
  178. if (!consume_number(index))
  179. index = use_next_index;
  180. if (!consume_specific('}'))
  181. ASSERT_NOT_REACHED();
  182. return true;
  183. }
  184. void FormatBuilder::put_padding(char fill, size_t amount)
  185. {
  186. for (size_t i = 0; i < amount; ++i)
  187. m_builder.append(fill);
  188. }
  189. void FormatBuilder::put_literal(StringView value)
  190. {
  191. for (size_t i = 0; i < value.length(); ++i) {
  192. m_builder.append(value[i]);
  193. if (value[i] == '{' || value[i] == '}')
  194. ++i;
  195. }
  196. }
  197. void FormatBuilder::put_string(
  198. StringView value,
  199. Align align,
  200. size_t min_width,
  201. size_t max_width,
  202. char fill)
  203. {
  204. const auto used_by_string = min(max_width, value.length());
  205. const auto used_by_padding = max(min_width, used_by_string) - used_by_string;
  206. if (used_by_string < value.length())
  207. value = value.substring_view(0, used_by_string);
  208. if (align == Align::Left || align == Align::Default) {
  209. m_builder.append(value);
  210. put_padding(fill, used_by_padding);
  211. } else if (align == Align::Center) {
  212. const auto used_by_left_padding = used_by_padding / 2;
  213. const auto used_by_right_padding = ceil_div<size_t, size_t>(used_by_padding, 2);
  214. put_padding(fill, used_by_left_padding);
  215. m_builder.append(value);
  216. put_padding(fill, used_by_right_padding);
  217. } else if (align == Align::Right) {
  218. put_padding(fill, used_by_padding);
  219. m_builder.append(value);
  220. }
  221. }
  222. void FormatBuilder::put_u64(
  223. u64 value,
  224. u8 base,
  225. bool prefix,
  226. bool upper_case,
  227. bool zero_pad,
  228. Align align,
  229. size_t min_width,
  230. char fill,
  231. SignMode sign_mode,
  232. bool is_negative)
  233. {
  234. Array<u8, 128> buffer;
  235. const auto used_by_digits = convert_unsigned_to_string(value, buffer, base, upper_case);
  236. auto used_by_prefix = sign_mode == SignMode::OnlyIfNeeded ? static_cast<size_t>(is_negative) : 1;
  237. if (prefix) {
  238. if (base == 8)
  239. used_by_prefix += 1;
  240. else if (base == 16)
  241. used_by_prefix += 2;
  242. else if (base == 2)
  243. used_by_prefix += 2;
  244. }
  245. const auto used_by_field = used_by_prefix + used_by_digits;
  246. const auto used_by_padding = max(used_by_field, min_width) - used_by_field;
  247. const auto put_prefix = [&]() {
  248. if (is_negative)
  249. m_builder.append('-');
  250. else if (sign_mode == SignMode::Always)
  251. m_builder.append('+');
  252. else if (sign_mode == SignMode::Reserved)
  253. m_builder.append(' ');
  254. if (prefix) {
  255. if (base == 2) {
  256. if (upper_case)
  257. m_builder.append("0B");
  258. else
  259. m_builder.append("0b");
  260. } else if (base == 8) {
  261. m_builder.append("0");
  262. } else if (base == 16) {
  263. if (upper_case)
  264. m_builder.append("0X");
  265. else
  266. m_builder.append("0x");
  267. }
  268. }
  269. };
  270. const auto put_digits = [&]() {
  271. for (size_t i = 0; i < used_by_digits; ++i)
  272. m_builder.append(buffer[i]);
  273. };
  274. if (align == Align::Left) {
  275. const auto used_by_right_padding = used_by_padding;
  276. put_prefix();
  277. put_digits();
  278. put_padding(fill, used_by_right_padding);
  279. } else if (align == Align::Center) {
  280. const auto used_by_left_padding = used_by_padding / 2;
  281. const auto used_by_right_padding = ceil_div<size_t, size_t>(used_by_padding, 2);
  282. put_padding(fill, used_by_left_padding);
  283. put_prefix();
  284. put_digits();
  285. put_padding(fill, used_by_right_padding);
  286. } else if (align == Align::Right || align == Align::Default) {
  287. const auto used_by_left_padding = used_by_padding;
  288. if (zero_pad) {
  289. put_prefix();
  290. put_padding('0', used_by_left_padding);
  291. put_digits();
  292. } else {
  293. put_padding(fill, used_by_left_padding);
  294. put_prefix();
  295. put_digits();
  296. }
  297. }
  298. }
  299. void FormatBuilder::put_i64(
  300. i64 value,
  301. u8 base,
  302. bool prefix,
  303. bool upper_case,
  304. bool zero_pad,
  305. Align align,
  306. size_t min_width,
  307. char fill,
  308. SignMode sign_mode)
  309. {
  310. const auto is_negative = value < 0;
  311. value = is_negative ? -value : value;
  312. put_u64(static_cast<size_t>(value), base, prefix, upper_case, zero_pad, align, min_width, fill, sign_mode, is_negative);
  313. }
  314. void vformat(StringBuilder& builder, StringView fmtstr, TypeErasedFormatParams params)
  315. {
  316. FormatBuilder fmtbuilder { builder };
  317. FormatParser parser { fmtstr };
  318. vformat_impl(params, fmtbuilder, parser);
  319. }
  320. void vformat(const LogStream& stream, StringView fmtstr, TypeErasedFormatParams params)
  321. {
  322. StringBuilder builder;
  323. vformat(builder, fmtstr, params);
  324. stream << builder.to_string();
  325. }
  326. void StandardFormatter::parse(TypeErasedFormatParams& params, FormatParser& parser)
  327. {
  328. if (StringView { "<^>" }.contains(parser.peek(1))) {
  329. ASSERT(!parser.next_is(is_any_of("{}")));
  330. m_fill = parser.consume();
  331. }
  332. if (parser.consume_specific('<'))
  333. m_align = FormatBuilder::Align::Left;
  334. else if (parser.consume_specific('^'))
  335. m_align = FormatBuilder::Align::Center;
  336. else if (parser.consume_specific('>'))
  337. m_align = FormatBuilder::Align::Right;
  338. if (parser.consume_specific('-'))
  339. m_sign_mode = FormatBuilder::SignMode::OnlyIfNeeded;
  340. else if (parser.consume_specific('+'))
  341. m_sign_mode = FormatBuilder::SignMode::Always;
  342. else if (parser.consume_specific(' '))
  343. m_sign_mode = FormatBuilder::SignMode::Reserved;
  344. if (parser.consume_specific('#'))
  345. m_alternative_form = true;
  346. if (parser.consume_specific('0'))
  347. m_zero_pad = true;
  348. if (size_t index = 0; parser.consume_replacement_field(index)) {
  349. if (index == use_next_index)
  350. index = params.take_next_index();
  351. m_width = value_from_arg + index;
  352. } else if (size_t width = 0; parser.consume_number(width)) {
  353. m_width = width;
  354. }
  355. if (parser.consume_specific('.')) {
  356. if (size_t index = 0; parser.consume_replacement_field(index)) {
  357. if (index == use_next_index)
  358. index = params.take_next_index();
  359. m_precision = value_from_arg + index;
  360. } else if (size_t precision = 0; parser.consume_number(precision)) {
  361. m_precision = precision;
  362. }
  363. }
  364. if (parser.consume_specific('b'))
  365. m_mode = Mode::Binary;
  366. else if (parser.consume_specific('B'))
  367. m_mode = Mode::BinaryUppercase;
  368. else if (parser.consume_specific('d'))
  369. m_mode = Mode::Decimal;
  370. else if (parser.consume_specific('o'))
  371. m_mode = Mode::Octal;
  372. else if (parser.consume_specific('x'))
  373. m_mode = Mode::Hexadecimal;
  374. else if (parser.consume_specific('X'))
  375. m_mode = Mode::HexadecimalUppercase;
  376. else if (parser.consume_specific('c'))
  377. m_mode = Mode::Character;
  378. else if (parser.consume_specific('s'))
  379. m_mode = Mode::String;
  380. else if (parser.consume_specific('p'))
  381. m_mode = Mode::Pointer;
  382. if (!parser.is_eof())
  383. dbg() << __PRETTY_FUNCTION__ << " did not consume '" << parser.remaining() << "'";
  384. ASSERT(parser.is_eof());
  385. }
  386. void Formatter<StringView>::format(TypeErasedFormatParams& params, FormatBuilder& builder, StringView value)
  387. {
  388. if (m_sign_mode != FormatBuilder::SignMode::Default)
  389. ASSERT_NOT_REACHED();
  390. if (m_alternative_form)
  391. ASSERT_NOT_REACHED();
  392. if (m_zero_pad)
  393. ASSERT_NOT_REACHED();
  394. if (m_mode != Mode::Default && m_mode != Mode::String)
  395. ASSERT_NOT_REACHED();
  396. if (m_width != value_not_set && m_precision != value_not_set)
  397. ASSERT_NOT_REACHED();
  398. const auto width = params.decode(m_width);
  399. const auto precision = params.decode(m_precision, NumericLimits<size_t>::max());
  400. builder.put_string(value, m_align, width, precision, m_fill);
  401. }
  402. template<typename T>
  403. void Formatter<T, typename EnableIf<IsIntegral<T>::value>::Type>::format(TypeErasedFormatParams& params, FormatBuilder& builder, T value)
  404. {
  405. if (m_mode == Mode::Character) {
  406. // FIXME: We just support ASCII for now, in the future maybe unicode?
  407. ASSERT(value >= 0 && value <= 127);
  408. m_mode = Mode::String;
  409. Formatter<StringView> formatter { *this };
  410. return formatter.format(params, builder, StringView { reinterpret_cast<const char*>(&value), 1 });
  411. }
  412. if (m_precision != NumericLimits<size_t>::max())
  413. ASSERT_NOT_REACHED();
  414. if (m_mode == Mode::Pointer) {
  415. if (m_sign_mode != FormatBuilder::SignMode::Default)
  416. ASSERT_NOT_REACHED();
  417. if (m_align != FormatBuilder::Align::Default)
  418. ASSERT_NOT_REACHED();
  419. if (m_alternative_form)
  420. ASSERT_NOT_REACHED();
  421. if (m_width != value_not_set)
  422. ASSERT_NOT_REACHED();
  423. m_mode = Mode::Hexadecimal;
  424. m_alternative_form = true;
  425. m_width = 2 * sizeof(void*) + 2;
  426. m_zero_pad = true;
  427. }
  428. u8 base = 0;
  429. bool upper_case = false;
  430. if (m_mode == Mode::Binary) {
  431. base = 2;
  432. } else if (m_mode == Mode::BinaryUppercase) {
  433. base = 2;
  434. upper_case = true;
  435. } else if (m_mode == Mode::Octal) {
  436. base = 8;
  437. } else if (m_mode == Mode::Decimal || m_mode == Mode::Default) {
  438. base = 10;
  439. } else if (m_mode == Mode::Hexadecimal) {
  440. base = 16;
  441. } else if (m_mode == Mode::HexadecimalUppercase) {
  442. base = 16;
  443. upper_case = true;
  444. } else {
  445. ASSERT_NOT_REACHED();
  446. }
  447. const auto width = params.decode(m_width);
  448. if (IsSame<typename MakeUnsigned<T>::Type, T>::value)
  449. builder.put_u64(value, base, m_alternative_form, upper_case, m_zero_pad, m_align, width, m_fill, m_sign_mode);
  450. else
  451. builder.put_i64(value, base, m_alternative_form, upper_case, m_zero_pad, m_align, width, m_fill, m_sign_mode);
  452. }
  453. void Formatter<bool>::format(TypeErasedFormatParams& params, FormatBuilder& builder, bool value)
  454. {
  455. if (m_mode == Mode::Binary || m_mode == Mode::BinaryUppercase || m_mode == Mode::Decimal || m_mode == Mode::Octal || m_mode == Mode::Hexadecimal || m_mode == Mode::HexadecimalUppercase) {
  456. Formatter<u8> formatter { *this };
  457. return formatter.format(params, builder, static_cast<u8>(value));
  458. } else {
  459. Formatter<StringView> formatter { *this };
  460. return formatter.format(params, builder, value ? "true" : "false");
  461. }
  462. }
  463. #ifndef KERNEL
  464. void raw_out(StringView string)
  465. {
  466. const auto retval = ::fwrite(string.characters_without_null_termination(), 1, string.length(), stdout);
  467. ASSERT(retval == string.length());
  468. }
  469. void vout(StringView fmtstr, TypeErasedFormatParams params, bool newline)
  470. {
  471. StringBuilder builder;
  472. vformat(builder, fmtstr, params);
  473. if (newline && !builder.is_empty())
  474. builder.append('\n');
  475. raw_out(builder.to_string());
  476. }
  477. void raw_warn(StringView string)
  478. {
  479. const auto retval = ::write(STDERR_FILENO, string.characters_without_null_termination(), string.length());
  480. ASSERT(static_cast<size_t>(retval) == string.length());
  481. }
  482. void vwarn(StringView fmtstr, TypeErasedFormatParams params, bool newline)
  483. {
  484. StringBuilder builder;
  485. vformat(builder, fmtstr, params);
  486. if (newline && !builder.is_empty())
  487. builder.append('\n');
  488. raw_warn(builder.to_string());
  489. }
  490. #endif
  491. void raw_dbg(StringView string)
  492. {
  493. const auto retval = dbgputstr(string.characters_without_null_termination(), string.length());
  494. ASSERT(static_cast<size_t>(retval) == string.length());
  495. }
  496. void vdbg(StringView fmtstr, TypeErasedFormatParams params, bool newline)
  497. {
  498. StringBuilder builder;
  499. // FIXME: This logic is redundant with the stuff in LogStream.cpp.
  500. #if defined(__serenity__)
  501. # ifdef KERNEL
  502. if (Kernel::Processor::is_initialized() && Kernel::Thread::current()) {
  503. auto& thread = *Kernel::Thread::current();
  504. builder.appendff("\033[34;1m[{}({}:{})]\033[0m: ", thread.process().name(), thread.pid().value(), thread.tid().value());
  505. } else {
  506. builder.appendff("\033[34;1m[Kernel]\033[0m: ");
  507. }
  508. # else
  509. static TriState got_process_name = TriState::Unknown;
  510. static char process_name_buffer[256];
  511. if (got_process_name == TriState::Unknown) {
  512. if (get_process_name(process_name_buffer, sizeof(process_name_buffer)) == 0)
  513. got_process_name = TriState::True;
  514. else
  515. got_process_name = TriState::False;
  516. }
  517. if (got_process_name == TriState::True)
  518. builder.appendff("\033[33;1m{}({})\033[0m: ", process_name_buffer, getpid());
  519. # endif
  520. #endif
  521. vformat(builder, fmtstr, params);
  522. if (newline && !builder.is_empty())
  523. builder.append('\n');
  524. raw_dbg(builder.to_string());
  525. }
  526. template struct Formatter<unsigned char, void>;
  527. template struct Formatter<unsigned short, void>;
  528. template struct Formatter<unsigned int, void>;
  529. template struct Formatter<unsigned long, void>;
  530. template struct Formatter<unsigned long long, void>;
  531. template struct Formatter<char, void>;
  532. template struct Formatter<short, void>;
  533. template struct Formatter<int, void>;
  534. template struct Formatter<long, void>;
  535. template struct Formatter<long long, void>;
  536. template struct Formatter<signed char, void>;
  537. } // namespace AK