scanf.cpp 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619
  1. /*
  2. * Copyright (c) 2021, the SerenityOS developers.
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <AK/Assertions.h>
  7. #include <AK/Format.h>
  8. #include <AK/GenericLexer.h>
  9. #include <AK/StdLibExtras.h>
  10. #include <ctype.h>
  11. #include <stdarg.h>
  12. #include <stdio.h>
  13. #include <stdlib.h>
  14. #include <string.h>
  // Length modifier of a single scanf conversion specification.
  // The spellings noted below are the ones vsscanf() maps onto each value.
  enum class LengthModifier {
      None,       // Nothing parsed yet; treated as unreachable by the readers.
      Default,    // The conversion carries no length modifier.
      Char,       // "hh"
      Short,      // "h"
      Long,       // "l" (also forced by the 'C' and 'S' conversions)
      LongLong,   // "ll"
      IntMax,     // "j"
      Size,       // "z"
      PtrDiff,    // "t"
      LongDouble, // "L"
  };
  // Conversion requested by a single '%' directive in the format string.
  // The characters noted below are the ones vsscanf() maps onto each value.
  enum class ConversionSpecifier {
      Unspecified,         // Nothing parsed yet.
      Decimal,             // 'd'
      Integer,             // 'i'
      Octal,               // 'o'
      Unsigned,            // 'u'
      Hex,                 // 'x', 'X'
      Floating,            // 'a', 'e', 'f', 'g'
      String,              // 's', 'S'
      UseScanList,         // '[' ... ']'
      Character,           // 'c', 'C'
      Pointer,             // 'p'
      OutputNumberOfBytes, // 'n'
      Invalid,             // Any other character.
  };
  // How the numeric readers interpret the digits they consume.
  enum class ReadKind {
      Normal, // Base 10 for the integer readers; the only kind the float and char readers accept.
      Octal,  // Base 8.
      Hex,    // Base 16.
      Infer,  // Base 0 is passed to strtol() and friends, which then pick the base themselves.
  };
  48. template<typename T, typename ApT, ReadKind kind = ReadKind::Normal>
  49. struct ReadElementConcrete {
  50. bool operator()(GenericLexer&, va_list)
  51. {
  52. return false;
  53. }
  54. };
  55. template<typename ApT, ReadKind kind>
  56. struct ReadElementConcrete<int, ApT, kind> {
  57. bool operator()(GenericLexer& lexer, va_list* ap, bool suppress_assignment)
  58. {
  59. long value = 0;
  60. char* endptr = nullptr;
  61. auto nptr = lexer.remaining().characters_without_null_termination();
  62. if constexpr (kind == ReadKind::Normal)
  63. value = strtol(nptr, &endptr, 10);
  64. if constexpr (kind == ReadKind::Octal)
  65. value = strtol(nptr, &endptr, 8);
  66. if constexpr (kind == ReadKind::Hex)
  67. value = strtol(nptr, &endptr, 16);
  68. if constexpr (kind == ReadKind::Infer)
  69. value = strtol(nptr, &endptr, 0);
  70. if (!endptr)
  71. return false;
  72. if (endptr == nptr)
  73. return false;
  74. auto diff = endptr - nptr;
  75. VERIFY(diff > 0);
  76. lexer.ignore((size_t)diff);
  77. if (!suppress_assignment) {
  78. auto* ptr = va_arg(*ap, ApT*);
  79. *ptr = value;
  80. }
  81. return true;
  82. }
  83. };
  84. template<typename ApT, ReadKind kind>
  85. struct ReadElementConcrete<char, ApT, kind> {
  86. bool operator()(GenericLexer& lexer, va_list* ap, bool suppress_assignment)
  87. {
  88. static_assert(kind == ReadKind::Normal, "Can't read a non-normal character");
  89. if (lexer.is_eof())
  90. return false;
  91. auto ch = lexer.consume();
  92. if (!suppress_assignment) {
  93. auto* ptr = va_arg(*ap, ApT*);
  94. *ptr = ch;
  95. }
  96. return true;
  97. }
  98. };
  99. template<typename ApT, ReadKind kind>
  100. struct ReadElementConcrete<unsigned, ApT, kind> {
  101. bool operator()(GenericLexer& lexer, va_list* ap, bool suppress_assignment)
  102. {
  103. unsigned long value = 0;
  104. char* endptr = nullptr;
  105. auto nptr = lexer.remaining().characters_without_null_termination();
  106. if constexpr (kind == ReadKind::Normal)
  107. value = strtoul(nptr, &endptr, 10);
  108. if constexpr (kind == ReadKind::Octal)
  109. value = strtoul(nptr, &endptr, 8);
  110. if constexpr (kind == ReadKind::Hex)
  111. value = strtoul(nptr, &endptr, 16);
  112. if constexpr (kind == ReadKind::Infer)
  113. value = strtoul(nptr, &endptr, 0);
  114. if (!endptr)
  115. return false;
  116. if (endptr == nptr)
  117. return false;
  118. auto diff = endptr - nptr;
  119. VERIFY(diff > 0);
  120. lexer.ignore((size_t)diff);
  121. if (!suppress_assignment) {
  122. auto* ptr = va_arg(*ap, ApT*);
  123. *ptr = value;
  124. }
  125. return true;
  126. }
  127. };
  128. template<typename ApT, ReadKind kind>
  129. struct ReadElementConcrete<long long, ApT, kind> {
  130. bool operator()(GenericLexer& lexer, va_list* ap, bool suppress_assignment)
  131. {
  132. long long value = 0;
  133. char* endptr = nullptr;
  134. auto nptr = lexer.remaining().characters_without_null_termination();
  135. if constexpr (kind == ReadKind::Normal)
  136. value = strtoll(nptr, &endptr, 10);
  137. if constexpr (kind == ReadKind::Octal)
  138. value = strtoll(nptr, &endptr, 8);
  139. if constexpr (kind == ReadKind::Hex)
  140. value = strtoll(nptr, &endptr, 16);
  141. if constexpr (kind == ReadKind::Infer)
  142. value = strtoll(nptr, &endptr, 0);
  143. if (!endptr)
  144. return false;
  145. if (endptr == nptr)
  146. return false;
  147. auto diff = endptr - nptr;
  148. VERIFY(diff > 0);
  149. lexer.ignore((size_t)diff);
  150. if (!suppress_assignment) {
  151. auto* ptr = va_arg(*ap, ApT*);
  152. *ptr = value;
  153. }
  154. return true;
  155. }
  156. };
  157. template<typename ApT, ReadKind kind>
  158. struct ReadElementConcrete<unsigned long long, ApT, kind> {
  159. bool operator()(GenericLexer& lexer, va_list* ap, bool suppress_assignment)
  160. {
  161. unsigned long long value = 0;
  162. char* endptr = nullptr;
  163. auto nptr = lexer.remaining().characters_without_null_termination();
  164. if constexpr (kind == ReadKind::Normal)
  165. value = strtoull(nptr, &endptr, 10);
  166. if constexpr (kind == ReadKind::Octal)
  167. value = strtoull(nptr, &endptr, 8);
  168. if constexpr (kind == ReadKind::Hex)
  169. value = strtoull(nptr, &endptr, 16);
  170. if constexpr (kind == ReadKind::Infer)
  171. value = strtoull(nptr, &endptr, 0);
  172. if (!endptr)
  173. return false;
  174. if (endptr == nptr)
  175. return false;
  176. auto diff = endptr - nptr;
  177. VERIFY(diff > 0);
  178. lexer.ignore((size_t)diff);
  179. if (!suppress_assignment) {
  180. auto* ptr = va_arg(*ap, ApT*);
  181. *ptr = value;
  182. }
  183. return true;
  184. }
  185. };
  186. template<typename ApT, ReadKind kind>
  187. struct ReadElementConcrete<float, ApT, kind> {
  188. bool operator()(GenericLexer& lexer, va_list* ap, bool suppress_assignment)
  189. {
  190. double value = 0;
  191. char* endptr = nullptr;
  192. auto nptr = lexer.remaining().characters_without_null_termination();
  193. if constexpr (kind == ReadKind::Normal)
  194. value = strtod(nptr, &endptr);
  195. else
  196. return false;
  197. if (!endptr)
  198. return false;
  199. if (endptr == nptr)
  200. return false;
  201. auto diff = endptr - nptr;
  202. VERIFY(diff > 0);
  203. lexer.ignore((size_t)diff);
  204. if (!suppress_assignment) {
  205. auto* ptr = va_arg(*ap, ApT*);
  206. *ptr = value;
  207. }
  208. return true;
  209. }
  210. };
  211. template<typename T, ReadKind kind>
  212. struct ReadElement {
  213. bool operator()(LengthModifier length_modifier, GenericLexer& input_lexer, va_list* ap, bool suppress_assignment)
  214. {
  215. switch (length_modifier) {
  216. default:
  217. case LengthModifier::None:
  218. VERIFY_NOT_REACHED();
  219. case LengthModifier::Default:
  220. return ReadElementConcrete<T, T, kind> {}(input_lexer, ap, suppress_assignment);
  221. case LengthModifier::Char:
  222. return ReadElementConcrete<T, char, kind> {}(input_lexer, ap, suppress_assignment);
  223. case LengthModifier::Short:
  224. return ReadElementConcrete<T, short, kind> {}(input_lexer, ap, suppress_assignment);
  225. case LengthModifier::Long:
  226. if constexpr (IsSame<T, int>)
  227. return ReadElementConcrete<T, long, kind> {}(input_lexer, ap, suppress_assignment);
  228. if constexpr (IsSame<T, unsigned>)
  229. return ReadElementConcrete<T, unsigned long, kind> {}(input_lexer, ap, suppress_assignment);
  230. if constexpr (IsSame<T, float>)
  231. return ReadElementConcrete<int, double, kind> {}(input_lexer, ap, suppress_assignment);
  232. return false;
  233. case LengthModifier::LongLong:
  234. if constexpr (IsSame<T, int>)
  235. return ReadElementConcrete<long long, long long, kind> {}(input_lexer, ap, suppress_assignment);
  236. if constexpr (IsSame<T, unsigned>)
  237. return ReadElementConcrete<unsigned long long, unsigned long long, kind> {}(input_lexer, ap, suppress_assignment);
  238. if constexpr (IsSame<T, float>)
  239. return ReadElementConcrete<long long, double, kind> {}(input_lexer, ap, suppress_assignment);
  240. return false;
  241. case LengthModifier::IntMax:
  242. return ReadElementConcrete<T, intmax_t, kind> {}(input_lexer, ap, suppress_assignment);
  243. case LengthModifier::Size:
  244. return ReadElementConcrete<T, size_t, kind> {}(input_lexer, ap, suppress_assignment);
  245. case LengthModifier::PtrDiff:
  246. return ReadElementConcrete<T, ptrdiff_t, kind> {}(input_lexer, ap, suppress_assignment);
  247. case LengthModifier::LongDouble:
  248. return ReadElementConcrete<T, long double, kind> {}(input_lexer, ap, suppress_assignment);
  249. }
  250. }
  251. };
  252. template<>
  253. struct ReadElement<char*, ReadKind::Normal> {
  254. ReadElement(StringView scan_set = {}, bool invert = false)
  255. : scan_set(scan_set.is_null() ? " \t\n\f\r"sv : scan_set)
  256. , invert(scan_set.is_null() ? true : invert)
  257. {
  258. }
  259. bool operator()(LengthModifier length_modifier, GenericLexer& input_lexer, va_list* ap, bool suppress_assignment)
  260. {
  261. // FIXME: Implement wide strings and such.
  262. if (length_modifier != LengthModifier::Default)
  263. return false;
  264. auto str = input_lexer.consume_while([this](auto c) { return this->matches(c); });
  265. if (str.is_empty())
  266. return false;
  267. if (!suppress_assignment) {
  268. auto* ptr = va_arg(*ap, char*);
  269. memcpy(ptr, str.characters_without_null_termination(), str.length());
  270. ptr[str.length()] = 0;
  271. }
  272. return true;
  273. }
  274. private:
  275. bool matches(char c) const
  276. {
  277. return invert ^ scan_set.contains(c);
  278. }
  279. const StringView scan_set;
  280. bool invert { false };
  281. };
  282. template<>
  283. struct ReadElement<void*, ReadKind::Normal> {
  284. bool operator()(LengthModifier length_modifier, GenericLexer& input_lexer, va_list* ap, bool suppress_assignment)
  285. {
  286. if (length_modifier != LengthModifier::Default)
  287. return false;
  288. auto str = input_lexer.consume_while([this](auto c) { return this->should_consume(c); });
  289. if (count != 8) {
  290. fail:;
  291. for (size_t i = 0; i < count; ++i)
  292. input_lexer.retreat();
  293. return false;
  294. }
  295. char buf[9] { 0 };
  296. memcpy(buf, str.characters_without_null_termination(), 8);
  297. buf[8] = 0;
  298. char* endptr = nullptr;
  299. auto value = strtoull(buf, &endptr, 16);
  300. if (endptr != &buf[8])
  301. goto fail;
  302. if (!suppress_assignment) {
  303. auto* ptr = va_arg(*ap, void**);
  304. memcpy(ptr, &value, sizeof(value));
  305. }
  306. return true;
  307. }
  308. private:
  309. bool should_consume(char c)
  310. {
  311. if (count == 8)
  312. return false;
  313. if (!isxdigit(c))
  314. return false;
  315. ++count;
  316. return true;
  317. }
  318. size_t count { 0 };
  319. };
  320. extern "C" int vsscanf(char const* input, char const* format, va_list ap)
  321. {
  322. GenericLexer format_lexer { { format, strlen(format) } };
  323. GenericLexer input_lexer { { input, strlen(input) } };
  324. int elements_matched = 0;
  325. va_list copy;
  326. __builtin_va_copy(copy, ap);
  327. while (!format_lexer.is_eof()) {
  328. if (format_lexer.next_is(isspace)) {
  329. format_lexer.ignore_while(isspace);
  330. input_lexer.ignore_while(isspace);
  331. }
  332. if (!format_lexer.next_is('%')) {
  333. read_one_literal:;
  334. if (format_lexer.is_eof())
  335. break;
  336. auto next_char = format_lexer.consume();
  337. if (!input_lexer.consume_specific(next_char))
  338. return elements_matched;
  339. continue;
  340. }
  341. if (format_lexer.next_is("%%")) {
  342. format_lexer.ignore();
  343. goto read_one_literal;
  344. }
  345. format_lexer.ignore(); // '%'
  346. bool suppress_assignment = false;
  347. if (format_lexer.next_is('*')) {
  348. suppress_assignment = true;
  349. format_lexer.ignore();
  350. }
  351. // Parse width specification
  352. [[maybe_unused]] int width_specifier = 0;
  353. if (format_lexer.next_is(isdigit)) {
  354. auto width_digits = format_lexer.consume_while([](char c) { return isdigit(c); });
  355. width_specifier = width_digits.to_int().value();
  356. // FIXME: Actually use width specifier
  357. }
  358. bool invert_scanlist = false;
  359. StringView scanlist;
  360. LengthModifier length_modifier { LengthModifier::None };
  361. ConversionSpecifier conversion_specifier { ConversionSpecifier::Unspecified };
  362. reread_lookahead:;
  363. auto format_lookahead = format_lexer.peek();
  364. if (length_modifier == LengthModifier::None) {
  365. switch (format_lookahead) {
  366. case 'h':
  367. if (format_lexer.peek(1) == 'h') {
  368. format_lexer.consume(2);
  369. length_modifier = LengthModifier::Char;
  370. } else {
  371. format_lexer.consume(1);
  372. length_modifier = LengthModifier::Short;
  373. }
  374. break;
  375. case 'l':
  376. if (format_lexer.peek(1) == 'l') {
  377. format_lexer.consume(2);
  378. length_modifier = LengthModifier::LongLong;
  379. } else {
  380. format_lexer.consume(1);
  381. length_modifier = LengthModifier::Long;
  382. }
  383. break;
  384. case 'j':
  385. format_lexer.consume();
  386. length_modifier = LengthModifier::IntMax;
  387. break;
  388. case 'z':
  389. format_lexer.consume();
  390. length_modifier = LengthModifier::Size;
  391. break;
  392. case 't':
  393. format_lexer.consume();
  394. length_modifier = LengthModifier::PtrDiff;
  395. break;
  396. case 'L':
  397. format_lexer.consume();
  398. length_modifier = LengthModifier::LongDouble;
  399. break;
  400. default:
  401. length_modifier = LengthModifier::Default;
  402. break;
  403. }
  404. goto reread_lookahead;
  405. }
  406. if (conversion_specifier == ConversionSpecifier::Unspecified) {
  407. switch (format_lookahead) {
  408. case 'd':
  409. format_lexer.consume();
  410. conversion_specifier = ConversionSpecifier::Decimal;
  411. break;
  412. case 'i':
  413. format_lexer.consume();
  414. conversion_specifier = ConversionSpecifier::Integer;
  415. break;
  416. case 'o':
  417. format_lexer.consume();
  418. conversion_specifier = ConversionSpecifier::Octal;
  419. break;
  420. case 'u':
  421. format_lexer.consume();
  422. conversion_specifier = ConversionSpecifier::Unsigned;
  423. break;
  424. case 'x':
  425. case 'X':
  426. format_lexer.consume();
  427. conversion_specifier = ConversionSpecifier::Hex;
  428. break;
  429. case 'a':
  430. case 'e':
  431. case 'f':
  432. case 'g':
  433. format_lexer.consume();
  434. conversion_specifier = ConversionSpecifier::Floating;
  435. break;
  436. case 's':
  437. format_lexer.consume();
  438. conversion_specifier = ConversionSpecifier::String;
  439. break;
  440. case '[':
  441. format_lexer.consume();
  442. scanlist = format_lexer.consume_until(']');
  443. format_lexer.ignore();
  444. if (scanlist.starts_with('^')) {
  445. scanlist = scanlist.substring_view(1);
  446. invert_scanlist = true;
  447. }
  448. conversion_specifier = ConversionSpecifier::UseScanList;
  449. break;
  450. case 'c':
  451. format_lexer.consume();
  452. conversion_specifier = ConversionSpecifier::Character;
  453. break;
  454. case 'p':
  455. format_lexer.consume();
  456. conversion_specifier = ConversionSpecifier::Pointer;
  457. break;
  458. case 'n':
  459. format_lexer.consume();
  460. conversion_specifier = ConversionSpecifier::OutputNumberOfBytes;
  461. break;
  462. case 'C':
  463. format_lexer.consume();
  464. length_modifier = LengthModifier::Long;
  465. conversion_specifier = ConversionSpecifier::Character;
  466. break;
  467. case 'S':
  468. format_lexer.consume();
  469. length_modifier = LengthModifier::Long;
  470. conversion_specifier = ConversionSpecifier::String;
  471. break;
  472. default:
  473. format_lexer.consume();
  474. conversion_specifier = ConversionSpecifier::Invalid;
  475. break;
  476. }
  477. }
  478. // Now try to read.
  479. switch (conversion_specifier) {
  480. case ConversionSpecifier::Invalid:
  481. case ConversionSpecifier::Unspecified:
  482. default:
  483. // "undefined behavior", let's be nice and crash.
  484. dbgln("Invalid conversion specifier {} in scanf!", (int)conversion_specifier);
  485. VERIFY_NOT_REACHED();
  486. case ConversionSpecifier::Decimal:
  487. if (!ReadElement<int, ReadKind::Normal> {}(length_modifier, input_lexer, &copy, suppress_assignment))
  488. format_lexer.consume_all();
  489. else if (!suppress_assignment)
  490. ++elements_matched;
  491. break;
  492. case ConversionSpecifier::Integer:
  493. if (!ReadElement<int, ReadKind::Infer> {}(length_modifier, input_lexer, &copy, suppress_assignment))
  494. format_lexer.consume_all();
  495. else if (!suppress_assignment)
  496. ++elements_matched;
  497. break;
  498. case ConversionSpecifier::Octal:
  499. if (!ReadElement<unsigned, ReadKind::Octal> {}(length_modifier, input_lexer, &copy, suppress_assignment))
  500. format_lexer.consume_all();
  501. else if (!suppress_assignment)
  502. ++elements_matched;
  503. break;
  504. case ConversionSpecifier::Unsigned:
  505. if (!ReadElement<unsigned, ReadKind::Normal> {}(length_modifier, input_lexer, &copy, suppress_assignment))
  506. format_lexer.consume_all();
  507. else if (!suppress_assignment)
  508. ++elements_matched;
  509. break;
  510. case ConversionSpecifier::Hex:
  511. if (!ReadElement<unsigned, ReadKind::Hex> {}(length_modifier, input_lexer, &copy, suppress_assignment))
  512. format_lexer.consume_all();
  513. else if (!suppress_assignment)
  514. ++elements_matched;
  515. break;
  516. case ConversionSpecifier::Floating:
  517. if (!ReadElement<float, ReadKind::Normal> {}(length_modifier, input_lexer, &copy, suppress_assignment))
  518. format_lexer.consume_all();
  519. else if (!suppress_assignment)
  520. ++elements_matched;
  521. break;
  522. case ConversionSpecifier::String:
  523. if (!ReadElement<char*, ReadKind::Normal> {}(length_modifier, input_lexer, &copy, suppress_assignment))
  524. format_lexer.consume_all();
  525. else if (!suppress_assignment)
  526. ++elements_matched;
  527. break;
  528. case ConversionSpecifier::UseScanList:
  529. if (!ReadElement<char*, ReadKind::Normal> { scanlist, invert_scanlist }(length_modifier, input_lexer, &copy, suppress_assignment))
  530. format_lexer.consume_all();
  531. else if (!suppress_assignment)
  532. ++elements_matched;
  533. break;
  534. case ConversionSpecifier::Character:
  535. if (!ReadElement<char, ReadKind::Normal> {}(length_modifier, input_lexer, &copy, suppress_assignment))
  536. format_lexer.consume_all();
  537. else if (!suppress_assignment)
  538. ++elements_matched;
  539. break;
  540. case ConversionSpecifier::Pointer:
  541. if (!ReadElement<void*, ReadKind::Normal> {}(length_modifier, input_lexer, &copy, suppress_assignment))
  542. format_lexer.consume_all();
  543. else if (!suppress_assignment)
  544. ++elements_matched;
  545. break;
  546. case ConversionSpecifier::OutputNumberOfBytes: {
  547. if (!suppress_assignment) {
  548. auto* ptr = va_arg(copy, int*);
  549. *ptr = input_lexer.tell();
  550. }
  551. break;
  552. }
  553. }
  554. }
  555. va_end(copy);
  556. return elements_matched;
  557. }