scanf.cpp 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634
  1. /*
  2. * Copyright (c) 2021, the SerenityOS developers.
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
#include <AK/Assertions.h>
#include <AK/Format.h>
#include <AK/GenericLexer.h>
#include <AK/StdLibExtras.h>
#include <ctype.h>
#include <stdarg.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
  15. enum class LengthModifier {
  16. None,
  17. Default,
  18. Char,
  19. Short,
  20. Long,
  21. LongLong,
  22. IntMax,
  23. Size,
  24. PtrDiff,
  25. LongDouble,
  26. };
  27. enum class ConversionSpecifier {
  28. Unspecified,
  29. Decimal,
  30. Integer,
  31. Octal,
  32. Unsigned,
  33. Hex,
  34. Floating,
  35. String,
  36. UseScanList,
  37. Character,
  38. Pointer,
  39. OutputNumberOfBytes,
  40. Invalid,
  41. };
  42. enum class ReadKind {
  43. Normal,
  44. Octal,
  45. Hex,
  46. Infer,
  47. };
  48. template<typename T, typename ApT, ReadKind kind = ReadKind::Normal>
  49. struct ReadElementConcrete {
  50. bool operator()(GenericLexer&, va_list)
  51. {
  52. return false;
  53. }
  54. };
  55. template<typename ApT, ReadKind kind>
  56. struct ReadElementConcrete<int, ApT, kind> {
  57. bool operator()(GenericLexer& lexer, va_list* ap, bool suppress_assignment)
  58. {
  59. lexer.ignore_while(isspace);
  60. long value = 0;
  61. char* endptr = nullptr;
  62. auto nptr = lexer.remaining().characters_without_null_termination();
  63. if constexpr (kind == ReadKind::Normal)
  64. value = strtol(nptr, &endptr, 10);
  65. if constexpr (kind == ReadKind::Octal)
  66. value = strtol(nptr, &endptr, 8);
  67. if constexpr (kind == ReadKind::Hex)
  68. value = strtol(nptr, &endptr, 16);
  69. if constexpr (kind == ReadKind::Infer)
  70. value = strtol(nptr, &endptr, 0);
  71. if (!endptr)
  72. return false;
  73. if (endptr == nptr)
  74. return false;
  75. auto diff = endptr - nptr;
  76. VERIFY(diff > 0);
  77. lexer.ignore((size_t)diff);
  78. if (!suppress_assignment) {
  79. auto* ptr = va_arg(*ap, ApT*);
  80. *ptr = value;
  81. }
  82. return true;
  83. }
  84. };
  85. template<typename ApT, ReadKind kind>
  86. struct ReadElementConcrete<char, ApT, kind> {
  87. bool operator()(GenericLexer& lexer, va_list* ap, bool suppress_assignment)
  88. {
  89. static_assert(kind == ReadKind::Normal, "Can't read a non-normal character");
  90. if (lexer.is_eof())
  91. return false;
  92. auto ch = lexer.consume();
  93. if (!suppress_assignment) {
  94. auto* ptr = va_arg(*ap, ApT*);
  95. *ptr = ch;
  96. }
  97. return true;
  98. }
  99. };
  100. template<typename ApT, ReadKind kind>
  101. struct ReadElementConcrete<unsigned, ApT, kind> {
  102. bool operator()(GenericLexer& lexer, va_list* ap, bool suppress_assignment)
  103. {
  104. lexer.ignore_while(isspace);
  105. unsigned long value = 0;
  106. char* endptr = nullptr;
  107. auto nptr = lexer.remaining().characters_without_null_termination();
  108. if constexpr (kind == ReadKind::Normal)
  109. value = strtoul(nptr, &endptr, 10);
  110. if constexpr (kind == ReadKind::Octal)
  111. value = strtoul(nptr, &endptr, 8);
  112. if constexpr (kind == ReadKind::Hex)
  113. value = strtoul(nptr, &endptr, 16);
  114. if constexpr (kind == ReadKind::Infer)
  115. value = strtoul(nptr, &endptr, 0);
  116. if (!endptr)
  117. return false;
  118. if (endptr == nptr)
  119. return false;
  120. auto diff = endptr - nptr;
  121. VERIFY(diff > 0);
  122. lexer.ignore((size_t)diff);
  123. if (!suppress_assignment) {
  124. auto* ptr = va_arg(*ap, ApT*);
  125. *ptr = value;
  126. }
  127. return true;
  128. }
  129. };
  130. template<typename ApT, ReadKind kind>
  131. struct ReadElementConcrete<long long, ApT, kind> {
  132. bool operator()(GenericLexer& lexer, va_list* ap, bool suppress_assignment)
  133. {
  134. lexer.ignore_while(isspace);
  135. long long value = 0;
  136. char* endptr = nullptr;
  137. auto nptr = lexer.remaining().characters_without_null_termination();
  138. if constexpr (kind == ReadKind::Normal)
  139. value = strtoll(nptr, &endptr, 10);
  140. if constexpr (kind == ReadKind::Octal)
  141. value = strtoll(nptr, &endptr, 8);
  142. if constexpr (kind == ReadKind::Hex)
  143. value = strtoll(nptr, &endptr, 16);
  144. if constexpr (kind == ReadKind::Infer)
  145. value = strtoll(nptr, &endptr, 0);
  146. if (!endptr)
  147. return false;
  148. if (endptr == nptr)
  149. return false;
  150. auto diff = endptr - nptr;
  151. VERIFY(diff > 0);
  152. lexer.ignore((size_t)diff);
  153. if (!suppress_assignment) {
  154. auto* ptr = va_arg(*ap, ApT*);
  155. *ptr = value;
  156. }
  157. return true;
  158. }
  159. };
  160. template<typename ApT, ReadKind kind>
  161. struct ReadElementConcrete<unsigned long long, ApT, kind> {
  162. bool operator()(GenericLexer& lexer, va_list* ap, bool suppress_assignment)
  163. {
  164. lexer.ignore_while(isspace);
  165. unsigned long long value = 0;
  166. char* endptr = nullptr;
  167. auto nptr = lexer.remaining().characters_without_null_termination();
  168. if constexpr (kind == ReadKind::Normal)
  169. value = strtoull(nptr, &endptr, 10);
  170. if constexpr (kind == ReadKind::Octal)
  171. value = strtoull(nptr, &endptr, 8);
  172. if constexpr (kind == ReadKind::Hex)
  173. value = strtoull(nptr, &endptr, 16);
  174. if constexpr (kind == ReadKind::Infer)
  175. value = strtoull(nptr, &endptr, 0);
  176. if (!endptr)
  177. return false;
  178. if (endptr == nptr)
  179. return false;
  180. auto diff = endptr - nptr;
  181. VERIFY(diff > 0);
  182. lexer.ignore((size_t)diff);
  183. if (!suppress_assignment) {
  184. auto* ptr = va_arg(*ap, ApT*);
  185. *ptr = value;
  186. }
  187. return true;
  188. }
  189. };
  190. template<typename ApT, ReadKind kind>
  191. struct ReadElementConcrete<float, ApT, kind> {
  192. bool operator()(GenericLexer& lexer, va_list* ap, bool suppress_assignment)
  193. {
  194. lexer.ignore_while(isspace);
  195. double value = 0;
  196. char* endptr = nullptr;
  197. auto nptr = lexer.remaining().characters_without_null_termination();
  198. if constexpr (kind == ReadKind::Normal)
  199. value = strtod(nptr, &endptr);
  200. else
  201. return false;
  202. if (!endptr)
  203. return false;
  204. if (endptr == nptr)
  205. return false;
  206. auto diff = endptr - nptr;
  207. VERIFY(diff > 0);
  208. lexer.ignore((size_t)diff);
  209. if (!suppress_assignment) {
  210. auto* ptr = va_arg(*ap, ApT*);
  211. *ptr = value;
  212. }
  213. return true;
  214. }
  215. };
  216. template<typename T, ReadKind kind>
  217. struct ReadElement {
  218. bool operator()(LengthModifier length_modifier, GenericLexer& input_lexer, va_list* ap, bool suppress_assignment)
  219. {
  220. switch (length_modifier) {
  221. default:
  222. case LengthModifier::None:
  223. VERIFY_NOT_REACHED();
  224. case LengthModifier::Default:
  225. return ReadElementConcrete<T, T, kind> {}(input_lexer, ap, suppress_assignment);
  226. case LengthModifier::Char:
  227. return ReadElementConcrete<T, char, kind> {}(input_lexer, ap, suppress_assignment);
  228. case LengthModifier::Short:
  229. return ReadElementConcrete<T, short, kind> {}(input_lexer, ap, suppress_assignment);
  230. case LengthModifier::Long:
  231. if constexpr (IsSame<T, int>)
  232. return ReadElementConcrete<T, long, kind> {}(input_lexer, ap, suppress_assignment);
  233. if constexpr (IsSame<T, unsigned>)
  234. return ReadElementConcrete<T, unsigned long, kind> {}(input_lexer, ap, suppress_assignment);
  235. if constexpr (IsSame<T, float>)
  236. return ReadElementConcrete<int, double, kind> {}(input_lexer, ap, suppress_assignment);
  237. return false;
  238. case LengthModifier::LongLong:
  239. if constexpr (IsSame<T, int>)
  240. return ReadElementConcrete<long long, long long, kind> {}(input_lexer, ap, suppress_assignment);
  241. if constexpr (IsSame<T, unsigned>)
  242. return ReadElementConcrete<unsigned long long, unsigned long long, kind> {}(input_lexer, ap, suppress_assignment);
  243. if constexpr (IsSame<T, float>)
  244. return ReadElementConcrete<long long, double, kind> {}(input_lexer, ap, suppress_assignment);
  245. return false;
  246. case LengthModifier::IntMax:
  247. return ReadElementConcrete<T, intmax_t, kind> {}(input_lexer, ap, suppress_assignment);
  248. case LengthModifier::Size:
  249. return ReadElementConcrete<T, size_t, kind> {}(input_lexer, ap, suppress_assignment);
  250. case LengthModifier::PtrDiff:
  251. return ReadElementConcrete<T, ptrdiff_t, kind> {}(input_lexer, ap, suppress_assignment);
  252. case LengthModifier::LongDouble:
  253. return ReadElementConcrete<T, long double, kind> {}(input_lexer, ap, suppress_assignment);
  254. }
  255. }
  256. };
  257. template<>
  258. struct ReadElement<char*, ReadKind::Normal> {
  259. ReadElement(StringView scan_set = {}, bool invert = false)
  260. : scan_set(scan_set.is_null() ? " \t\n\f\r" : scan_set)
  261. , invert(scan_set.is_null() ? true : invert)
  262. , was_null(scan_set.is_null())
  263. {
  264. }
  265. bool operator()(LengthModifier length_modifier, GenericLexer& input_lexer, va_list* ap, bool suppress_assignment)
  266. {
  267. // FIXME: Implement wide strings and such.
  268. if (length_modifier != LengthModifier::Default)
  269. return false;
  270. if (was_null)
  271. input_lexer.ignore_while(isspace);
  272. auto str = input_lexer.consume_while([this](auto c) { return this->matches(c); });
  273. if (str.is_empty())
  274. return false;
  275. if (!suppress_assignment) {
  276. auto* ptr = va_arg(*ap, char*);
  277. memcpy(ptr, str.characters_without_null_termination(), str.length());
  278. ptr[str.length()] = 0;
  279. }
  280. return true;
  281. }
  282. private:
  283. bool matches(char c) const
  284. {
  285. return invert ^ scan_set.contains(c);
  286. }
  287. const StringView scan_set;
  288. bool invert { false };
  289. bool was_null { false };
  290. };
  291. template<>
  292. struct ReadElement<void*, ReadKind::Normal> {
  293. bool operator()(LengthModifier length_modifier, GenericLexer& input_lexer, va_list* ap, bool suppress_assignment)
  294. {
  295. if (length_modifier != LengthModifier::Default)
  296. return false;
  297. input_lexer.ignore_while(isspace);
  298. auto str = input_lexer.consume_while([this](auto c) { return this->should_consume(c); });
  299. if (count != 8) {
  300. fail:;
  301. for (size_t i = 0; i < count; ++i)
  302. input_lexer.retreat();
  303. return false;
  304. }
  305. char buf[9] { 0 };
  306. memcpy(buf, str.characters_without_null_termination(), 8);
  307. buf[8] = 0;
  308. char* endptr = nullptr;
  309. auto value = strtoull(buf, &endptr, 16);
  310. if (endptr != &buf[8])
  311. goto fail;
  312. if (!suppress_assignment) {
  313. auto* ptr = va_arg(*ap, void**);
  314. memcpy(ptr, &value, sizeof(value));
  315. }
  316. return true;
  317. }
  318. private:
  319. bool should_consume(char c)
  320. {
  321. if (count == 8)
  322. return false;
  323. if (!isxdigit(c))
  324. return false;
  325. ++count;
  326. return true;
  327. }
  328. size_t count { 0 };
  329. };
  330. extern "C" int vsscanf(char const* input, char const* format, va_list ap)
  331. {
  332. GenericLexer format_lexer { { format, strlen(format) } };
  333. GenericLexer input_lexer { { input, strlen(input) } };
  334. int elements_matched = 0;
  335. va_list copy;
  336. __builtin_va_copy(copy, ap);
  337. while (!format_lexer.is_eof()) {
  338. format_lexer.ignore_while(isspace);
  339. if (!format_lexer.next_is('%')) {
  340. read_one_literal:;
  341. input_lexer.ignore_while(isspace);
  342. if (format_lexer.is_eof())
  343. break;
  344. auto next_char = format_lexer.consume();
  345. if (!input_lexer.consume_specific(next_char))
  346. return elements_matched;
  347. continue;
  348. }
  349. if (format_lexer.next_is("%%")) {
  350. format_lexer.ignore();
  351. goto read_one_literal;
  352. }
  353. format_lexer.ignore(); // '%'
  354. bool suppress_assignment = false;
  355. if (format_lexer.next_is('*')) {
  356. suppress_assignment = true;
  357. format_lexer.ignore();
  358. }
  359. // Parse width specification
  360. [[maybe_unused]] int width_specifier = 0;
  361. if (format_lexer.next_is(isdigit)) {
  362. auto width_digits = format_lexer.consume_while([](char c) { return isdigit(c); });
  363. width_specifier = width_digits.to_int().value();
  364. // FIXME: Actually use width specifier
  365. }
  366. bool invert_scanlist = false;
  367. StringView scanlist;
  368. LengthModifier length_modifier { LengthModifier::None };
  369. ConversionSpecifier conversion_specifier { ConversionSpecifier::Unspecified };
  370. reread_lookahead:;
  371. auto format_lookahead = format_lexer.peek();
  372. if (length_modifier == LengthModifier::None) {
  373. switch (format_lookahead) {
  374. case 'h':
  375. if (format_lexer.peek(1) == 'h') {
  376. format_lexer.consume(2);
  377. length_modifier = LengthModifier::Char;
  378. } else {
  379. format_lexer.consume(1);
  380. length_modifier = LengthModifier::Short;
  381. }
  382. break;
  383. case 'l':
  384. if (format_lexer.peek(1) == 'l') {
  385. format_lexer.consume(2);
  386. length_modifier = LengthModifier::LongLong;
  387. } else {
  388. format_lexer.consume(1);
  389. length_modifier = LengthModifier::Long;
  390. }
  391. break;
  392. case 'j':
  393. format_lexer.consume();
  394. length_modifier = LengthModifier::IntMax;
  395. break;
  396. case 'z':
  397. format_lexer.consume();
  398. length_modifier = LengthModifier::Size;
  399. break;
  400. case 't':
  401. format_lexer.consume();
  402. length_modifier = LengthModifier::PtrDiff;
  403. break;
  404. case 'L':
  405. format_lexer.consume();
  406. length_modifier = LengthModifier::LongDouble;
  407. break;
  408. default:
  409. length_modifier = LengthModifier::Default;
  410. break;
  411. }
  412. goto reread_lookahead;
  413. }
  414. if (conversion_specifier == ConversionSpecifier::Unspecified) {
  415. switch (format_lookahead) {
  416. case 'd':
  417. format_lexer.consume();
  418. conversion_specifier = ConversionSpecifier::Decimal;
  419. break;
  420. case 'i':
  421. format_lexer.consume();
  422. conversion_specifier = ConversionSpecifier::Integer;
  423. break;
  424. case 'o':
  425. format_lexer.consume();
  426. conversion_specifier = ConversionSpecifier::Octal;
  427. break;
  428. case 'u':
  429. format_lexer.consume();
  430. conversion_specifier = ConversionSpecifier::Unsigned;
  431. break;
  432. case 'x':
  433. case 'X':
  434. format_lexer.consume();
  435. conversion_specifier = ConversionSpecifier::Hex;
  436. break;
  437. case 'a':
  438. case 'e':
  439. case 'f':
  440. case 'g':
  441. format_lexer.consume();
  442. conversion_specifier = ConversionSpecifier::Floating;
  443. break;
  444. case 's':
  445. format_lexer.consume();
  446. conversion_specifier = ConversionSpecifier::String;
  447. break;
  448. case '[':
  449. format_lexer.consume();
  450. scanlist = format_lexer.consume_until(']');
  451. format_lexer.ignore();
  452. if (scanlist.starts_with('^')) {
  453. scanlist = scanlist.substring_view(1);
  454. invert_scanlist = true;
  455. }
  456. conversion_specifier = ConversionSpecifier::UseScanList;
  457. break;
  458. case 'c':
  459. format_lexer.consume();
  460. conversion_specifier = ConversionSpecifier::Character;
  461. break;
  462. case 'p':
  463. format_lexer.consume();
  464. conversion_specifier = ConversionSpecifier::Pointer;
  465. break;
  466. case 'n':
  467. format_lexer.consume();
  468. conversion_specifier = ConversionSpecifier::OutputNumberOfBytes;
  469. break;
  470. case 'C':
  471. format_lexer.consume();
  472. length_modifier = LengthModifier::Long;
  473. conversion_specifier = ConversionSpecifier::Character;
  474. break;
  475. case 'S':
  476. format_lexer.consume();
  477. length_modifier = LengthModifier::Long;
  478. conversion_specifier = ConversionSpecifier::String;
  479. break;
  480. default:
  481. format_lexer.consume();
  482. conversion_specifier = ConversionSpecifier::Invalid;
  483. break;
  484. }
  485. }
  486. // Now try to read.
  487. switch (conversion_specifier) {
  488. case ConversionSpecifier::Invalid:
  489. case ConversionSpecifier::Unspecified:
  490. default:
  491. // "undefined behavior", let's be nice and crash.
  492. dbgln("Invalid conversion specifier {} in scanf!", (int)conversion_specifier);
  493. VERIFY_NOT_REACHED();
  494. case ConversionSpecifier::Decimal:
  495. if (!ReadElement<int, ReadKind::Normal> {}(length_modifier, input_lexer, &copy, suppress_assignment))
  496. format_lexer.consume_all();
  497. else if (!suppress_assignment)
  498. ++elements_matched;
  499. break;
  500. case ConversionSpecifier::Integer:
  501. if (!ReadElement<int, ReadKind::Infer> {}(length_modifier, input_lexer, &copy, suppress_assignment))
  502. format_lexer.consume_all();
  503. else if (!suppress_assignment)
  504. ++elements_matched;
  505. break;
  506. case ConversionSpecifier::Octal:
  507. if (!ReadElement<unsigned, ReadKind::Octal> {}(length_modifier, input_lexer, &copy, suppress_assignment))
  508. format_lexer.consume_all();
  509. else if (!suppress_assignment)
  510. ++elements_matched;
  511. break;
  512. case ConversionSpecifier::Unsigned:
  513. if (!ReadElement<unsigned, ReadKind::Normal> {}(length_modifier, input_lexer, &copy, suppress_assignment))
  514. format_lexer.consume_all();
  515. else if (!suppress_assignment)
  516. ++elements_matched;
  517. break;
  518. case ConversionSpecifier::Hex:
  519. if (!ReadElement<unsigned, ReadKind::Hex> {}(length_modifier, input_lexer, &copy, suppress_assignment))
  520. format_lexer.consume_all();
  521. else if (!suppress_assignment)
  522. ++elements_matched;
  523. break;
  524. case ConversionSpecifier::Floating:
  525. if (!ReadElement<float, ReadKind::Normal> {}(length_modifier, input_lexer, &copy, suppress_assignment))
  526. format_lexer.consume_all();
  527. else if (!suppress_assignment)
  528. ++elements_matched;
  529. break;
  530. case ConversionSpecifier::String:
  531. if (!ReadElement<char*, ReadKind::Normal> {}(length_modifier, input_lexer, &copy, suppress_assignment))
  532. format_lexer.consume_all();
  533. else if (!suppress_assignment)
  534. ++elements_matched;
  535. break;
  536. case ConversionSpecifier::UseScanList:
  537. if (!ReadElement<char*, ReadKind::Normal> { scanlist, invert_scanlist }(length_modifier, input_lexer, &copy, suppress_assignment))
  538. format_lexer.consume_all();
  539. else if (!suppress_assignment)
  540. ++elements_matched;
  541. break;
  542. case ConversionSpecifier::Character:
  543. if (!ReadElement<char, ReadKind::Normal> {}(length_modifier, input_lexer, &copy, suppress_assignment))
  544. format_lexer.consume_all();
  545. else if (!suppress_assignment)
  546. ++elements_matched;
  547. break;
  548. case ConversionSpecifier::Pointer:
  549. if (!ReadElement<void*, ReadKind::Normal> {}(length_modifier, input_lexer, &copy, suppress_assignment))
  550. format_lexer.consume_all();
  551. else if (!suppress_assignment)
  552. ++elements_matched;
  553. break;
  554. case ConversionSpecifier::OutputNumberOfBytes: {
  555. input_lexer.ignore_while(isspace);
  556. if (!suppress_assignment) {
  557. auto* ptr = va_arg(copy, int*);
  558. *ptr = input_lexer.tell();
  559. }
  560. break;
  561. }
  562. }
  563. }
  564. va_end(copy);
  565. return elements_matched;
  566. }