Numbers.cpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349
  1. /*
  2. * Copyright (c) 2023, Jonatan Klemets <jonatan.r.klemets@gmail.com>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <AK/GenericLexer.h>
  7. #include <LibWeb/HTML/Numbers.h>
  8. #include <LibWeb/Infra/CharacterTypes.h>
  9. #include <math.h>
  10. namespace Web::HTML {
  11. // https://html.spec.whatwg.org/multipage/common-microsyntaxes.html#rules-for-parsing-integers
  12. Optional<StringView> parse_integer_digits(StringView string)
  13. {
  14. // 1. Let input be the string being parsed.
  15. // 2. Let position be a pointer into input, initially pointing at the start of the string.
  16. GenericLexer lexer { string };
  17. // 3. Let sign have the value "positive".
  18. // NOTE: Skipped, see comment on step 6.
  19. // 4. Skip ASCII whitespace within input given position.
  20. lexer.ignore_while(Web::Infra::is_ascii_whitespace);
  21. // 5. If position is past the end of input, return an error.
  22. if (lexer.is_eof()) {
  23. return OptionalNone {};
  24. }
  25. // 6. If the character indicated by position (the first character) is a U+002D HYPHEN-MINUS character (-):
  26. //
  27. // If we parse a signed integer, then we include the sign character (if present) in the collect step
  28. // (step 8) and lean on `AK::StringUtils::convert_to_int` to handle it for us.
  29. size_t start_index = lexer.tell();
  30. if (lexer.peek() == '-' || lexer.peek() == '+') {
  31. lexer.consume();
  32. }
  33. // 7. If the character indicated by position is not an ASCII digit, then return an error.
  34. if (!lexer.next_is(is_ascii_digit)) {
  35. return OptionalNone {};
  36. }
  37. // 8. Collect a sequence of code points that are ASCII digits from input given position, and interpret the resulting sequence as a base-ten integer. Let value be that integer.
  38. // NOTE: Integer conversion is performed by the caller.
  39. lexer.consume_while(is_ascii_digit);
  40. size_t end_index = lexer.tell();
  41. auto digits = lexer.input().substring_view(start_index, end_index - start_index);
  42. // 9. If sign is "positive", return value, otherwise return the result of subtracting value from zero.
  43. // NOTE: Skipped, see comment on step 6.
  44. return digits;
  45. }
  46. // https://html.spec.whatwg.org/multipage/common-microsyntaxes.html#rules-for-parsing-integers
  47. Optional<i32> parse_integer(StringView string)
  48. {
  49. auto optional_digits = parse_integer_digits(string);
  50. if (!optional_digits.has_value())
  51. return {};
  52. return optional_digits->to_number<i32>(TrimWhitespace::No);
  53. }
  54. // https://html.spec.whatwg.org/multipage/common-microsyntaxes.html#rules-for-parsing-non-negative-integers
  55. Optional<StringView> parse_non_negative_integer_digits(StringView string)
  56. {
  57. // 1. Let input be the string being parsed.
  58. // 2. Let value be the result of parsing input using the rules for parsing integers.
  59. //
  60. // NOTE: Because we call `parse_integer`, we parse all integers as signed. If we need the extra
  61. // size that an unsigned integer offers, then this would need to be improved. That said,
  62. // I don't think we need to support such large integers at the moment.
  63. auto optional_integer_digits = parse_integer_digits(string);
  64. // 3. If value is an error, return an error.
  65. if (!optional_integer_digits.has_value())
  66. return OptionalNone {};
  67. // 4. If value is less than zero, return an error.
  68. if (optional_integer_digits->length() > 1 && optional_integer_digits->starts_with('-') && optional_integer_digits->bytes().at(1) != '0')
  69. return OptionalNone {};
  70. // 5. Return value.
  71. // NOTE: Integer conversion is performed by the caller.
  72. return optional_integer_digits;
  73. }
  74. // https://html.spec.whatwg.org/multipage/common-microsyntaxes.html#rules-for-parsing-non-negative-integers
  75. Optional<u32> parse_non_negative_integer(StringView string)
  76. {
  77. auto optional_digits = parse_non_negative_integer_digits(string);
  78. if (!optional_digits.has_value())
  79. return {};
  80. auto optional_value = optional_digits->to_number<i64>(TrimWhitespace::No);
  81. if (!optional_value.has_value() || *optional_value > NumericLimits<u32>::max())
  82. return {};
  83. return static_cast<u32>(optional_value.value());
  84. }
  85. // https://html.spec.whatwg.org/multipage/common-microsyntaxes.html#rules-for-parsing-floating-point-number-values
  86. Optional<double> parse_floating_point_number(StringView string)
  87. {
  88. // 1. Let input be the string being parsed.
  89. // 2. Let position be a pointer into input, initially pointing at the start of the string.
  90. GenericLexer lexer { string };
  91. // 3. Let value have the value 1.
  92. double value = 1;
  93. // 4. Let divisor have the value 1.
  94. double divisor = 1;
  95. // 5. Let exponent have the value 1.
  96. i16 exponent = 1;
  97. // 6. Skip ASCII whitespace within input given position.
  98. lexer.ignore_while(Web::Infra::is_ascii_whitespace);
  99. // 7. If position is past the end of input, return an error.
  100. if (lexer.is_eof()) {
  101. return {};
  102. }
  103. // 8. If the character indicated by position is a U+002D HYPHEN-MINUS character (-):
  104. if (lexer.next_is('-')) {
  105. // 8.1. Change value and divisor to −1.
  106. value = -1;
  107. divisor = -1;
  108. // 8.2. Advance position to the next character.
  109. lexer.consume();
  110. // 8.3. If position is past the end of input, return an error.
  111. if (lexer.is_eof()) {
  112. return {};
  113. }
  114. }
  115. // Otherwise, if the character indicated by position (the first character) is a U+002B PLUS SIGN character (+):
  116. else if (lexer.next_is('+')) {
  117. // 8.1. Advance position to the next character. (The "+" is ignored, but it is not conforming.)
  118. lexer.consume();
  119. // 8.2. If position is past the end of input, return an error.
  120. if (lexer.is_eof()) {
  121. return {};
  122. }
  123. }
  124. // 9. If the character indicated by position is a U+002E FULL STOP (.),
  125. // and that is not the last character in input,
  126. // and the character after the character indicated by position is an ASCII digit,
  127. // then set value to zero and jump to the step labeled fraction.
  128. if (lexer.next_is('.') && (lexer.tell_remaining() > 1) && is_ascii_digit(lexer.peek(1))) {
  129. value = 0;
  130. goto fraction;
  131. }
  132. // 10. If the character indicated by position is not an ASCII digit, then return an error.
  133. if (!lexer.next_is(is_ascii_digit)) {
  134. return {};
  135. }
  136. // 11. Collect a sequence of code points that are ASCII digits from input given position, and interpret the resulting sequence as a base-ten integer.
  137. // Multiply value by that integer.
  138. {
  139. size_t start_index = lexer.tell();
  140. lexer.consume_while(is_ascii_digit);
  141. size_t end_index = lexer.tell();
  142. auto digits = lexer.input().substring_view(start_index, end_index - start_index);
  143. auto optional_value = AK::StringUtils::convert_to_floating_point<double>(digits, TrimWhitespace::No);
  144. value *= optional_value.value();
  145. }
  146. // 12. If position is past the end of input, jump to the step labeled conversion.
  147. if (lexer.is_eof()) {
  148. goto conversion;
  149. }
  150. fraction: {
  151. // 13. Fraction: If the character indicated by position is a U+002E FULL STOP (.), run these substeps:
  152. if (lexer.next_is('.')) {
  153. // 13.1. Advance position to the next character.
  154. lexer.consume();
  155. // 13.2. If position is past the end of input,
  156. // or if the character indicated by position is not an ASCII digit,
  157. // U+0065 LATIN SMALL LETTER E (e), or U+0045 LATIN CAPITAL LETTER E (E),
  158. // then jump to the step labeled conversion.
  159. if (lexer.is_eof() || (!lexer.next_is(is_ascii_digit) && !lexer.next_is('e') && !lexer.next_is('E'))) {
  160. goto conversion;
  161. }
  162. // 13.3. If the character indicated by position is a U+0065 LATIN SMALL LETTER E character (e) or a U+0045 LATIN CAPITAL LETTER E character (E),
  163. // skip the remainder of these substeps.
  164. if (lexer.next_is('e') || lexer.next_is('E')) {
  165. goto fraction_exit;
  166. }
  167. // fraction_loop:
  168. while (true) {
  169. // 13.4. Fraction loop: Multiply divisor by ten.
  170. divisor *= 10;
  171. // 13.5. Add the value of the character indicated by position, interpreted as a base-ten digit (0..9) and divided by divisor, to value.
  172. value += (lexer.peek() - '0') / divisor;
  173. // 13.6. Advance position to the next character.
  174. lexer.consume();
  175. // 13.7. If position is past the end of input, then jump to the step labeled conversion.
  176. if (lexer.is_eof()) {
  177. goto conversion;
  178. }
  179. // 13.8. If the character indicated by position is an ASCII digit, jump back to the step labeled fraction loop in these substeps.
  180. if (!lexer.next_is(is_ascii_digit)) {
  181. break;
  182. }
  183. }
  184. }
  185. fraction_exit:
  186. }
  187. // 14. If the character indicated by position is U+0065 (e) or a U+0045 (E), then:
  188. if (lexer.next_is('e') || lexer.next_is('E')) {
  189. // 14.1. Advance position to the next character.
  190. lexer.consume();
  191. // 14.2. If position is past the end of input, then jump to the step labeled conversion.
  192. if (lexer.is_eof()) {
  193. goto conversion;
  194. }
  195. // 14.3. If the character indicated by position is a U+002D HYPHEN-MINUS character (-):
  196. if (lexer.next_is('-')) {
  197. // 14.3.1. Change exponent to −1.
  198. exponent = -1;
  199. // 14.3.2. Advance position to the next character.
  200. lexer.consume();
  201. // 14.3.3. If position is past the end of input, then jump to the step labeled conversion.
  202. if (lexer.is_eof()) {
  203. goto conversion;
  204. }
  205. }
  206. // Otherwise, if the character indicated by position is a U+002B PLUS SIGN character (+):
  207. else if (lexer.next_is('+')) {
  208. // 14.3.1. Advance position to the next character.
  209. lexer.consume();
  210. // 14.3.2. If position is past the end of input, then jump to the step labeled conversion.
  211. if (lexer.is_eof()) {
  212. goto conversion;
  213. }
  214. }
  215. // 14.4. If the character indicated by position is not an ASCII digit, then jump to the step labeled conversion.
  216. if (!lexer.next_is(is_ascii_digit)) {
  217. goto conversion;
  218. }
  219. // 14.5. Collect a sequence of code points that are ASCII digits from input given position, and interpret the resulting sequence as a base-ten integer.
  220. // Multiply exponent by that integer.
  221. {
  222. size_t start_index = lexer.tell();
  223. lexer.consume_while(is_ascii_digit);
  224. size_t end_index = lexer.tell();
  225. auto digits = lexer.input().substring_view(start_index, end_index - start_index);
  226. auto optional_value = AK::StringUtils::convert_to_int<i32>(digits);
  227. exponent *= optional_value.value();
  228. }
  229. // 14.6. Multiply value by ten raised to the exponentth power.
  230. value *= pow(10, exponent);
  231. }
  232. conversion: {
  233. // 15. Conversion: Let S be the set of finite IEEE 754 double-precision floating-point values except −0,
  234. // but with two special values added: 2^1024 and −2^1024.
  235. if (!isfinite(value)) {
  236. return {};
  237. }
  238. if ((value == 0) && signbit(value)) {
  239. return 0;
  240. }
  241. // 16. Let rounded-value be the number in S that is closest to value, selecting the number with an even significand if there are two equally close values.
  242. // (The two special values 2^1024 and −2^1024 are considered to have even significands for this purpose.)
  243. double rounded_value = value;
  244. // 17. If rounded-value is 2^1024 or −2^1024, return an error.
  245. if (abs(rounded_value) >= pow(2, 1024)) {
  246. return {};
  247. }
  248. // 18. Return rounded-value.
  249. return rounded_value;
  250. }
  251. }
  252. // https://html.spec.whatwg.org/multipage/common-microsyntaxes.html#valid-floating-point-number
  253. bool is_valid_floating_point_number(StringView string)
  254. {
  255. GenericLexer lexer { string };
  256. // 1. Optionally, a U+002D HYPHEN-MINUS character (-).
  257. lexer.consume_specific('-');
  258. // 2. One or both of the following, in the given order:
  259. // 2.1. A series of one or more ASCII digits.
  260. bool has_leading_digits = !lexer.consume_while(is_ascii_digit).is_empty();
  261. // 2.2. Both of the following, in the given order:
  262. // 2.2.1. A single U+002E FULL STOP character (.).
  263. if (lexer.consume_specific('.')) {
  264. // 2.2.2. A series of one or more ASCII digits.
  265. if (lexer.consume_while(is_ascii_digit).is_empty())
  266. return false;
  267. } else if (!has_leading_digits) {
  268. // Doesn’t begin with digits, doesn’t begin with a full stop followed by digits.
  269. return false;
  270. }
  271. // 3. Optionally:
  272. // 3.1. Either a U+0065 LATIN SMALL LETTER E character (e) or a U+0045 LATIN CAPITAL
  273. // LETTER E character (E).
  274. if (lexer.consume_specific('e') || lexer.consume_specific('E')) {
  275. // 3.2. Optionally, a U+002D HYPHEN-MINUS character (-) or U+002B PLUS SIGN
  276. // character (+).
  277. lexer.consume_specific('-') || lexer.consume_specific('+');
  278. // 3.3. A series of one or more ASCII digits.
  279. if (lexer.consume_while(is_ascii_digit).is_empty())
  280. return false;
  281. }
  282. return lexer.tell_remaining() == 0;
  283. }
  284. WebIDL::ExceptionOr<String> convert_non_negative_integer_to_string(JS::Realm& realm, WebIDL::Long value)
  285. {
  286. if (value < 0)
  287. return WebIDL::IndexSizeError::create(realm, "The attribute is limited to only non-negative numbers"_string);
  288. return String::number(value);
  289. }
  290. }