Numbers.cpp 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326
  1. /*
  2. * Copyright (c) 2023, Jonatan Klemets <jonatan.r.klemets@gmail.com>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <AK/GenericLexer.h>
  7. #include <LibWeb/HTML/Numbers.h>
  8. #include <LibWeb/Infra/CharacterTypes.h>
  9. #include <math.h>
  10. namespace Web::HTML {
  11. // https://html.spec.whatwg.org/multipage/common-microsyntaxes.html#rules-for-parsing-integers
  12. Optional<i32> parse_integer(StringView string)
  13. {
  14. // 1. Let input be the string being parsed.
  15. // 2. Let position be a pointer into input, initially pointing at the start of the string.
  16. GenericLexer lexer { string };
  17. // 3. Let sign have the value "positive".
  18. // NOTE: Skipped, see comment on step 6.
  19. // 4. Skip ASCII whitespace within input given position.
  20. lexer.ignore_while(Web::Infra::is_ascii_whitespace);
  21. // 5. If position is past the end of input, return an error.
  22. if (lexer.is_eof()) {
  23. return {};
  24. }
  25. // 6. If the character indicated by position (the first character) is a U+002D HYPHEN-MINUS character (-):
  26. //
  27. // If we parse a signed integer, then we include the sign character (if present) in the collect step
  28. // (step 8) and lean on `AK::StringUtils::convert_to_int` to handle it for us.
  29. size_t start_index = lexer.tell();
  30. if (lexer.peek() == '-' || lexer.peek() == '+') {
  31. lexer.consume();
  32. }
  33. // 7. If the character indicated by position is not an ASCII digit, then return an error.
  34. if (!lexer.next_is(is_ascii_digit)) {
  35. return {};
  36. }
  37. // 8. Collect a sequence of code points that are ASCII digits from input given position, and interpret the resulting sequence as a base-ten integer. Let value be that integer.
  38. lexer.consume_while(is_ascii_digit);
  39. size_t end_index = lexer.tell();
  40. auto digits = lexer.input().substring_view(start_index, end_index - start_index);
  41. auto optional_value = AK::StringUtils::convert_to_int<i32>(digits);
  42. // 9. If sign is "positive", return value, otherwise return the result of subtracting value from zero.
  43. // NOTE: Skipped, see comment on step 6.
  44. return optional_value;
  45. }
  46. // https://html.spec.whatwg.org/multipage/common-microsyntaxes.html#rules-for-parsing-non-negative-integers
  47. Optional<u32> parse_non_negative_integer(StringView string)
  48. {
  49. // 1. Let input be the string being parsed.
  50. // 2. Let value be the result of parsing input using the rules for parsing integers.
  51. //
  52. // NOTE: Because we call `parse_integer`, we parse all integers as signed. If we need the extra
  53. // size that an unsigned integer offers, then this would need to be improved. That said,
  54. // I don't think we need to support such large integers at the moment.
  55. auto optional_value = parse_integer(string);
  56. // 3. If value is an error, return an error.
  57. if (!optional_value.has_value()) {
  58. return {};
  59. }
  60. // 4. If value is less than zero, return an error.
  61. if (optional_value.value() < 0) {
  62. return {};
  63. }
  64. // 5. Return value.
  65. return static_cast<u32>(optional_value.value());
  66. }
  67. // https://html.spec.whatwg.org/multipage/common-microsyntaxes.html#rules-for-parsing-floating-point-number-values
  68. Optional<double> parse_floating_point_number(StringView string)
  69. {
  70. // 1. Let input be the string being parsed.
  71. // 2. Let position be a pointer into input, initially pointing at the start of the string.
  72. GenericLexer lexer { string };
  73. // 3. Let value have the value 1.
  74. double value = 1;
  75. // 4. Let divisor have the value 1.
  76. double divisor = 1;
  77. // 5. Let exponent have the value 1.
  78. i16 exponent = 1;
  79. // 6. Skip ASCII whitespace within input given position.
  80. lexer.ignore_while(Web::Infra::is_ascii_whitespace);
  81. // 7. If position is past the end of input, return an error.
  82. if (lexer.is_eof()) {
  83. return {};
  84. }
  85. // 8. If the character indicated by position is a U+002D HYPHEN-MINUS character (-):
  86. if (lexer.next_is('-')) {
  87. // 8.1. Change value and divisor to −1.
  88. value = -1;
  89. divisor = -1;
  90. // 8.2. Advance position to the next character.
  91. lexer.consume();
  92. // 8.3. If position is past the end of input, return an error.
  93. if (lexer.is_eof()) {
  94. return {};
  95. }
  96. }
  97. // Otherwise, if the character indicated by position (the first character) is a U+002B PLUS SIGN character (+):
  98. else if (lexer.next_is('+')) {
  99. // 8.1. Advance position to the next character. (The "+" is ignored, but it is not conforming.)
  100. lexer.consume();
  101. // 8.2. If position is past the end of input, return an error.
  102. if (lexer.is_eof()) {
  103. return {};
  104. }
  105. }
  106. // 9. If the character indicated by position is a U+002E FULL STOP (.),
  107. // and that is not the last character in input,
  108. // and the character after the character indicated by position is an ASCII digit,
  109. // then set value to zero and jump to the step labeled fraction.
  110. if (lexer.next_is('.') && (lexer.tell_remaining() > 1) && is_ascii_digit(lexer.peek(1))) {
  111. value = 0;
  112. goto fraction;
  113. }
  114. // 10. If the character indicated by position is not an ASCII digit, then return an error.
  115. if (!lexer.next_is(is_ascii_digit)) {
  116. return {};
  117. }
  118. // 11. Collect a sequence of code points that are ASCII digits from input given position, and interpret the resulting sequence as a base-ten integer.
  119. // Multiply value by that integer.
  120. {
  121. size_t start_index = lexer.tell();
  122. lexer.consume_while(is_ascii_digit);
  123. size_t end_index = lexer.tell();
  124. auto digits = lexer.input().substring_view(start_index, end_index - start_index);
  125. auto optional_value = AK::StringUtils::convert_to_floating_point<double>(digits, TrimWhitespace::No);
  126. value *= optional_value.value();
  127. }
  128. // 12. If position is past the end of input, jump to the step labeled conversion.
  129. if (lexer.is_eof()) {
  130. goto conversion;
  131. }
  132. fraction: {
  133. // 13. Fraction: If the character indicated by position is a U+002E FULL STOP (.), run these substeps:
  134. if (lexer.next_is('.')) {
  135. // 13.1. Advance position to the next character.
  136. lexer.consume();
  137. // 13.2. If position is past the end of input,
  138. // or if the character indicated by position is not an ASCII digit,
  139. // U+0065 LATIN SMALL LETTER E (e), or U+0045 LATIN CAPITAL LETTER E (E),
  140. // then jump to the step labeled conversion.
  141. if (lexer.is_eof() || (!lexer.next_is(is_ascii_digit) && !lexer.next_is('e') && !lexer.next_is('E'))) {
  142. goto conversion;
  143. }
  144. // 13.3. If the character indicated by position is a U+0065 LATIN SMALL LETTER E character (e) or a U+0045 LATIN CAPITAL LETTER E character (E),
  145. // skip the remainder of these substeps.
  146. if (lexer.next_is('e') || lexer.next_is('E')) {
  147. goto fraction_exit;
  148. }
  149. // fraction_loop:
  150. while (true) {
  151. // 13.4. Fraction loop: Multiply divisor by ten.
  152. divisor *= 10;
  153. // 13.5. Add the value of the character indicated by position, interpreted as a base-ten digit (0..9) and divided by divisor, to value.
  154. value += (lexer.peek() - '0') / divisor;
  155. // 13.6. Advance position to the next character.
  156. lexer.consume();
  157. // 13.7. If position is past the end of input, then jump to the step labeled conversion.
  158. if (lexer.is_eof()) {
  159. goto conversion;
  160. }
  161. // 13.8. If the character indicated by position is an ASCII digit, jump back to the step labeled fraction loop in these substeps.
  162. if (!lexer.next_is(is_ascii_digit)) {
  163. break;
  164. }
  165. }
  166. }
  167. fraction_exit:
  168. }
  169. // 14. If the character indicated by position is U+0065 (e) or a U+0045 (E), then:
  170. if (lexer.next_is('e') || lexer.next_is('E')) {
  171. // 14.1. Advance position to the next character.
  172. lexer.consume();
  173. // 14.2. If position is past the end of input, then jump to the step labeled conversion.
  174. if (lexer.is_eof()) {
  175. goto conversion;
  176. }
  177. // 14.3. If the character indicated by position is a U+002D HYPHEN-MINUS character (-):
  178. if (lexer.next_is('-')) {
  179. // 14.3.1. Change exponent to −1.
  180. exponent = -1;
  181. // 14.3.2. Advance position to the next character.
  182. lexer.consume();
  183. // 14.3.3. If position is past the end of input, then jump to the step labeled conversion.
  184. if (lexer.is_eof()) {
  185. goto conversion;
  186. }
  187. }
  188. // Otherwise, if the character indicated by position is a U+002B PLUS SIGN character (+):
  189. else if (lexer.next_is('+')) {
  190. // 14.3.1. Advance position to the next character.
  191. lexer.consume();
  192. // 14.3.2. If position is past the end of input, then jump to the step labeled conversion.
  193. if (lexer.is_eof()) {
  194. goto conversion;
  195. }
  196. }
  197. // 14.4. If the character indicated by position is not an ASCII digit, then jump to the step labeled conversion.
  198. if (!lexer.next_is(is_ascii_digit)) {
  199. goto conversion;
  200. }
  201. // 14.5. Collect a sequence of code points that are ASCII digits from input given position, and interpret the resulting sequence as a base-ten integer.
  202. // Multiply exponent by that integer.
  203. {
  204. size_t start_index = lexer.tell();
  205. lexer.consume_while(is_ascii_digit);
  206. size_t end_index = lexer.tell();
  207. auto digits = lexer.input().substring_view(start_index, end_index - start_index);
  208. auto optional_value = AK::StringUtils::convert_to_int<i32>(digits);
  209. exponent *= optional_value.value();
  210. }
  211. // 14.6. Multiply value by ten raised to the exponentth power.
  212. value *= pow(10, exponent);
  213. }
  214. conversion: {
  215. // 15. Conversion: Let S be the set of finite IEEE 754 double-precision floating-point values except −0,
  216. // but with two special values added: 2^1024 and −2^1024.
  217. if (!isfinite(value)) {
  218. return {};
  219. }
  220. if ((value == 0) && signbit(value)) {
  221. return 0;
  222. }
  223. // 16. Let rounded-value be the number in S that is closest to value, selecting the number with an even significand if there are two equally close values.
  224. // (The two special values 2^1024 and −2^1024 are considered to have even significands for this purpose.)
  225. double rounded_value = value;
  226. // 17. If rounded-value is 2^1024 or −2^1024, return an error.
  227. if (abs(rounded_value) >= pow(2, 1024)) {
  228. return {};
  229. }
  230. // 18. Return rounded-value.
  231. return rounded_value;
  232. }
  233. }
  234. // https://html.spec.whatwg.org/multipage/common-microsyntaxes.html#valid-floating-point-number
  235. bool is_valid_floating_point_number(StringView string)
  236. {
  237. GenericLexer lexer { string };
  238. // 1. Optionally, a U+002D HYPHEN-MINUS character (-).
  239. lexer.consume_specific('-');
  240. // 2. One or both of the following, in the given order:
  241. // 2.1. A series of one or more ASCII digits.
  242. bool has_leading_digits = !lexer.consume_while(is_ascii_digit).is_empty();
  243. // 2.2. Both of the following, in the given order:
  244. // 2.2.1. A single U+002E FULL STOP character (.).
  245. if (lexer.consume_specific('.')) {
  246. // 2.2.2. A series of one or more ASCII digits.
  247. if (lexer.consume_while(is_ascii_digit).is_empty())
  248. return false;
  249. } else if (!has_leading_digits) {
  250. // Doesn’t begin with digits, doesn’t begin with a full stop followed by digits.
  251. return false;
  252. }
  253. // 3. Optionally:
  254. // 3.1. Either a U+0065 LATIN SMALL LETTER E character (e) or a U+0045 LATIN CAPITAL
  255. // LETTER E character (E).
  256. if (lexer.consume_specific('e') || lexer.consume_specific('E')) {
  257. // 3.2. Optionally, a U+002D HYPHEN-MINUS character (-) or U+002B PLUS SIGN
  258. // character (+).
  259. lexer.consume_specific('-') || lexer.consume_specific('+');
  260. // 3.3. A series of one or more ASCII digits.
  261. if (lexer.consume_while(is_ascii_digit).is_empty())
  262. return false;
  263. }
  264. return lexer.tell_remaining() == 0;
  265. }
  266. WebIDL::ExceptionOr<String> convert_non_negative_integer_to_string(JS::Realm& realm, WebIDL::Long value)
  267. {
  268. if (value < 0)
  269. return WebIDL::IndexSizeError::create(realm, "The attribute is limited to only non-negative numbers"_string);
  270. return String::number(value);
  271. }
  272. }