ladybird/Userland/Libraries/LibJS/Runtime/Temporal/ISO8601.cpp
Linus Groh de23f0b68c LibJS: Start fleshing out an ISO 8601 parser for Temporal
This is the start of a parser for the ISO 8601 grammar used in the
Temporal spec:
https://tc39.es/proposal-temporal/#sec-temporal-iso8601grammar

We will, on purpose, not use a generic ISO 8601 parser from AK or
similar for two reasons:

- Many AOs make specific assumptions about which productions exist and
  access them directly, even when they're part of a larger production.
- The spec says "The grammar deviates from the standard given in ISO
  8601 in the following ways:" and then lists 17 of such deviations.
  Making that work with a general purpose parser is not worth it.

The public API is not being used anywhere yet, but will be in the next
couple of commits. Likewise, the Production enum will be populated with
all the productions accessed directly (e.g. TemporalDateString).

Many thanks to Ali for showing me how to improve my initial approach
full of macros with a nice RAII helper - it's much nicer :^)

Co-Authored-By: Ali Mohammad Pur <mpfard@serenityos.org>
2021-11-20 23:10:09 +00:00

489 lines
13 KiB
C++

/*
* Copyright (c) 2021, Linus Groh <linusg@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <AK/CharacterTypes.h>
#include <LibJS/Runtime/Temporal/ISO8601.h>
namespace JS::Temporal {
namespace Detail {
// https://tc39.es/proposal-temporal/#prod-DecimalDigit
bool ISO8601Parser::parse_decimal_digit()
{
// DecimalDigit : one of
// 0 1 2 3 4 5 6 7 8 9
if (m_state.lexer.next_is(is_ascii_digit)) {
m_state.lexer.consume();
return true;
}
return false;
}
// https://tc39.es/proposal-temporal/#prod-NonZeroDigit
bool ISO8601Parser::parse_non_zero_digit()
{
// NonZeroDigit : one of
// 1 2 3 4 5 6 7 8 9
if (m_state.lexer.next_is(is_ascii_digit) && !m_state.lexer.next_is('0')) {
m_state.lexer.consume();
return true;
}
return false;
}
// https://tc39.es/proposal-temporal/#prod-ASCIISign
bool ISO8601Parser::parse_ascii_sign()
{
// ASCIISign : one of
// + -
return m_state.lexer.consume_specific('+')
|| m_state.lexer.consume_specific('-');
}
// https://tc39.es/proposal-temporal/#prod-Sign
bool ISO8601Parser::parse_sign()
{
// Sign :
// ASCIISign
// U+2212
StateTransaction transaction { *this };
auto success = parse_ascii_sign()
|| m_state.lexer.consume_specific("\xE2\x88\x92"sv);
if (!success)
return false;
m_state.parse_result.sign = transaction.parsed_string_view();
transaction.commit();
return true;
}
// https://tc39.es/proposal-temporal/#prod-Hour
bool ISO8601Parser::parse_hour()
{
// Hour :
// 0 DecimalDigit
// 1 DecimalDigit
// 20
// 21
// 22
// 23
StateTransaction transaction { *this };
if (m_state.lexer.consume_specific('0') || m_state.lexer.consume_specific('1')) {
if (!parse_decimal_digit())
return false;
} else {
auto success = m_state.lexer.consume_specific("20"sv)
|| m_state.lexer.consume_specific("21"sv)
|| m_state.lexer.consume_specific("22"sv)
|| m_state.lexer.consume_specific("23"sv);
if (!success)
return false;
}
transaction.commit();
return true;
}
// https://tc39.es/proposal-temporal/#prod-MinuteSecond
bool ISO8601Parser::parse_minute_second()
{
// MinuteSecond :
// 0 DecimalDigit
// 1 DecimalDigit
// 2 DecimalDigit
// 3 DecimalDigit
// 4 DecimalDigit
// 5 DecimalDigit
StateTransaction transaction { *this };
auto success = m_state.lexer.consume_specific('0')
|| m_state.lexer.consume_specific('1')
|| m_state.lexer.consume_specific('2')
|| m_state.lexer.consume_specific('3')
|| m_state.lexer.consume_specific('4')
|| m_state.lexer.consume_specific('5');
if (!success)
return false;
if (!parse_decimal_digit())
return false;
transaction.commit();
return true;
}
// https://tc39.es/proposal-temporal/#prod-DecimalSeparator
bool ISO8601Parser::parse_decimal_separator()
{
// DecimalSeparator : one of
// . ,
return m_state.lexer.consume_specific('.')
|| m_state.lexer.consume_specific(',');
}
// https://tc39.es/proposal-temporal/#prod-DateTimeSeparator
bool ISO8601Parser::parse_date_time_separator()
{
// DateTimeSeparator :
// <SP>
// T
// t
return m_state.lexer.consume_specific(' ')
|| m_state.lexer.consume_specific('T')
|| m_state.lexer.consume_specific('t');
}
// https://tc39.es/proposal-temporal/#prod-DateYear
bool ISO8601Parser::parse_date_year()
{
// DateFourDigitYear :
// DecimalDigit DecimalDigit DecimalDigit DecimalDigit
// DateExtendedYear :
// Sign DecimalDigit DecimalDigit DecimalDigit DecimalDigit DecimalDigit DecimalDigit
// DateYear :
// DateFourDigitYear
// DateExtendedYear
StateTransaction transaction { *this };
if (parse_sign()) {
for (size_t i = 0; i < 6; ++i) {
if (!parse_decimal_digit())
return false;
}
} else {
for (size_t i = 0; i < 4; ++i) {
if (!parse_decimal_digit())
return false;
}
}
m_state.parse_result.date_year = transaction.parsed_string_view();
transaction.commit();
return true;
}
// https://tc39.es/proposal-temporal/#prod-DateMonth
bool ISO8601Parser::parse_date_month()
{
// DateMonth :
// 0 NonZeroDigit
// 10
// 11
// 12
StateTransaction transaction { *this };
if (m_state.lexer.consume_specific('0')) {
if (!parse_non_zero_digit())
return false;
} else {
auto success = m_state.lexer.consume_specific("10"sv)
|| m_state.lexer.consume_specific("11"sv)
|| m_state.lexer.consume_specific("12"sv);
if (!success)
return false;
}
m_state.parse_result.date_month = transaction.parsed_string_view();
transaction.commit();
return true;
}
// https://tc39.es/proposal-temporal/#prod-DateDay
bool ISO8601Parser::parse_date_day()
{
// DateDay :
// 0 NonZeroDigit
// 1 DecimalDigit
// 2 DecimalDigit
// 30
// 31
StateTransaction transaction { *this };
if (m_state.lexer.consume_specific('0')) {
if (!parse_non_zero_digit())
return false;
} else if (m_state.lexer.consume_specific('1') || m_state.lexer.consume_specific('2')) {
if (!parse_decimal_digit())
return false;
} else {
auto success = m_state.lexer.consume_specific("30"sv)
|| m_state.lexer.consume_specific("31"sv);
if (!success)
return false;
}
m_state.parse_result.date_day = transaction.parsed_string_view();
transaction.commit();
return true;
}
// https://tc39.es/proposal-temporal/#prod-Date
bool ISO8601Parser::parse_date()
{
// Date :
// DateYear - DateMonth - DateDay
// DateYear DateMonth DateDay
StateTransaction transaction { *this };
if (!parse_date_year())
return false;
auto with_dashes = m_state.lexer.consume_specific('-');
if (!parse_date_month())
return false;
if (with_dashes && !m_state.lexer.consume_specific('-'))
return false;
if (!parse_date_day())
return false;
transaction.commit();
return true;
}
// https://tc39.es/proposal-temporal/#prod-TimeHour
bool ISO8601Parser::parse_time_hour()
{
// TimeHour :
// Hour
StateTransaction transaction { *this };
if (!parse_hour())
return false;
m_state.parse_result.time_hour = transaction.parsed_string_view();
transaction.commit();
return true;
}
// https://tc39.es/proposal-temporal/#prod-TimeMinute
bool ISO8601Parser::parse_time_minute()
{
// TimeMinute :
// MinuteSecond
StateTransaction transaction { *this };
if (!parse_minute_second())
return false;
m_state.parse_result.time_minute = transaction.parsed_string_view();
transaction.commit();
return true;
}
// https://tc39.es/proposal-temporal/#prod-TimeSecond
bool ISO8601Parser::parse_time_second()
{
// TimeSecond :
// MinuteSecond
// 60
StateTransaction transaction { *this };
auto success = parse_minute_second()
|| m_state.lexer.consume_specific("60"sv);
if (!success)
return false;
m_state.parse_result.time_second = transaction.parsed_string_view();
transaction.commit();
return true;
}
// https://tc39.es/proposal-temporal/#prod-FractionalPart
bool ISO8601Parser::parse_fractional_part()
{
// FractionalPart :
// DecimalDigit DecimalDigit[opt] DecimalDigit[opt] DecimalDigit[opt] DecimalDigit[opt] DecimalDigit[opt] DecimalDigit[opt] DecimalDigit[opt] DecimalDigit[opt]
if (!parse_decimal_digit())
return false;
for (size_t i = 0; i < 8; ++i) {
if (!parse_decimal_digit())
break;
}
return true;
}
// https://tc39.es/proposal-temporal/#prod-TimeFractionalPart
bool ISO8601Parser::parse_time_fractional_part()
{
// TimeFractionalPart :
// FractionalPart
StateTransaction transaction { *this };
if (!parse_fractional_part())
return false;
m_state.parse_result.time_fractional_part = transaction.parsed_string_view();
transaction.commit();
return true;
}
// https://tc39.es/proposal-temporal/#prod-Fraction
bool ISO8601Parser::parse_fraction()
{
// Fraction :
// DecimalSeparator TimeFractionalPart
StateTransaction transaction { *this };
if (!parse_decimal_separator())
return false;
if (!parse_time_fractional_part())
return false;
transaction.commit();
return true;
}
// https://tc39.es/proposal-temporal/#prod-TimeFraction
bool ISO8601Parser::parse_time_fraction()
{
// TimeFraction :
// Fraction
return parse_fraction();
}
// https://tc39.es/proposal-temporal/#prod-TimeZoneOffsetRequired
bool ISO8601Parser::parse_time_zone_offset_required()
{
// TimeZoneOffsetRequired :
// TimeZoneUTCOffset TimeZoneBracketedAnnotation[opt]
return false;
}
// https://tc39.es/proposal-temporal/#prod-TimeZoneNameRequired
bool ISO8601Parser::parse_time_zone_name_required()
{
// TimeZoneNameRequired :
// TimeZoneUTCOffset[opt] TimeZoneBracketedAnnotation
return false;
}
// https://tc39.es/proposal-temporal/#prod-TimeZone
bool ISO8601Parser::parse_time_zone()
{
// TimeZone :
// TimeZoneOffsetRequired
// TimeZoneNameRequired
return parse_time_zone_offset_required()
|| parse_time_zone_name_required();
}
// https://tc39.es/proposal-temporal/#prod-CalendarName
bool ISO8601Parser::parse_calendar_name()
{
// CalChar :
// Alpha
// DecimalDigit
// CalendarNameComponent :
// CalChar CalChar CalChar CalChar[opt] CalChar[opt] CalChar[opt] CalChar[opt] CalChar[opt]
// CalendarNameTail :
// CalendarNameComponent
// CalendarNameComponent - CalendarNameTail
// CalendarName :
// CalendarNameTail
auto parse_calendar_name_component = [&] {
for (size_t i = 0; i < 8; ++i) {
if (!m_state.lexer.next_is(is_ascii_alphanumeric))
return i > 2;
m_state.lexer.consume();
}
return true;
};
StateTransaction transaction { *this };
do {
if (!parse_calendar_name_component())
return false;
} while (m_state.lexer.consume_specific('-'));
m_state.parse_result.calendar_name = transaction.parsed_string_view();
transaction.commit();
return true;
}
// https://tc39.es/proposal-temporal/#prod-Calendar
bool ISO8601Parser::parse_calendar()
{
// Calendar :
// [u-ca= CalendarName ]
StateTransaction transaction { *this };
if (!m_state.lexer.consume_specific("[u-ca="sv))
return false;
if (!parse_calendar_name())
return false;
if (!m_state.lexer.consume_specific(']'))
return false;
transaction.commit();
return true;
}
// https://tc39.es/proposal-temporal/#prod-TimeSpec
bool ISO8601Parser::parse_time_spec()
{
// TimeSpec :
// TimeHour
// TimeHour : TimeMinute
// TimeHour TimeMinute
// TimeHour : TimeMinute : TimeSecond TimeFraction[opt]
// TimeHour TimeMinute TimeSecond TimeFraction[opt]
StateTransaction transaction { *this };
if (!parse_time_hour())
return false;
if (m_state.lexer.consume_specific(':')) {
if (!parse_time_minute())
return false;
if (m_state.lexer.consume_specific(':')) {
if (!parse_time_second())
return false;
(void)parse_time_fraction();
}
} else if (parse_time_minute()) {
if (parse_time_second())
(void)parse_time_fraction();
}
transaction.commit();
return true;
}
// https://tc39.es/proposal-temporal/#prod-TimeSpecSeparator
bool ISO8601Parser::parse_time_spec_separator()
{
// TimeSpecSeparator :
// DateTimeSeparator TimeSpec
StateTransaction transaction { *this };
if (!parse_date_time_separator())
return false;
if (!parse_time_spec())
return false;
transaction.commit();
return true;
}
// https://tc39.es/proposal-temporal/#prod-DateTime
bool ISO8601Parser::parse_date_time()
{
// DateTime :
// Date TimeSpecSeparator[opt] TimeZone[opt]
if (!parse_date())
return false;
(void)parse_time_spec_separator();
(void)parse_time_zone();
return true;
}
// https://tc39.es/proposal-temporal/#prod-CalendarDateTime
bool ISO8601Parser::parse_calendar_date_time()
{
// CalendarDateTime :
// DateTime Calendar[opt]
if (!parse_date_time())
return false;
(void)parse_calendar();
return true;
}
}
#define JS_ENUMERATE_ISO8601_PRODUCTION_PARSERS
Optional<ParseResult> parse_iso8601(Production production, StringView input)
{
auto parser = Detail::ISO8601Parser { input };
switch (production) {
#define __JS_ENUMERATE(ProductionName, parse_production) \
case Production::ProductionName: \
if (!parser.parse_production()) \
return {}; \
break;
JS_ENUMERATE_ISO8601_PRODUCTION_PARSERS
#undef __JS_ENUMERATE
default:
VERIFY_NOT_REACHED();
}
// If we parsed successfully but didn't reach the end, the string doesn't match the given production.
if (!parser.lexer().is_eof())
return {};
return parser.parse_result();
}
}