LibCore: Port DateTime::parse to GenericLexer

The main motivation is to get rid of pesky `strtol`, which requires zero
termination of a `string` argument. This also somewhat simplifies the
code.
This commit is contained in:
Dan Klishch 2023-10-30 15:56:43 -04:00 committed by Daniel Bertalan
parent b65d281bbb
commit 568e5d5631
Notes: sideshowbarker 2024-07-16 23:13:25 +09:00

View file

@ -6,6 +6,7 @@
#include <AK/CharacterTypes.h>
#include <AK/DateConstants.h>
#include <AK/GenericLexer.h>
#include <AK/String.h>
#include <AK/StringBuilder.h>
#include <AK/Time.h>
@ -286,7 +287,6 @@ DeprecatedString DateTime::to_deprecated_string(StringView format) const
Optional<DateTime> DateTime::parse(StringView format, DeprecatedString const& string)
{
unsigned format_pos = 0;
unsigned string_pos = 0;
struct tm tm = {};
tm.tm_isdst = -1;
@ -294,55 +294,47 @@ Optional<DateTime> DateTime::parse(StringView format, DeprecatedString const& st
auto parsing_failed = false;
auto tm_represents_utc_time = false;
GenericLexer string_lexer(string);
auto parse_number = [&] {
if (string_pos >= string.length()) {
auto result = string_lexer.consume_decimal_integer<int>();
if (result.is_error()) {
parsing_failed = true;
return 0;
}
char* end_ptr = nullptr;
errno = 0;
int number = strtol(string.characters() + string_pos, &end_ptr, 10);
auto chars_parsed = end_ptr - (string.characters() + string_pos);
if (chars_parsed == 0 || errno != 0)
parsing_failed = true;
else
string_pos += chars_parsed;
return number;
return result.value();
};
auto consume = [&](char x) {
if (string_pos >= string.length()) {
auto consume = [&](char c) {
if (!string_lexer.consume_specific(c))
parsing_failed = true;
return;
};
auto consume_specific_ascii_case_insensitive = [&](StringView name) {
auto next_string = string_lexer.peek_string(name.length());
if (next_string.has_value() && next_string->equals_ignoring_ascii_case(name)) {
string_lexer.consume(name.length());
return true;
}
if (string[string_pos] != x)
parsing_failed = true;
else
string_pos++;
return false;
};
while (format_pos < format.length() && string_pos < string.length()) {
while (format_pos < format.length() && !string_lexer.is_eof()) {
if (format[format_pos] != '%') {
if (format[format_pos] != string[string_pos]) {
return {};
}
consume(format[format_pos]);
format_pos++;
string_pos++;
continue;
}
format_pos++;
if (format_pos == format.length()) {
if (format_pos == format.length())
return {};
}
switch (format[format_pos]) {
case 'a': {
auto wday = 0;
for (auto name : short_day_names) {
if (string.substring_view(string_pos).starts_with(name, AK::CaseSensitivity::CaseInsensitive)) {
string_pos += name.length();
if (consume_specific_ascii_case_insensitive(name)) {
tm.tm_wday = wday;
break;
}
@ -355,8 +347,7 @@ Optional<DateTime> DateTime::parse(StringView format, DeprecatedString const& st
case 'A': {
auto wday = 0;
for (auto name : long_day_names) {
if (string.substring_view(string_pos).starts_with(name, AK::CaseSensitivity::CaseInsensitive)) {
string_pos += name.length();
if (consume_specific_ascii_case_insensitive(name)) {
tm.tm_wday = wday;
break;
}
@ -370,8 +361,7 @@ Optional<DateTime> DateTime::parse(StringView format, DeprecatedString const& st
case 'b': {
auto mon = 0;
for (auto name : short_month_names) {
if (string.substring_view(string_pos).starts_with(name, AK::CaseSensitivity::CaseInsensitive)) {
string_pos += name.length();
if (consume_specific_ascii_case_insensitive(name)) {
tm.tm_mon = mon;
break;
}
@ -384,8 +374,7 @@ Optional<DateTime> DateTime::parse(StringView format, DeprecatedString const& st
case 'B': {
auto mon = 0;
for (auto name : long_month_names) {
if (string.substring_view(string_pos).starts_with(name, AK::CaseSensitivity::CaseInsensitive)) {
string_pos += name.length();
if (consume_specific_ascii_case_insensitive(name)) {
tm.tm_mon = mon;
break;
}
@ -400,10 +389,9 @@ Optional<DateTime> DateTime::parse(StringView format, DeprecatedString const& st
tm.tm_year = (num - 19) * 100;
break;
}
case 'd': {
case 'd':
tm.tm_mday = parse_number();
break;
}
case 'D': {
int mon = parse_number();
consume('/');
@ -415,64 +403,52 @@ Optional<DateTime> DateTime::parse(StringView format, DeprecatedString const& st
tm.tm_year = (year + 1900) % 100;
break;
}
case 'e': {
case 'e':
tm.tm_mday = parse_number();
break;
}
case 'H': {
case 'H':
tm.tm_hour = parse_number();
break;
}
case 'I': {
int num = parse_number();
tm.tm_hour = num % 12;
break;
}
case 'j': {
case 'j':
// a little trickery here... we can get mktime() to figure out mon and mday using out of range values.
// yday is not used so setting it is pointless.
tm.tm_mday = parse_number();
tm.tm_mon = 0;
mktime(&tm);
break;
}
case 'm': {
int num = parse_number();
tm.tm_mon = num - 1;
break;
}
case 'M': {
case 'M':
tm.tm_min = parse_number();
break;
}
case 'n':
case 't':
while (is_ascii_blank(string[string_pos])) {
string_pos++;
}
string_lexer.consume_while(is_ascii_blank);
break;
case 'r':
case 'p': {
auto ampm = string.substring_view(string_pos, 2);
if (ampm == "PM" && tm.tm_hour < 12) {
auto ampm = string_lexer.consume(2);
if (ampm == "PM") {
if (tm.tm_hour < 12)
tm.tm_hour += 12;
} else if (ampm != "AM") {
return {};
}
string_pos += 2;
break;
}
case 'r': {
auto ampm = string.substring_view(string_pos, 2);
if (ampm == "PM" && tm.tm_hour < 12) {
tm.tm_hour += 12;
}
string_pos += 2;
break;
}
case 'R': {
case 'R':
tm.tm_hour = parse_number();
consume(':');
tm.tm_min = parse_number();
break;
}
case 'S':
tm.tm_sec = parse_number();
break;
@ -498,26 +474,22 @@ Optional<DateTime> DateTime::parse(StringView format, DeprecatedString const& st
}
case 'z': {
tm_represents_utc_time = true;
if (string[string_pos] == 'Z') {
if (string_lexer.consume_specific('Z')) {
// UTC time
string_pos++;
break;
}
int sign;
if (string[string_pos] == '+')
if (string_lexer.consume_specific('+'))
sign = -1;
else if (string[string_pos] == '-')
else if (string_lexer.consume_specific('-'))
sign = +1;
else
return {};
string_pos++;
auto hours = parse_number();
int minutes;
if (string_pos < string.length() && string[string_pos] == ':') {
string_pos++;
if (string_lexer.consume_specific(':')) {
minutes = parse_number();
} else {
minutes = hours % 100;
@ -529,25 +501,21 @@ Optional<DateTime> DateTime::parse(StringView format, DeprecatedString const& st
break;
}
case '%':
if (string[string_pos] != '%') {
return {};
}
string_pos += 1;
consume('%');
break;
default:
parsing_failed = true;
break;
}
if (parsing_failed) {
if (parsing_failed)
return {};
}
format_pos++;
}
if (string_pos != string.length() || format_pos != format.length()) {
if (!string_lexer.is_eof() || format_pos != format.length())
return {};
}
// If an explicit timezone was present, the time in tm was shifted to UTC.
// Convert it to local time, since that is what `mktime` expects.