/*
 * Copyright (c) 2021-2023, Tim Flynn
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */

// NOTE(review): the header names of the include directives below are missing in this copy of the
// file. The same appears to be true of the template parameter lists and of the type arguments of
// `Optional` / `ReadonlySpan` further down (presumably all angle-bracketed text was lost). Restore
// them from version control; they are intentionally not guessed at here.
#include
#include
#include
#include
#include
#include
#include
#include
#include

#if ENABLE_UNICODE_DATA
#    include
#endif

namespace Unicode {

// Weak fallback definitions. Each one returns an empty / zero value.
// NOTE(review): presumably these are overridden by strong definitions when the generated Unicode
// data is linked in - confirm against the build configuration.
Optional __attribute__((weak)) code_point_display_name(u32) { return {}; }
Optional __attribute__((weak)) code_point_block_display_name(u32) { return {}; }
Optional __attribute__((weak)) code_point_abbreviation(u32) { return {}; }
u32 __attribute__((weak)) canonical_combining_class(u32) { return {}; }
ReadonlySpan __attribute__((weak)) block_display_names() { return {}; }

// Weak case-conversion fallbacks: these defer to the ASCII-only helpers. Note that the titlecase
// fallback uses the ASCII uppercase conversion.
u32 __attribute__((weak)) to_unicode_lowercase(u32 code_point) { return to_ascii_lowercase(code_point); }
u32 __attribute__((weak)) to_unicode_uppercase(u32 code_point) { return to_ascii_uppercase(code_point); }
u32 __attribute__((weak)) to_unicode_titlecase(u32 code_point) { return to_ascii_uppercase(code_point); }

// Helper that walks a string view and hands out its case-folded code points one at a time.
//
// The result of case folding a single source code point is treated as a sequence: it is buffered
// in m_casefolded_code_points and drained completely before the underlying iterator is advanced
// to the next source code point.
//
// NOTE(review): the template parameter list is missing in this copy; the body uses `ViewType` as
// the type of the string being iterated.
template class CasefoldStringComparator {
public:
    // Starts iteration at the beginning of `string`.
    explicit CasefoldStringComparator(ViewType string)
        : m_string(string)
        , m_it(m_string.begin())
    {
    }

    // True while buffered case-folded code points remain, or the source string has not been fully
    // consumed yet.
    bool has_more_data() const
    {
        return !m_casefolded_code_points.is_empty() || (m_it != m_string.end());
    }

    // Returns the next case-folded code point. Callers must check has_more_data() first; this is
    // enforced with VERIFY.
    u32 next_code_point()
    {
        VERIFY(has_more_data());

        // Refill the buffer from the next source code point once the previous one is drained.
        if (m_casefolded_code_points.is_empty()) {
            m_current_code_point = *m_it;
            ++m_it;

            m_casefolded_code_points = Unicode::Detail::casefold_code_point(m_current_code_point);
            VERIFY(!m_casefolded_code_points.is_empty()); // Must at least contain the provided code point.
        }

        // Pop the first buffered code point.
        auto code_point = m_casefolded_code_points[0];
        m_casefolded_code_points = m_casefolded_code_points.substring_view(1);

        return code_point;
    }

private:
    ViewType m_string;
    typename ViewType::Iterator m_it;

    // NOTE(review): kept as a member rather than a local - presumably because the view returned
    // by casefold_code_point() may refer to the code point passed in; confirm before changing.
    u32 m_current_code_point { 0 };
    Utf32View m_casefolded_code_points;
};

// Compares two strings for equality after case folding. Returns true only if both strings yield
// the same sequence of case-folded code points and are exhausted at the same time.
//
// NOTE(review): the template parameter list is missing in this copy (see the note at the top).
//
// https://www.unicode.org/versions/Unicode15.0.0/ch03.pdf#G34145
template bool equals_ignoring_case(ViewType lhs, ViewType rhs)
{
    // A string X is a caseless match for a string Y if and only if:
    //     toCasefold(X) = toCasefold(Y)

    CasefoldStringComparator lhs_comparator { lhs };
    CasefoldStringComparator rhs_comparator { rhs };

    // Compare code point by code point for as long as both sides still have data.
    while (lhs_comparator.has_more_data() && rhs_comparator.has_more_data()) {
        if (lhs_comparator.next_code_point() != rhs_comparator.next_code_point())
            return false;
    }

    // A match requires both sides to be exhausted; leftover data on either side is a mismatch.
    return !lhs_comparator.has_more_data() && !rhs_comparator.has_more_data();
}

// Explicit instantiations for the UTF-8, UTF-16 and UTF-32 view types.
template bool equals_ignoring_case(Utf8View, Utf8View);
template bool equals_ignoring_case(Utf16View, Utf16View);
template bool equals_ignoring_case(Utf32View, Utf32View);

// Weak fallbacks for general category / property lookup: no value is found and no code point has
// any category or property.
Optional __attribute__((weak)) general_category_from_string(StringView) { return {}; }
bool __attribute__((weak)) code_point_has_general_category(u32, GeneralCategory) { return {}; }
Optional __attribute__((weak)) property_from_string(StringView) { return {}; }
bool __attribute__((weak)) code_point_has_property(u32, Property) { return {}; }

// Returns true if `property` is one of the properties listed in the switch below. When
// ENABLE_UNICODE_DATA is not set, this always returns false.
bool is_ecma262_property([[maybe_unused]] Property property)
{
#if ENABLE_UNICODE_DATA
    // ECMA-262 only allows a subset of Unicode properties: https://tc39.es/ecma262/#table-binary-unicode-properties
    switch (property) {
    case Unicode::Property::ASCII:
    case Unicode::Property::ASCII_Hex_Digit:
    case Unicode::Property::Alphabetic:
    case Unicode::Property::Any:
    case Unicode::Property::Assigned:
    case Unicode::Property::Bidi_Control:
    case Unicode::Property::Bidi_Mirrored:
    case Unicode::Property::Case_Ignorable:
    case Unicode::Property::Cased:
    case Unicode::Property::Changes_When_Casefolded:
    case Unicode::Property::Changes_When_Casemapped:
    case Unicode::Property::Changes_When_Lowercased:
    case Unicode::Property::Changes_When_NFKC_Casefolded:
    case Unicode::Property::Changes_When_Titlecased:
    case Unicode::Property::Changes_When_Uppercased:
    case Unicode::Property::Dash:
    case Unicode::Property::Default_Ignorable_Code_Point:
    case Unicode::Property::Deprecated:
    case Unicode::Property::Diacritic:
    case Unicode::Property::Emoji:
    case Unicode::Property::Emoji_Component:
    case Unicode::Property::Emoji_Modifier:
    case Unicode::Property::Emoji_Modifier_Base:
    case Unicode::Property::Emoji_Presentation:
    case Unicode::Property::Extended_Pictographic:
    case Unicode::Property::Extender:
    case Unicode::Property::Grapheme_Base:
    case Unicode::Property::Grapheme_Extend:
    case Unicode::Property::Hex_Digit:
    case Unicode::Property::IDS_Binary_Operator:
    case Unicode::Property::IDS_Trinary_Operator:
    case Unicode::Property::ID_Continue:
    case Unicode::Property::ID_Start:
    case Unicode::Property::Ideographic:
    case Unicode::Property::Join_Control:
    case Unicode::Property::Logical_Order_Exception:
    case Unicode::Property::Lowercase:
    case Unicode::Property::Math:
    case Unicode::Property::Noncharacter_Code_Point:
    case Unicode::Property::Pattern_Syntax:
    case Unicode::Property::Pattern_White_Space:
    case Unicode::Property::Quotation_Mark:
    case Unicode::Property::Radical:
    case Unicode::Property::Regional_Indicator:
    case Unicode::Property::Sentence_Terminal:
    case Unicode::Property::Soft_Dotted:
    case Unicode::Property::Terminal_Punctuation:
    case Unicode::Property::Unified_Ideograph:
    case Unicode::Property::Uppercase:
    case Unicode::Property::Variation_Selector:
    case Unicode::Property::White_Space:
    case Unicode::Property::XID_Continue:
    case Unicode::Property::XID_Start:
        return true;
    default:
        return false;
    }
#else
    return false;
#endif
}

// NOTE(review): the declaration that starts here continues beyond the visible part of the file.
Optional