/* * Copyright (c) 2018-2021, Andreas Kling * Copyright (c) 2022, Tobias Christiansen * * SPDX-License-Identifier: BSD-2-Clause */ #include #include #include #include #include #include #include #include #include namespace Web::Layout { GC_DEFINE_ALLOCATOR(TextNode); TextNode::TextNode(DOM::Document& document, DOM::Text& text) : Node(document, &text) { } TextNode::~TextNode() = default; static bool is_all_whitespace(StringView string) { for (size_t i = 0; i < string.length(); ++i) { if (!is_ascii_space(string[i])) return false; } return true; } // https://w3c.github.io/mathml-core/#new-text-transform-values static String apply_math_auto_text_transform(String const& string) { // https://w3c.github.io/mathml-core/#italic-mappings auto map_code_point_to_italic = [](u32 code_point) -> u32 { switch (code_point) { case 0x0041: return 0x1D434; case 0x0042: return 0x1D435; case 0x0043: return 0x1D436; case 0x0044: return 0x1D437; case 0x0045: return 0x1D438; case 0x0046: return 0x1D439; case 0x0047: return 0x1D43A; case 0x0048: return 0x1D43B; case 0x0049: return 0x1D43C; case 0x004A: return 0x1D43D; case 0x004B: return 0x1D43E; case 0x004C: return 0x1D43F; case 0x004D: return 0x1D440; case 0x004E: return 0x1D441; case 0x004F: return 0x1D442; case 0x0050: return 0x1D443; case 0x0051: return 0x1D444; case 0x0052: return 0x1D445; case 0x0053: return 0x1D446; case 0x0054: return 0x1D447; case 0x0055: return 0x1D448; case 0x0056: return 0x1D449; case 0x0057: return 0x1D44A; case 0x0058: return 0x1D44B; case 0x0059: return 0x1D44C; case 0x005A: return 0x1D44D; case 0x0061: return 0x1D44E; case 0x0062: return 0x1D44F; case 0x0063: return 0x1D450; case 0x0064: return 0x1D451; case 0x0065: return 0x1D452; case 0x0066: return 0x1D453; case 0x0067: return 0x1D454; case 0x0068: return 0x0210E; case 0x0069: return 0x1D456; case 0x006A: return 0x1D457; case 0x006B: return 0x1D458; case 0x006C: return 0x1D459; case 0x006D: return 0x1D45A; case 0x006E: return 0x1D45B; case 0x006F: return 0x1D45C; case 0x0070: return 0x1D45D; case 0x0071: return 0x1D45E; case 0x0072: return 0x1D45F; case 0x0073: return 0x1D460; case 0x0074: return 0x1D461; case 0x0075: return 0x1D462; case 0x0076: return 0x1D463; case 0x0077: return 0x1D464; case 0x0078: return 0x1D465; case 0x0079: return 0x1D466; case 0x007A: return 0x1D467; case 0x0131: return 0x1D6A4; case 0x0237: return 0x1D6A5; case 0x0391: return 0x1D6E2; case 0x0392: return 0x1D6E3; case 0x0393: return 0x1D6E4; case 0x0394: return 0x1D6E5; case 0x0395: return 0x1D6E6; case 0x0396: return 0x1D6E7; case 0x0397: return 0x1D6E8; case 0x0398: return 0x1D6E9; case 0x0399: return 0x1D6EA; case 0x039A: return 0x1D6EB; case 0x039B: return 0x1D6EC; case 0x039C: return 0x1D6ED; case 0x039D: return 0x1D6EE; case 0x039E: return 0x1D6EF; case 0x039F: return 0x1D6F0; case 0x03A0: return 0x1D6F1; case 0x03A1: return 0x1D6F2; case 0x03F4: return 0x1D6F3; case 0x03A3: return 0x1D6F4; case 0x03A4: return 0x1D6F5; case 0x03A5: return 0x1D6F6; case 0x03A6: return 0x1D6F7; case 0x03A7: return 0x1D6F8; case 0x03A8: return 0x1D6F9; case 0x03A9: return 0x1D6FA; case 0x2207: return 0x1D6FB; case 0x03B1: return 0x1D6FC; case 0x03B2: return 0x1D6FD; case 0x03B3: return 0x1D6FE; case 0x03B4: return 0x1D6FF; case 0x03B5: return 0x1D700; case 0x03B6: return 0x1D701; case 0x03B7: return 0x1D702; case 0x03B8: return 0x1D703; case 0x03B9: return 0x1D704; case 0x03BA: return 0x1D705; case 0x03BB: return 0x1D706; case 0x03BC: return 0x1D707; case 0x03BD: return 0x1D708; case 0x03BE: return 0x1D709; case 0x03BF: return 0x1D70A; case 0x03C0: return 0x1D70B; case 0x03C1: return 0x1D70C; case 0x03C2: return 0x1D70D; case 0x03C3: return 0x1D70E; case 0x03C4: return 0x1D70F; case 0x03C5: return 0x1D710; case 0x03C6: return 0x1D711; case 0x03C7: return 0x1D712; case 0x03C8: return 0x1D713; case 0x03C9: return 0x1D714; case 0x2202: return 0x1D715; case 0x03F5: return 0x1D716; case 0x03D1: return 0x1D717; case 0x03F0: return 0x1D718; case 0x03D5: return 0x1D719; case 0x03F1: return 0x1D71A; case 0x03D6: return 0x1D71B; default: return code_point; } }; StringBuilder builder(string.bytes().size()); for (auto code_point : string.code_points()) builder.append_code_point(map_code_point_to_italic(code_point)); return MUST(builder.to_string()); } static String apply_text_transform(String const& string, CSS::TextTransform text_transform, Optional const& locale) { switch (text_transform) { case CSS::TextTransform::Uppercase: return MUST(string.to_uppercase(locale)); case CSS::TextTransform::Lowercase: return MUST(string.to_lowercase(locale)); case CSS::TextTransform::None: return string; case CSS::TextTransform::MathAuto: return apply_math_auto_text_transform(string); case CSS::TextTransform::Capitalize: { return MUST(string.to_titlecase(locale, TrailingCodePointTransformation::PreserveExisting)); } case CSS::TextTransform::FullSizeKana: { // FIXME: Implement this! return string; } case CSS::TextTransform::FullWidth: { return MUST(string.to_fullwidth()); } } VERIFY_NOT_REACHED(); } static bool is_segment_break(u32 code_point) { // FIXME: What code points are segment breaks? return code_point == '\n'; } // https://drafts.csswg.org/css-text/#white-space-phase-1 static String apply_white_space(String&& input, CSS::WhiteSpace white_space) { auto data_view = input.code_points(); // AD-HOC: A fast returns to avoid unnecessarily allocating a StringBuilder. bool contains_space = false; for (auto c : data_view) { if (is_ascii_space(c)) { contains_space = true; break; } } if (!contains_space) return move(input); // For each inline (including anonymous inlines; see CSS 2.1 § 9.2.2.1 Anonymous inline boxes [CSS2]) // within an inline formatting context, white space characters are processed as follows prior to line breaking // and bidi reordering, ignoring bidi formatting characters (characters with the Bidi_Control property [UAX9]) // as if they were not there: switch (white_space) { case CSS::WhiteSpace::Normal: case CSS::WhiteSpace::Nowrap: case CSS::WhiteSpace::PreLine: { // If white-space is set to normal, nowrap, or pre-line, white space characters are considered collapsible // and are processed by performing the following steps: StringBuilder builder(input.byte_count()); // TODO: We need to know if each line is first or last, so we gather up all of the lines before processing. // There's probably a smarter way to do this? Vector lines; input.code_points().for_each_split_view(is_segment_break, SplitBehavior::KeepEmpty, [&](Utf8View line) { lines.append(move(line)); }); auto collapsible_spaces_and_tabs = Utf8View(" \t"sv); for (auto i = 0u; i < lines.size(); ++i) { auto line = lines[i]; bool const is_first = i == 0; bool const is_last = i == lines.size() - 1; // 1. Any sequence of collapsible spaces and tabs immediately preceding or following a segment break is removed. // NOTE: The first line doesn't follow a segment break, and the last line doesn't precede one. if (!is_first) line = line.trim(collapsible_spaces_and_tabs, TrimMode::Left); if (!is_last) line = line.trim(collapsible_spaces_and_tabs, TrimMode::Right); // 2. Collapsible segment breaks are transformed for rendering according to the segment break transformation rules. Optional segment_break_character; if (!is_last) { // https://drafts.csswg.org/css-text/#line-break-transform // When white-space is pre, pre-wrap, break-spaces, or pre-line, segment breaks are not collapsible // and are instead transformed into a preserved line feed (U+000A). // NOTE: Only pre-line is possible here due to the earlier switch if (white_space == CSS::WhiteSpace::PreLine) { segment_break_character = '\n'; } // For other values of white-space, segment breaks are collapsible, and are collapsed as follows: else { // 1. First, any collapsible segment break immediately following another collapsible segment break is removed. // NOTE: For us, that means skipping empty lines. if (line.is_empty()) continue; // 2. Then any remaining segment break is either transformed into a space (U+0020) or removed depending on // the context before and after the break. The rules for this operation are UA-defined in this level. // TODO: What should we actually be doing here? if (!builder.string_view().ends_with(' ')) segment_break_character = ' '; } } // 3. Every collapsible tab is converted to a collapsible space (U+0020). // 4. Any collapsible space immediately following another collapsible space — even one outside the boundary of the // inline containing that space, provided both spaces are within the same inline formatting context - is collapsed // to have zero advance width. (It is invisible, but retains its soft wrap opportunity, if any.) bool just_saw_space = false; for (auto code_point : line) { // "Carriage returns (U+000D) are treated identically to spaces (U+0020) in all respects." // - https://drafts.csswg.org/css-text/#white-space-processing if (code_point == '\t' || code_point == ' ' || code_point == '\r') { if (just_saw_space) continue; just_saw_space = true; builder.append(' '); continue; } just_saw_space = false; builder.append_code_point(code_point); } // NOTE: Append the segment break from step 2, but only if it wouldn't collapse with the most recent code point. if (segment_break_character.has_value() && !builder.string_view().ends_with(*segment_break_character)) builder.append(*segment_break_character); } return builder.to_string_without_validation(); } case CSS::WhiteSpace::Pre: case CSS::WhiteSpace::PreWrap: { // If white-space is set to pre, pre-wrap, or break-spaces, any sequence of spaces is treated as a sequence // of non-breaking spaces. However, for pre-wrap, a soft wrap opportunity exists at the end of a sequence of // spaces and/or tabs, while for break-spaces, a soft wrap opportunity exists after every space and every tab. // FIXME: break-spaces // NOTE: Soft wrap opportunities are not relevant here so we just return the input. return move(input); } } VERIFY_NOT_REACHED(); } void TextNode::invalidate_text_for_rendering() { m_text_for_rendering = {}; m_grapheme_segmenter.clear(); } String const& TextNode::text_for_rendering() const { if (!m_text_for_rendering.has_value()) m_text_for_rendering = compute_text_for_rendering(); return *m_text_for_rendering; } // NOTE: This collapses whitespace into a single ASCII space if the CSS white-space property tells us to. String TextNode::compute_text_for_rendering() const { if (dom_node().is_password_input()) return MUST(String::repeated('*', dom_node().data().code_points().length())); auto const* parent_element = dom_node().parent_element(); auto const maybe_lang = parent_element ? parent_element->lang() : Optional {}; auto const lang = maybe_lang.has_value() ? maybe_lang.value() : Optional {}; auto data = apply_text_transform(dom_node().data(), computed_values().text_transform(), lang); if (data.is_empty() || (dom_node().is_editable() && !dom_node().is_uninteresting_whitespace_node())) return data; return apply_white_space(move(data), computed_values().white_space()); } Unicode::Segmenter& TextNode::grapheme_segmenter() const { if (!m_grapheme_segmenter) { m_grapheme_segmenter = document().grapheme_segmenter().clone(); m_grapheme_segmenter->set_segmented_text(text_for_rendering()); } return *m_grapheme_segmenter; } TextNode::ChunkIterator::ChunkIterator(TextNode const& text_node, bool wrap_lines, bool respect_linebreaks) : m_wrap_lines(wrap_lines) , m_respect_linebreaks(respect_linebreaks) , m_utf8_view(text_node.text_for_rendering()) , m_font_cascade_list(text_node.computed_values().font_list()) , m_grapheme_segmenter(text_node.grapheme_segmenter()) { } static Gfx::GlyphRun::TextType text_type_for_code_point(u32 code_point) { switch (Unicode::bidirectional_class(code_point)) { case Unicode::BidiClass::WhiteSpaceNeutral: case Unicode::BidiClass::BlockSeparator: case Unicode::BidiClass::SegmentSeparator: case Unicode::BidiClass::CommonNumberSeparator: case Unicode::BidiClass::DirNonSpacingMark: case Unicode::BidiClass::ArabicNumber: case Unicode::BidiClass::EuropeanNumber: case Unicode::BidiClass::EuropeanNumberSeparator: case Unicode::BidiClass::EuropeanNumberTerminator: return Gfx::GlyphRun::TextType::ContextDependent; case Unicode::BidiClass::BoundaryNeutral: case Unicode::BidiClass::OtherNeutral: case Unicode::BidiClass::FirstStrongIsolate: case Unicode::BidiClass::PopDirectionalFormat: case Unicode::BidiClass::PopDirectionalIsolate: return Gfx::GlyphRun::TextType::Common; case Unicode::BidiClass::LeftToRight: case Unicode::BidiClass::LeftToRightEmbedding: case Unicode::BidiClass::LeftToRightIsolate: case Unicode::BidiClass::LeftToRightOverride: return Gfx::GlyphRun::TextType::Ltr; case Unicode::BidiClass::RightToLeft: case Unicode::BidiClass::RightToLeftArabic: case Unicode::BidiClass::RightToLeftEmbedding: case Unicode::BidiClass::RightToLeftIsolate: case Unicode::BidiClass::RightToLeftOverride: return Gfx::GlyphRun::TextType::Rtl; default: VERIFY_NOT_REACHED(); } } Optional TextNode::ChunkIterator::next() { if (!m_peek_queue.is_empty()) return m_peek_queue.take_first(); return next_without_peek(); } Optional TextNode::ChunkIterator::peek(size_t count) { while (m_peek_queue.size() <= count) { auto next = next_without_peek(); if (!next.has_value()) return {}; m_peek_queue.append(*next); } return m_peek_queue[count]; } Optional TextNode::ChunkIterator::next_without_peek() { if (m_current_index >= m_utf8_view.byte_length()) return {}; auto current_code_point = [this]() { return *m_utf8_view.iterator_at_byte_offset_without_validation(m_current_index); }; auto next_grapheme_boundary = [this]() { return m_grapheme_segmenter.next_boundary(m_current_index).value_or(m_utf8_view.byte_length()); }; auto code_point = current_code_point(); auto start_of_chunk = m_current_index; Gfx::Font const& font = m_font_cascade_list.font_for_code_point(code_point); auto text_type = text_type_for_code_point(code_point); auto broken_on_tab = false; while (m_current_index < m_utf8_view.byte_length()) { code_point = current_code_point(); if (code_point == '\t') { if (auto result = try_commit_chunk(start_of_chunk, m_current_index, false, broken_on_tab, font, text_type); result.has_value()) return result.release_value(); broken_on_tab = true; // consume any consecutive tabs while (m_current_index < m_utf8_view.byte_length() && current_code_point() == '\t') { m_current_index = next_grapheme_boundary(); } } if (&font != &m_font_cascade_list.font_for_code_point(code_point)) { if (auto result = try_commit_chunk(start_of_chunk, m_current_index, false, broken_on_tab, font, text_type); result.has_value()) return result.release_value(); } if (m_respect_linebreaks && code_point == '\n') { // Newline encountered, and we're supposed to preserve them. // If we have accumulated some code points in the current chunk, commit them now and continue with the newline next time. if (auto result = try_commit_chunk(start_of_chunk, m_current_index, false, broken_on_tab, font, text_type); result.has_value()) return result.release_value(); // Otherwise, commit the newline! m_current_index = next_grapheme_boundary(); auto result = try_commit_chunk(start_of_chunk, m_current_index, true, broken_on_tab, font, text_type); VERIFY(result.has_value()); return result.release_value(); } if (m_wrap_lines) { if (text_type != text_type_for_code_point(code_point)) { if (auto result = try_commit_chunk(start_of_chunk, m_current_index, false, broken_on_tab, font, text_type); result.has_value()) { return result.release_value(); } } if (is_ascii_space(code_point)) { // Whitespace encountered, and we're allowed to break on whitespace. // If we have accumulated some code points in the current chunk, commit them now and continue with the whitespace next time. if (auto result = try_commit_chunk(start_of_chunk, m_current_index, false, broken_on_tab, font, text_type); result.has_value()) { return result.release_value(); } // Otherwise, commit the whitespace! m_current_index = next_grapheme_boundary(); if (auto result = try_commit_chunk(start_of_chunk, m_current_index, false, broken_on_tab, font, text_type); result.has_value()) return result.release_value(); continue; } } m_current_index = next_grapheme_boundary(); } if (start_of_chunk != m_utf8_view.byte_length()) { // Try to output whatever's left at the end of the text node. if (auto result = try_commit_chunk(start_of_chunk, m_utf8_view.byte_length(), false, broken_on_tab, font, text_type); result.has_value()) return result.release_value(); } return {}; } Optional TextNode::ChunkIterator::try_commit_chunk(size_t start, size_t end, bool has_breaking_newline, bool has_breaking_tab, Gfx::Font const& font, Gfx::GlyphRun::TextType text_type) const { if (auto byte_length = end - start; byte_length > 0) { auto chunk_view = m_utf8_view.substring_view(start, byte_length); return Chunk { .view = chunk_view, .font = font, .start = start, .length = byte_length, .has_breaking_newline = has_breaking_newline, .has_breaking_tab = has_breaking_tab, .is_all_whitespace = is_all_whitespace(chunk_view.as_string()), .text_type = text_type, }; } return {}; } GC::Ptr TextNode::create_paintable() const { return Painting::TextPaintable::create(*this, text_for_rendering()); } }