LibWeb: Port text segmentation to the ICU text segmenter

This commit is contained in:
Timothy Flynn 2024-06-19 09:02:21 -04:00 committed by Andreas Kling
parent 14071c52f9
commit 12f177e9e9
Notes: sideshowbarker 2024-07-17 03:27:40 +09:00
5 changed files with 40 additions and 20 deletions

View file

@ -4,6 +4,7 @@
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <LibLocale/Segmenter.h>
#include <LibWeb/Bindings/CharacterDataPrototype.h>
#include <LibWeb/DOM/CharacterData.h>
#include <LibWeb/DOM/Document.h>
@ -22,6 +23,8 @@ CharacterData::CharacterData(Document& document, NodeType type, String const& da
{
}
// NOTE(review): the destructor is defaulted here rather than in the header, presumably because
// the header only pulls in <LibLocale/Forward.h> while this file includes <LibLocale/Segmenter.h>,
// so the OwnPtr<Locale::Segmenter> member can only be destroyed where the type is complete — confirm.
CharacterData::~CharacterData() = default;
void CharacterData::initialize(JS::Realm& realm)
{
Base::initialize(realm);
@ -124,6 +127,10 @@ WebIDL::ExceptionOr<void> CharacterData::replace_data(size_t offset, size_t coun
static_cast<Layout::TextNode&>(*layout_node).invalidate_text_for_rendering();
document().set_needs_layout();
if (m_segmenter)
m_segmenter->set_segmented_text(m_data);
return {};
}
@ -148,4 +155,14 @@ WebIDL::ExceptionOr<void> CharacterData::delete_data(size_t offset, size_t count
return replace_data(offset, count, String {});
}
// Returns the segmenter associated with this node, creating it on first use.
// A freshly created segmenter uses grapheme granularity and is handed the node's
// current data before being returned.
Locale::Segmenter& CharacterData::segmenter()
{
    // Fast path: a segmenter already exists, so hand it back untouched.
    if (m_segmenter)
        return *m_segmenter;

    // First request: build the segmenter and give it the text it should segment.
    m_segmenter = Locale::Segmenter::create(Locale::SegmenterGranularity::Grapheme);
    m_segmenter->set_segmented_text(m_data);

    return *m_segmenter;
}
}

View file

@ -7,6 +7,7 @@
#pragma once
#include <AK/String.h>
#include <LibLocale/Forward.h>
#include <LibWeb/DOM/ChildNode.h>
#include <LibWeb/DOM/Node.h>
#include <LibWeb/DOM/NonDocumentTypeChildNode.h>
@ -22,7 +23,7 @@ class CharacterData
JS_DECLARE_ALLOCATOR(CharacterData);
public:
virtual ~CharacterData() override = default;
virtual ~CharacterData() override;
String const& data() const { return m_data; }
void set_data(String const&);
@ -40,6 +41,8 @@ public:
WebIDL::ExceptionOr<void> delete_data(size_t offset_in_utf16_code_units, size_t count_in_utf16_code_units);
WebIDL::ExceptionOr<void> replace_data(size_t offset_in_utf16_code_units, size_t count_in_utf16_code_units, String const&);
Locale::Segmenter& segmenter();
protected:
CharacterData(Document&, NodeType, String const&);
@ -47,6 +50,8 @@ protected:
private:
String m_data;
OwnPtr<Locale::Segmenter> m_segmenter;
};
}

View file

@ -6,7 +6,7 @@
*/
#include <AK/Utf8View.h>
#include <LibUnicode/Segmentation.h>
#include <LibLocale/Segmenter.h>
#include <LibWeb/DOM/Node.h>
#include <LibWeb/DOM/Position.h>
#include <LibWeb/DOM/Text.h>
@ -40,9 +40,8 @@ bool Position::increment_offset()
return false;
auto& node = verify_cast<DOM::Text>(*m_node);
auto text = Utf8View(node.data());
if (auto offset = Unicode::next_grapheme_segmentation_boundary(text, m_offset); offset.has_value()) {
if (auto offset = node.segmenter().next_boundary(m_offset); offset.has_value()) {
m_offset = *offset;
return true;
}
@ -57,9 +56,8 @@ bool Position::decrement_offset()
return false;
auto& node = verify_cast<DOM::Text>(*m_node);
auto text = Utf8View(node.data());
if (auto offset = Unicode::previous_grapheme_segmentation_boundary(text, m_offset); offset.has_value()) {
if (auto offset = node.segmenter().previous_boundary(m_offset); offset.has_value()) {
m_offset = *offset;
return true;
}

View file

@ -10,7 +10,7 @@
#include <LibGfx/Painter.h>
#include <LibGfx/Quad.h>
#include <LibGfx/Rect.h>
#include <LibUnicode/Segmentation.h>
#include <LibLocale/Segmenter.h>
#include <LibWeb/Bindings/CanvasRenderingContext2DPrototype.h>
#include <LibWeb/Bindings/Intrinsics.h>
#include <LibWeb/HTML/CanvasRenderingContext2D.h>
@ -468,7 +468,7 @@ CanvasRenderingContext2D::PreparedText CanvasRenderingContext2D::prepare_text(By
for (auto c : text) {
builder.append(Infra::is_ascii_whitespace(c) ? ' ' : c);
}
auto replaced_text = builder.string_view();
auto replaced_text = MUST(builder.to_string());
// 3. Let font be the current font of target, as given by that object's font attribute.
auto font = current_font();
@ -497,8 +497,6 @@ CanvasRenderingContext2D::PreparedText CanvasRenderingContext2D::prepare_text(By
size_t width = font->width(text.view());
size_t height = font->pixel_size();
Utf8View replaced_text_view { replaced_text };
// 6. If maxWidth was provided and the hypothetical width of the inline box in the hypothetical line box is greater than maxWidth CSS pixels, then change font to have a more condensed font (if one is available or if a reasonably readable one can be synthesized by applying a horizontal scale factor to the font) or a smaller font, and return to the previous step.
// FIXME: Record the font size used for this piece of text, and actually retry with a smaller size if needed.
@ -520,17 +518,19 @@ CanvasRenderingContext2D::PreparedText CanvasRenderingContext2D::prepare_text(By
// 8. Let result be an array constructed by iterating over each glyph in the inline box from left to right (if any), adding to the array, for each glyph, the shape of the glyph as it is in the inline box, positioned on a coordinate space using CSS pixels with its origin is at the anchor point.
PreparedText prepared_text { {}, physical_alignment, { 0, 0, static_cast<int>(width), static_cast<int>(height) } };
prepared_text.glyphs.ensure_capacity(replaced_text.length());
prepared_text.glyphs.ensure_capacity(replaced_text.bytes_as_string_view().length());
size_t previous_grapheme_boundary = 0;
Unicode::for_each_grapheme_segmentation_boundary(replaced_text_view, [&](auto boundary) {
auto segmenter = Locale::Segmenter::create(Locale::SegmenterGranularity::Grapheme);
size_t previous_boundary = 0;
segmenter->for_each_boundary(replaced_text, [&](auto boundary) {
if (boundary == 0)
return IterationDecision::Continue;
auto glyph_view = replaced_text_view.substring_view(previous_grapheme_boundary, boundary - previous_grapheme_boundary);
auto glyph = String::from_utf8(glyph_view.as_string()).release_value_but_fixme_should_propagate_errors();
auto glyph = MUST(replaced_text.substring_from_byte_offset(previous_boundary, boundary - previous_boundary));
prepared_text.glyphs.append({ move(glyph), { static_cast<int>(boundary), 0 } });
previous_boundary = boundary;
return IterationDecision::Continue;
});

View file

@ -6,7 +6,7 @@
#include <AK/StringBuilder.h>
#include <AK/Utf8View.h>
#include <LibUnicode/Segmentation.h>
#include <LibLocale/Segmenter.h>
#include <LibWeb/DOM/Document.h>
#include <LibWeb/DOM/Position.h>
#include <LibWeb/DOM/Range.h>
@ -22,15 +22,15 @@ void EditEventHandler::handle_delete_character_after(JS::NonnullGCPtr<DOM::Posit
auto& node = verify_cast<DOM::Text>(*cursor_position->node());
auto& text = node.data();
auto next_grapheme_offset = Unicode::next_grapheme_segmentation_boundary(Utf8View { text }, cursor_position->offset());
if (!next_grapheme_offset.has_value()) {
auto next_offset = node.segmenter().next_boundary(cursor_position->offset());
if (!next_offset.has_value()) {
// FIXME: Move to the next node and delete the first character there.
return;
}
StringBuilder builder;
builder.append(text.bytes_as_string_view().substring_view(0, cursor_position->offset()));
builder.append(text.bytes_as_string_view().substring_view(*next_grapheme_offset));
builder.append(text.bytes_as_string_view().substring_view(*next_offset));
node.set_data(MUST(builder.to_string()));
m_navigable->did_edit({});