LibWeb: Port text segmentation to the ICU text segmenter
This commit is contained in:
parent
14071c52f9
commit
12f177e9e9
Notes:
sideshowbarker
2024-07-17 03:27:40 +09:00
Author: https://github.com/trflynn89 Commit: https://github.com/LadybirdBrowser/ladybird/commit/12f177e9e9 Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/218
5 changed files with 40 additions and 20 deletions
|
@ -4,6 +4,7 @@
|
|||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#include <LibLocale/Segmenter.h>
|
||||
#include <LibWeb/Bindings/CharacterDataPrototype.h>
|
||||
#include <LibWeb/DOM/CharacterData.h>
|
||||
#include <LibWeb/DOM/Document.h>
|
||||
|
@ -22,6 +23,8 @@ CharacterData::CharacterData(Document& document, NodeType type, String const& da
|
|||
{
|
||||
}
|
||||
|
||||
// Defaulted here instead of in the header. The header only includes
// LibLocale/Forward.h while the class holds an OwnPtr<Locale::Segmenter>, so
// presumably the destructor must be emitted where the full Segmenter
// definition is visible (this file includes LibLocale/Segmenter.h) - confirm.
CharacterData::~CharacterData() = default;
|
||||
|
||||
void CharacterData::initialize(JS::Realm& realm)
|
||||
{
|
||||
Base::initialize(realm);
|
||||
|
@ -124,6 +127,10 @@ WebIDL::ExceptionOr<void> CharacterData::replace_data(size_t offset, size_t coun
|
|||
static_cast<Layout::TextNode&>(*layout_node).invalidate_text_for_rendering();
|
||||
|
||||
document().set_needs_layout();
|
||||
|
||||
if (m_segmenter)
|
||||
m_segmenter->set_segmented_text(m_data);
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
|
@ -148,4 +155,14 @@ WebIDL::ExceptionOr<void> CharacterData::delete_data(size_t offset, size_t count
|
|||
return replace_data(offset, count, String {});
|
||||
}
|
||||
|
||||
// Returns the segmenter associated with this node's character data, creating
// it on first use. The segmenter is created with grapheme granularity and is
// given m_data as its text before being returned.
Locale::Segmenter& CharacterData::segmenter()
|
||||
{
|
||||
// Lazily construct the segmenter; replace_data() re-feeds it with m_data
// on later edits once it exists.
if (!m_segmenter) {
|
||||
m_segmenter = Locale::Segmenter::create(Locale::SegmenterGranularity::Grapheme);
|
||||
m_segmenter->set_segmented_text(m_data);
|
||||
}
|
||||
|
||||
return *m_segmenter;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
#pragma once
|
||||
|
||||
#include <AK/String.h>
|
||||
#include <LibLocale/Forward.h>
|
||||
#include <LibWeb/DOM/ChildNode.h>
|
||||
#include <LibWeb/DOM/Node.h>
|
||||
#include <LibWeb/DOM/NonDocumentTypeChildNode.h>
|
||||
|
@ -22,7 +23,7 @@ class CharacterData
|
|||
JS_DECLARE_ALLOCATOR(CharacterData);
|
||||
|
||||
public:
|
||||
virtual ~CharacterData() override = default;
|
||||
virtual ~CharacterData() override;
|
||||
|
||||
String const& data() const { return m_data; }
|
||||
void set_data(String const&);
|
||||
|
@ -40,6 +41,8 @@ public:
|
|||
WebIDL::ExceptionOr<void> delete_data(size_t offset_in_utf16_code_units, size_t count_in_utf16_code_units);
|
||||
WebIDL::ExceptionOr<void> replace_data(size_t offset_in_utf16_code_units, size_t count_in_utf16_code_units, String const&);
|
||||
|
||||
Locale::Segmenter& segmenter();
|
||||
|
||||
protected:
|
||||
CharacterData(Document&, NodeType, String const&);
|
||||
|
||||
|
@ -47,6 +50,8 @@ protected:
|
|||
|
||||
private:
|
||||
String m_data;
|
||||
|
||||
OwnPtr<Locale::Segmenter> m_segmenter;
|
||||
};
|
||||
|
||||
}
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
*/
|
||||
|
||||
#include <AK/Utf8View.h>
|
||||
#include <LibUnicode/Segmentation.h>
|
||||
#include <LibLocale/Segmenter.h>
|
||||
#include <LibWeb/DOM/Node.h>
|
||||
#include <LibWeb/DOM/Position.h>
|
||||
#include <LibWeb/DOM/Text.h>
|
||||
|
@ -40,9 +40,8 @@ bool Position::increment_offset()
|
|||
return false;
|
||||
|
||||
auto& node = verify_cast<DOM::Text>(*m_node);
|
||||
auto text = Utf8View(node.data());
|
||||
|
||||
if (auto offset = Unicode::next_grapheme_segmentation_boundary(text, m_offset); offset.has_value()) {
|
||||
if (auto offset = node.segmenter().next_boundary(m_offset); offset.has_value()) {
|
||||
m_offset = *offset;
|
||||
return true;
|
||||
}
|
||||
|
@ -57,9 +56,8 @@ bool Position::decrement_offset()
|
|||
return false;
|
||||
|
||||
auto& node = verify_cast<DOM::Text>(*m_node);
|
||||
auto text = Utf8View(node.data());
|
||||
|
||||
if (auto offset = Unicode::previous_grapheme_segmentation_boundary(text, m_offset); offset.has_value()) {
|
||||
if (auto offset = node.segmenter().previous_boundary(m_offset); offset.has_value()) {
|
||||
m_offset = *offset;
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -10,7 +10,7 @@
|
|||
#include <LibGfx/Painter.h>
|
||||
#include <LibGfx/Quad.h>
|
||||
#include <LibGfx/Rect.h>
|
||||
#include <LibUnicode/Segmentation.h>
|
||||
#include <LibLocale/Segmenter.h>
|
||||
#include <LibWeb/Bindings/CanvasRenderingContext2DPrototype.h>
|
||||
#include <LibWeb/Bindings/Intrinsics.h>
|
||||
#include <LibWeb/HTML/CanvasRenderingContext2D.h>
|
||||
|
@ -468,7 +468,7 @@ CanvasRenderingContext2D::PreparedText CanvasRenderingContext2D::prepare_text(By
|
|||
for (auto c : text) {
|
||||
builder.append(Infra::is_ascii_whitespace(c) ? ' ' : c);
|
||||
}
|
||||
auto replaced_text = builder.string_view();
|
||||
auto replaced_text = MUST(builder.to_string());
|
||||
|
||||
// 3. Let font be the current font of target, as given by that object's font attribute.
|
||||
auto font = current_font();
|
||||
|
@ -497,8 +497,6 @@ CanvasRenderingContext2D::PreparedText CanvasRenderingContext2D::prepare_text(By
|
|||
size_t width = font->width(text.view());
|
||||
size_t height = font->pixel_size();
|
||||
|
||||
Utf8View replaced_text_view { replaced_text };
|
||||
|
||||
// 6. If maxWidth was provided and the hypothetical width of the inline box in the hypothetical line box is greater than maxWidth CSS pixels, then change font to have a more condensed font (if one is available or if a reasonably readable one can be synthesized by applying a horizontal scale factor to the font) or a smaller font, and return to the previous step.
|
||||
// FIXME: Record the font size used for this piece of text, and actually retry with a smaller size if needed.
|
||||
|
||||
|
@ -520,17 +518,19 @@ CanvasRenderingContext2D::PreparedText CanvasRenderingContext2D::prepare_text(By
|
|||
|
||||
// 8. Let result be an array constructed by iterating over each glyph in the inline box from left to right (if any), adding to the array, for each glyph, the shape of the glyph as it is in the inline box, positioned on a coordinate space using CSS pixels with its origin is at the anchor point.
|
||||
PreparedText prepared_text { {}, physical_alignment, { 0, 0, static_cast<int>(width), static_cast<int>(height) } };
|
||||
prepared_text.glyphs.ensure_capacity(replaced_text.length());
|
||||
prepared_text.glyphs.ensure_capacity(replaced_text.bytes_as_string_view().length());
|
||||
|
||||
size_t previous_grapheme_boundary = 0;
|
||||
Unicode::for_each_grapheme_segmentation_boundary(replaced_text_view, [&](auto boundary) {
|
||||
auto segmenter = Locale::Segmenter::create(Locale::SegmenterGranularity::Grapheme);
|
||||
|
||||
size_t previous_boundary = 0;
|
||||
segmenter->for_each_boundary(replaced_text, [&](auto boundary) {
|
||||
if (boundary == 0)
|
||||
return IterationDecision::Continue;
|
||||
|
||||
auto glyph_view = replaced_text_view.substring_view(previous_grapheme_boundary, boundary - previous_grapheme_boundary);
|
||||
auto glyph = String::from_utf8(glyph_view.as_string()).release_value_but_fixme_should_propagate_errors();
|
||||
|
||||
auto glyph = MUST(replaced_text.substring_from_byte_offset(previous_boundary, boundary - previous_boundary));
|
||||
prepared_text.glyphs.append({ move(glyph), { static_cast<int>(boundary), 0 } });
|
||||
|
||||
previous_boundary = boundary;
|
||||
return IterationDecision::Continue;
|
||||
});
|
||||
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
|
||||
#include <AK/StringBuilder.h>
|
||||
#include <AK/Utf8View.h>
|
||||
#include <LibUnicode/Segmentation.h>
|
||||
#include <LibLocale/Segmenter.h>
|
||||
#include <LibWeb/DOM/Document.h>
|
||||
#include <LibWeb/DOM/Position.h>
|
||||
#include <LibWeb/DOM/Range.h>
|
||||
|
@ -22,15 +22,15 @@ void EditEventHandler::handle_delete_character_after(JS::NonnullGCPtr<DOM::Posit
|
|||
auto& node = verify_cast<DOM::Text>(*cursor_position->node());
|
||||
auto& text = node.data();
|
||||
|
||||
auto next_grapheme_offset = Unicode::next_grapheme_segmentation_boundary(Utf8View { text }, cursor_position->offset());
|
||||
if (!next_grapheme_offset.has_value()) {
|
||||
auto next_offset = node.segmenter().next_boundary(cursor_position->offset());
|
||||
if (!next_offset.has_value()) {
|
||||
// FIXME: Move to the next node and delete the first character there.
|
||||
return;
|
||||
}
|
||||
|
||||
StringBuilder builder;
|
||||
builder.append(text.bytes_as_string_view().substring_view(0, cursor_position->offset()));
|
||||
builder.append(text.bytes_as_string_view().substring_view(*next_grapheme_offset));
|
||||
builder.append(text.bytes_as_string_view().substring_view(*next_offset));
|
||||
node.set_data(MUST(builder.to_string()));
|
||||
|
||||
m_navigable->did_edit({});
|
||||
|
|
Loading…
Add table
Reference in a new issue