LibWeb: Support finding text split across multiple text nodes

Previously, the find-in-page function would fail to find text that was
split across multiple text nodes. For example, given the following
markup: `WH<span>F`, the query `WHF` would fail to match.

This is fixed by traversing all of the document's text nodes,
constructing a complete string to query against, and keeping track of
the locations where that string is split across multiple nodes.
This commit is contained in:
Tim Ledbetter 2024-06-13 19:43:08 +01:00 committed by Andreas Kling
parent dbc94ce92e
commit ec4d29849d
Notes: sideshowbarker 2024-07-18 00:34:07 +09:00

View file

@ -5113,29 +5113,65 @@ Vector<JS::Handle<DOM::Range>> Document::find_matching_text(String const& query,
if (!document_element() || !document_element()->layout_node())
return {};
Vector<JS::Handle<DOM::Range>> matches;
struct TextPositionNode {
DOM::Text& dom_node;
size_t start_offset { 0 };
};
StringBuilder builder;
Vector<TextPositionNode> text_positions;
size_t current_start_position = 0;
String current_node_text;
document_element()->layout_node()->for_each_in_inclusive_subtree_of_type<Layout::TextNode>([&](auto const& text_node) {
auto const& text = text_node.text_for_rendering();
size_t offset = 0;
while (true) {
auto match_index = case_sensitivity == CaseSensitivity::CaseInsensitive
? text.find_byte_offset_ignoring_case(query, offset)
: text.find_byte_offset(query, offset);
if (!match_index.has_value())
break;
auto range = create_range();
auto& dom_node = const_cast<DOM::Text&>(text_node.dom_node());
(void)range->set_start(dom_node, match_index.value());
(void)range->set_end(dom_node, match_index.value() + query.code_points().length());
matches.append(range);
offset = match_index.value() + 1;
auto& dom_node = const_cast<DOM::Text&>(text_node.dom_node());
if (text_positions.is_empty()) {
text_positions.empend(dom_node);
} else {
current_start_position += current_node_text.bytes_as_string_view().length();
text_positions.empend(dom_node, current_start_position);
}
current_node_text = text_node.text_for_rendering();
builder.append(current_node_text);
return TraversalDecision::Continue;
});
if (text_positions.is_empty())
return {};
size_t offset = 0;
auto* match_start_position = &text_positions[0];
auto text = builder.to_string_without_validation();
Vector<JS::Handle<DOM::Range>> matches;
while (true) {
auto match_index = case_sensitivity == CaseSensitivity::CaseInsensitive
? text.find_byte_offset_ignoring_case(query, offset)
: text.find_byte_offset(query, offset);
if (!match_index.has_value())
break;
size_t i = 0;
for (; i < text_positions.size() && match_index.value() > text_positions[i].start_offset; ++i)
match_start_position = &text_positions[i];
auto range = create_range();
auto start_position = match_index.value() - match_start_position->start_offset;
auto& start_dom_node = match_start_position->dom_node;
(void)range->set_start(start_dom_node, start_position);
auto* match_end_position = match_start_position;
for (; i < text_positions.size() && match_index.value() + query.bytes_as_string_view().length() > text_positions[i].start_offset; ++i)
match_end_position = &text_positions[i];
auto& end_dom_node = match_end_position->dom_node;
auto end_position = match_index.value() - match_end_position->start_offset + query.bytes_as_string_view().length();
(void)range->set_end(end_dom_node, end_position);
matches.append(range);
offset = match_index.value() + query.bytes_as_string_view().length() + 1;
match_start_position = match_end_position;
}
return matches;
}