TextNode.cpp 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593
  1. /*
  2. * Copyright (c) 2018-2021, Andreas Kling <andreas@ladybird.org>
  3. * Copyright (c) 2022, Tobias Christiansen <tobyase@serenityos.org>
  4. *
  5. * SPDX-License-Identifier: BSD-2-Clause
  6. */
  7. #include <AK/CharacterTypes.h>
  8. #include <AK/StringBuilder.h>
  9. #include <LibUnicode/CharacterTypes.h>
  10. #include <LibUnicode/Locale.h>
  11. #include <LibWeb/DOM/Document.h>
  12. #include <LibWeb/Layout/BlockContainer.h>
  13. #include <LibWeb/Layout/InlineFormattingContext.h>
  14. #include <LibWeb/Layout/TextNode.h>
  15. #include <LibWeb/Painting/TextPaintable.h>
  16. namespace Web::Layout {
  17. GC_DEFINE_ALLOCATOR(TextNode);
  18. TextNode::TextNode(DOM::Document& document, DOM::Text& text)
  19. : Node(document, &text)
  20. {
  21. }
  22. TextNode::~TextNode() = default;
  23. static bool is_all_whitespace(StringView string)
  24. {
  25. for (size_t i = 0; i < string.length(); ++i) {
  26. if (!is_ascii_space(string[i]))
  27. return false;
  28. }
  29. return true;
  30. }
  31. // https://w3c.github.io/mathml-core/#new-text-transform-values
  32. static String apply_math_auto_text_transform(String const& string)
  33. {
  34. // https://w3c.github.io/mathml-core/#italic-mappings
  35. auto map_code_point_to_italic = [](u32 code_point) -> u32 {
  36. switch (code_point) {
  37. case 0x0041:
  38. return 0x1D434;
  39. case 0x0042:
  40. return 0x1D435;
  41. case 0x0043:
  42. return 0x1D436;
  43. case 0x0044:
  44. return 0x1D437;
  45. case 0x0045:
  46. return 0x1D438;
  47. case 0x0046:
  48. return 0x1D439;
  49. case 0x0047:
  50. return 0x1D43A;
  51. case 0x0048:
  52. return 0x1D43B;
  53. case 0x0049:
  54. return 0x1D43C;
  55. case 0x004A:
  56. return 0x1D43D;
  57. case 0x004B:
  58. return 0x1D43E;
  59. case 0x004C:
  60. return 0x1D43F;
  61. case 0x004D:
  62. return 0x1D440;
  63. case 0x004E:
  64. return 0x1D441;
  65. case 0x004F:
  66. return 0x1D442;
  67. case 0x0050:
  68. return 0x1D443;
  69. case 0x0051:
  70. return 0x1D444;
  71. case 0x0052:
  72. return 0x1D445;
  73. case 0x0053:
  74. return 0x1D446;
  75. case 0x0054:
  76. return 0x1D447;
  77. case 0x0055:
  78. return 0x1D448;
  79. case 0x0056:
  80. return 0x1D449;
  81. case 0x0057:
  82. return 0x1D44A;
  83. case 0x0058:
  84. return 0x1D44B;
  85. case 0x0059:
  86. return 0x1D44C;
  87. case 0x005A:
  88. return 0x1D44D;
  89. case 0x0061:
  90. return 0x1D44E;
  91. case 0x0062:
  92. return 0x1D44F;
  93. case 0x0063:
  94. return 0x1D450;
  95. case 0x0064:
  96. return 0x1D451;
  97. case 0x0065:
  98. return 0x1D452;
  99. case 0x0066:
  100. return 0x1D453;
  101. case 0x0067:
  102. return 0x1D454;
  103. case 0x0068:
  104. return 0x0210E;
  105. case 0x0069:
  106. return 0x1D456;
  107. case 0x006A:
  108. return 0x1D457;
  109. case 0x006B:
  110. return 0x1D458;
  111. case 0x006C:
  112. return 0x1D459;
  113. case 0x006D:
  114. return 0x1D45A;
  115. case 0x006E:
  116. return 0x1D45B;
  117. case 0x006F:
  118. return 0x1D45C;
  119. case 0x0070:
  120. return 0x1D45D;
  121. case 0x0071:
  122. return 0x1D45E;
  123. case 0x0072:
  124. return 0x1D45F;
  125. case 0x0073:
  126. return 0x1D460;
  127. case 0x0074:
  128. return 0x1D461;
  129. case 0x0075:
  130. return 0x1D462;
  131. case 0x0076:
  132. return 0x1D463;
  133. case 0x0077:
  134. return 0x1D464;
  135. case 0x0078:
  136. return 0x1D465;
  137. case 0x0079:
  138. return 0x1D466;
  139. case 0x007A:
  140. return 0x1D467;
  141. case 0x0131:
  142. return 0x1D6A4;
  143. case 0x0237:
  144. return 0x1D6A5;
  145. case 0x0391:
  146. return 0x1D6E2;
  147. case 0x0392:
  148. return 0x1D6E3;
  149. case 0x0393:
  150. return 0x1D6E4;
  151. case 0x0394:
  152. return 0x1D6E5;
  153. case 0x0395:
  154. return 0x1D6E6;
  155. case 0x0396:
  156. return 0x1D6E7;
  157. case 0x0397:
  158. return 0x1D6E8;
  159. case 0x0398:
  160. return 0x1D6E9;
  161. case 0x0399:
  162. return 0x1D6EA;
  163. case 0x039A:
  164. return 0x1D6EB;
  165. case 0x039B:
  166. return 0x1D6EC;
  167. case 0x039C:
  168. return 0x1D6ED;
  169. case 0x039D:
  170. return 0x1D6EE;
  171. case 0x039E:
  172. return 0x1D6EF;
  173. case 0x039F:
  174. return 0x1D6F0;
  175. case 0x03A0:
  176. return 0x1D6F1;
  177. case 0x03A1:
  178. return 0x1D6F2;
  179. case 0x03F4:
  180. return 0x1D6F3;
  181. case 0x03A3:
  182. return 0x1D6F4;
  183. case 0x03A4:
  184. return 0x1D6F5;
  185. case 0x03A5:
  186. return 0x1D6F6;
  187. case 0x03A6:
  188. return 0x1D6F7;
  189. case 0x03A7:
  190. return 0x1D6F8;
  191. case 0x03A8:
  192. return 0x1D6F9;
  193. case 0x03A9:
  194. return 0x1D6FA;
  195. case 0x2207:
  196. return 0x1D6FB;
  197. case 0x03B1:
  198. return 0x1D6FC;
  199. case 0x03B2:
  200. return 0x1D6FD;
  201. case 0x03B3:
  202. return 0x1D6FE;
  203. case 0x03B4:
  204. return 0x1D6FF;
  205. case 0x03B5:
  206. return 0x1D700;
  207. case 0x03B6:
  208. return 0x1D701;
  209. case 0x03B7:
  210. return 0x1D702;
  211. case 0x03B8:
  212. return 0x1D703;
  213. case 0x03B9:
  214. return 0x1D704;
  215. case 0x03BA:
  216. return 0x1D705;
  217. case 0x03BB:
  218. return 0x1D706;
  219. case 0x03BC:
  220. return 0x1D707;
  221. case 0x03BD:
  222. return 0x1D708;
  223. case 0x03BE:
  224. return 0x1D709;
  225. case 0x03BF:
  226. return 0x1D70A;
  227. case 0x03C0:
  228. return 0x1D70B;
  229. case 0x03C1:
  230. return 0x1D70C;
  231. case 0x03C2:
  232. return 0x1D70D;
  233. case 0x03C3:
  234. return 0x1D70E;
  235. case 0x03C4:
  236. return 0x1D70F;
  237. case 0x03C5:
  238. return 0x1D710;
  239. case 0x03C6:
  240. return 0x1D711;
  241. case 0x03C7:
  242. return 0x1D712;
  243. case 0x03C8:
  244. return 0x1D713;
  245. case 0x03C9:
  246. return 0x1D714;
  247. case 0x2202:
  248. return 0x1D715;
  249. case 0x03F5:
  250. return 0x1D716;
  251. case 0x03D1:
  252. return 0x1D717;
  253. case 0x03F0:
  254. return 0x1D718;
  255. case 0x03D5:
  256. return 0x1D719;
  257. case 0x03F1:
  258. return 0x1D71A;
  259. case 0x03D6:
  260. return 0x1D71B;
  261. default:
  262. return code_point;
  263. }
  264. };
  265. StringBuilder builder(string.bytes().size());
  266. for (auto code_point : string.code_points())
  267. builder.append_code_point(map_code_point_to_italic(code_point));
  268. return MUST(builder.to_string());
  269. }
  270. static ErrorOr<String> apply_text_transform(String const& string, CSS::TextTransform text_transform, Optional<StringView> const& locale)
  271. {
  272. switch (text_transform) {
  273. case CSS::TextTransform::Uppercase:
  274. return string.to_uppercase(locale);
  275. case CSS::TextTransform::Lowercase:
  276. return string.to_lowercase(locale);
  277. case CSS::TextTransform::None:
  278. return string;
  279. case CSS::TextTransform::MathAuto:
  280. return apply_math_auto_text_transform(string);
  281. case CSS::TextTransform::Capitalize: {
  282. return string.to_titlecase(locale, TrailingCodePointTransformation::PreserveExisting);
  283. }
  284. case CSS::TextTransform::FullSizeKana: {
  285. // FIXME: Implement this!
  286. return string;
  287. }
  288. case CSS::TextTransform::FullWidth: {
  289. return string.to_fullwidth();
  290. }
  291. }
  292. VERIFY_NOT_REACHED();
  293. }
  294. void TextNode::invalidate_text_for_rendering()
  295. {
  296. m_text_for_rendering = {};
  297. m_grapheme_segmenter.clear();
  298. }
  299. String const& TextNode::text_for_rendering() const
  300. {
  301. if (!m_text_for_rendering.has_value())
  302. const_cast<TextNode*>(this)->compute_text_for_rendering();
  303. return *m_text_for_rendering;
  304. }
  305. // NOTE: This collapses whitespace into a single ASCII space if the CSS white-space property tells us to.
  306. void TextNode::compute_text_for_rendering()
  307. {
  308. if (dom_node().is_password_input()) {
  309. m_text_for_rendering = MUST(String::repeated('*', dom_node().data().code_points().length()));
  310. return;
  311. }
  312. bool collapse = [](CSS::WhiteSpace white_space) {
  313. switch (white_space) {
  314. case CSS::WhiteSpace::Normal:
  315. case CSS::WhiteSpace::Nowrap:
  316. case CSS::WhiteSpace::PreLine:
  317. return true;
  318. case CSS::WhiteSpace::Pre:
  319. case CSS::WhiteSpace::PreWrap:
  320. return false;
  321. }
  322. VERIFY_NOT_REACHED();
  323. }(computed_values().white_space());
  324. if (dom_node().is_editable() && !dom_node().is_uninteresting_whitespace_node())
  325. collapse = false;
  326. auto const* parent_element = dom_node().parent_element();
  327. auto const maybe_lang = parent_element ? parent_element->lang() : Optional<String> {};
  328. auto const lang = maybe_lang.has_value() ? maybe_lang.value() : Optional<StringView> {};
  329. auto data = apply_text_transform(dom_node().data(), computed_values().text_transform(), lang).release_value_but_fixme_should_propagate_errors();
  330. auto data_view = data.bytes_as_string_view();
  331. if (!collapse || data.is_empty()) {
  332. m_text_for_rendering = data;
  333. return;
  334. }
  335. // NOTE: A couple fast returns to avoid unnecessarily allocating a StringBuilder.
  336. if (data_view.length() == 1) {
  337. if (is_ascii_space(data_view[0])) {
  338. static String s_single_space_string = " "_string;
  339. m_text_for_rendering = s_single_space_string;
  340. } else {
  341. m_text_for_rendering = data;
  342. }
  343. return;
  344. }
  345. bool contains_space = false;
  346. for (auto c : data_view) {
  347. if (is_ascii_space(c)) {
  348. contains_space = true;
  349. break;
  350. }
  351. }
  352. if (!contains_space) {
  353. m_text_for_rendering = data;
  354. return;
  355. }
  356. StringBuilder builder(data_view.length());
  357. size_t index = 0;
  358. auto skip_over_whitespace = [&index, &data_view] {
  359. while (index < data_view.length() && is_ascii_space(data_view[index]))
  360. ++index;
  361. };
  362. while (index < data_view.length()) {
  363. if (is_ascii_space(data_view[index])) {
  364. builder.append(' ');
  365. ++index;
  366. skip_over_whitespace();
  367. } else {
  368. builder.append(data_view[index]);
  369. ++index;
  370. }
  371. }
  372. m_text_for_rendering = MUST(builder.to_string());
  373. }
  374. Unicode::Segmenter& TextNode::grapheme_segmenter() const
  375. {
  376. if (!m_grapheme_segmenter) {
  377. m_grapheme_segmenter = document().grapheme_segmenter().clone();
  378. m_grapheme_segmenter->set_segmented_text(text_for_rendering());
  379. }
  380. return *m_grapheme_segmenter;
  381. }
  382. TextNode::ChunkIterator::ChunkIterator(TextNode const& text_node, bool wrap_lines, bool respect_linebreaks)
  383. : m_wrap_lines(wrap_lines)
  384. , m_respect_linebreaks(respect_linebreaks)
  385. , m_utf8_view(text_node.text_for_rendering())
  386. , m_font_cascade_list(text_node.computed_values().font_list())
  387. , m_grapheme_segmenter(text_node.grapheme_segmenter())
  388. {
  389. }
  390. static Gfx::GlyphRun::TextType text_type_for_code_point(u32 code_point)
  391. {
  392. switch (Unicode::bidirectional_class(code_point)) {
  393. case Unicode::BidiClass::WhiteSpaceNeutral:
  394. case Unicode::BidiClass::BlockSeparator:
  395. case Unicode::BidiClass::SegmentSeparator:
  396. case Unicode::BidiClass::CommonNumberSeparator:
  397. case Unicode::BidiClass::DirNonSpacingMark:
  398. case Unicode::BidiClass::ArabicNumber:
  399. case Unicode::BidiClass::EuropeanNumber:
  400. case Unicode::BidiClass::EuropeanNumberSeparator:
  401. case Unicode::BidiClass::EuropeanNumberTerminator:
  402. return Gfx::GlyphRun::TextType::ContextDependent;
  403. case Unicode::BidiClass::BoundaryNeutral:
  404. case Unicode::BidiClass::OtherNeutral:
  405. case Unicode::BidiClass::FirstStrongIsolate:
  406. case Unicode::BidiClass::PopDirectionalFormat:
  407. case Unicode::BidiClass::PopDirectionalIsolate:
  408. return Gfx::GlyphRun::TextType::Common;
  409. case Unicode::BidiClass::LeftToRight:
  410. case Unicode::BidiClass::LeftToRightEmbedding:
  411. case Unicode::BidiClass::LeftToRightIsolate:
  412. case Unicode::BidiClass::LeftToRightOverride:
  413. return Gfx::GlyphRun::TextType::Ltr;
  414. case Unicode::BidiClass::RightToLeft:
  415. case Unicode::BidiClass::RightToLeftArabic:
  416. case Unicode::BidiClass::RightToLeftEmbedding:
  417. case Unicode::BidiClass::RightToLeftIsolate:
  418. case Unicode::BidiClass::RightToLeftOverride:
  419. return Gfx::GlyphRun::TextType::Rtl;
  420. default:
  421. VERIFY_NOT_REACHED();
  422. }
  423. }
  424. Optional<TextNode::Chunk> TextNode::ChunkIterator::next()
  425. {
  426. if (!m_peek_queue.is_empty())
  427. return m_peek_queue.take_first();
  428. return next_without_peek();
  429. }
  430. Optional<TextNode::Chunk> TextNode::ChunkIterator::peek(size_t count)
  431. {
  432. while (m_peek_queue.size() <= count) {
  433. auto next = next_without_peek();
  434. if (!next.has_value())
  435. return {};
  436. m_peek_queue.append(*next);
  437. }
  438. return m_peek_queue[count];
  439. }
  440. Optional<TextNode::Chunk> TextNode::ChunkIterator::next_without_peek()
  441. {
  442. if (m_current_index >= m_utf8_view.byte_length())
  443. return {};
  444. auto current_code_point = [this]() {
  445. return *m_utf8_view.iterator_at_byte_offset_without_validation(m_current_index);
  446. };
  447. auto next_grapheme_boundary = [this]() {
  448. return m_grapheme_segmenter.next_boundary(m_current_index).value_or(m_utf8_view.byte_length());
  449. };
  450. auto code_point = current_code_point();
  451. auto start_of_chunk = m_current_index;
  452. Gfx::Font const& font = m_font_cascade_list.font_for_code_point(code_point);
  453. auto text_type = text_type_for_code_point(code_point);
  454. auto broken_on_tab = false;
  455. while (m_current_index < m_utf8_view.byte_length()) {
  456. code_point = current_code_point();
  457. if (code_point == '\t') {
  458. if (auto result = try_commit_chunk(start_of_chunk, m_current_index, false, broken_on_tab, font, text_type); result.has_value())
  459. return result.release_value();
  460. broken_on_tab = true;
  461. // consume any consecutive tabs
  462. while (m_current_index < m_utf8_view.byte_length() && current_code_point() == '\t') {
  463. m_current_index = next_grapheme_boundary();
  464. }
  465. }
  466. if (&font != &m_font_cascade_list.font_for_code_point(code_point)) {
  467. if (auto result = try_commit_chunk(start_of_chunk, m_current_index, false, broken_on_tab, font, text_type); result.has_value())
  468. return result.release_value();
  469. }
  470. if (m_respect_linebreaks && code_point == '\n') {
  471. // Newline encountered, and we're supposed to preserve them.
  472. // If we have accumulated some code points in the current chunk, commit them now and continue with the newline next time.
  473. if (auto result = try_commit_chunk(start_of_chunk, m_current_index, false, broken_on_tab, font, text_type); result.has_value())
  474. return result.release_value();
  475. // Otherwise, commit the newline!
  476. m_current_index = next_grapheme_boundary();
  477. auto result = try_commit_chunk(start_of_chunk, m_current_index, true, broken_on_tab, font, text_type);
  478. VERIFY(result.has_value());
  479. return result.release_value();
  480. }
  481. if (m_wrap_lines) {
  482. if (text_type != text_type_for_code_point(code_point)) {
  483. if (auto result = try_commit_chunk(start_of_chunk, m_current_index, false, broken_on_tab, font, text_type); result.has_value()) {
  484. return result.release_value();
  485. }
  486. }
  487. if (is_ascii_space(code_point)) {
  488. // Whitespace encountered, and we're allowed to break on whitespace.
  489. // If we have accumulated some code points in the current chunk, commit them now and continue with the whitespace next time.
  490. if (auto result = try_commit_chunk(start_of_chunk, m_current_index, false, broken_on_tab, font, text_type); result.has_value()) {
  491. return result.release_value();
  492. }
  493. // Otherwise, commit the whitespace!
  494. m_current_index = next_grapheme_boundary();
  495. if (auto result = try_commit_chunk(start_of_chunk, m_current_index, false, broken_on_tab, font, text_type); result.has_value())
  496. return result.release_value();
  497. continue;
  498. }
  499. }
  500. m_current_index
  501. = next_grapheme_boundary();
  502. }
  503. if (start_of_chunk != m_utf8_view.byte_length()) {
  504. // Try to output whatever's left at the end of the text node.
  505. if (auto result = try_commit_chunk(start_of_chunk, m_utf8_view.byte_length(), false, broken_on_tab, font, text_type); result.has_value())
  506. return result.release_value();
  507. }
  508. return {};
  509. }
  510. Optional<TextNode::Chunk> TextNode::ChunkIterator::try_commit_chunk(size_t start, size_t end, bool has_breaking_newline, bool has_breaking_tab, Gfx::Font const& font, Gfx::GlyphRun::TextType text_type) const
  511. {
  512. if (auto byte_length = end - start; byte_length > 0) {
  513. auto chunk_view = m_utf8_view.substring_view(start, byte_length);
  514. return Chunk {
  515. .view = chunk_view,
  516. .font = font,
  517. .start = start,
  518. .length = byte_length,
  519. .has_breaking_newline = has_breaking_newline,
  520. .has_breaking_tab = has_breaking_tab,
  521. .is_all_whitespace = is_all_whitespace(chunk_view.as_string()),
  522. .text_type = text_type,
  523. };
  524. }
  525. return {};
  526. }
  527. GC::Ptr<Painting::Paintable> TextNode::create_paintable() const
  528. {
  529. return Painting::TextPaintable::create(*this, text_for_rendering());
  530. }
  531. }