TextNode.cpp 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585
  1. /*
  2. * Copyright (c) 2018-2021, Andreas Kling <andreas@ladybird.org>
  3. * Copyright (c) 2022, Tobias Christiansen <tobyase@serenityos.org>
  4. *
  5. * SPDX-License-Identifier: BSD-2-Clause
  6. */
  7. #include <AK/CharacterTypes.h>
  8. #include <AK/StringBuilder.h>
  9. #include <LibUnicode/CharacterTypes.h>
  10. #include <LibWeb/DOM/Document.h>
  11. #include <LibWeb/Layout/BlockContainer.h>
  12. #include <LibWeb/Layout/InlineFormattingContext.h>
  13. #include <LibWeb/Layout/TextNode.h>
  14. #include <LibWeb/Painting/TextPaintable.h>
  15. namespace Web::Layout {
  16. JS_DEFINE_ALLOCATOR(TextNode);
  17. TextNode::TextNode(DOM::Document& document, DOM::Text& text)
  18. : Node(document, &text)
  19. {
  20. }
  21. TextNode::~TextNode() = default;
  22. static bool is_all_whitespace(StringView string)
  23. {
  24. for (size_t i = 0; i < string.length(); ++i) {
  25. if (!is_ascii_space(string[i]))
  26. return false;
  27. }
  28. return true;
  29. }
  30. // https://w3c.github.io/mathml-core/#new-text-transform-values
  31. static String apply_math_auto_text_transform(String const& string)
  32. {
  33. // https://w3c.github.io/mathml-core/#italic-mappings
  34. auto map_code_point_to_italic = [](u32 code_point) -> u32 {
  35. switch (code_point) {
  36. case 0x0041:
  37. return 0x1D434;
  38. case 0x0042:
  39. return 0x1D435;
  40. case 0x0043:
  41. return 0x1D436;
  42. case 0x0044:
  43. return 0x1D437;
  44. case 0x0045:
  45. return 0x1D438;
  46. case 0x0046:
  47. return 0x1D439;
  48. case 0x0047:
  49. return 0x1D43A;
  50. case 0x0048:
  51. return 0x1D43B;
  52. case 0x0049:
  53. return 0x1D43C;
  54. case 0x004A:
  55. return 0x1D43D;
  56. case 0x004B:
  57. return 0x1D43E;
  58. case 0x004C:
  59. return 0x1D43F;
  60. case 0x004D:
  61. return 0x1D440;
  62. case 0x004E:
  63. return 0x1D441;
  64. case 0x004F:
  65. return 0x1D442;
  66. case 0x0050:
  67. return 0x1D443;
  68. case 0x0051:
  69. return 0x1D444;
  70. case 0x0052:
  71. return 0x1D445;
  72. case 0x0053:
  73. return 0x1D446;
  74. case 0x0054:
  75. return 0x1D447;
  76. case 0x0055:
  77. return 0x1D448;
  78. case 0x0056:
  79. return 0x1D449;
  80. case 0x0057:
  81. return 0x1D44A;
  82. case 0x0058:
  83. return 0x1D44B;
  84. case 0x0059:
  85. return 0x1D44C;
  86. case 0x005A:
  87. return 0x1D44D;
  88. case 0x0061:
  89. return 0x1D44E;
  90. case 0x0062:
  91. return 0x1D44F;
  92. case 0x0063:
  93. return 0x1D450;
  94. case 0x0064:
  95. return 0x1D451;
  96. case 0x0065:
  97. return 0x1D452;
  98. case 0x0066:
  99. return 0x1D453;
  100. case 0x0067:
  101. return 0x1D454;
  102. case 0x0068:
  103. return 0x0210E;
  104. case 0x0069:
  105. return 0x1D456;
  106. case 0x006A:
  107. return 0x1D457;
  108. case 0x006B:
  109. return 0x1D458;
  110. case 0x006C:
  111. return 0x1D459;
  112. case 0x006D:
  113. return 0x1D45A;
  114. case 0x006E:
  115. return 0x1D45B;
  116. case 0x006F:
  117. return 0x1D45C;
  118. case 0x0070:
  119. return 0x1D45D;
  120. case 0x0071:
  121. return 0x1D45E;
  122. case 0x0072:
  123. return 0x1D45F;
  124. case 0x0073:
  125. return 0x1D460;
  126. case 0x0074:
  127. return 0x1D461;
  128. case 0x0075:
  129. return 0x1D462;
  130. case 0x0076:
  131. return 0x1D463;
  132. case 0x0077:
  133. return 0x1D464;
  134. case 0x0078:
  135. return 0x1D465;
  136. case 0x0079:
  137. return 0x1D466;
  138. case 0x007A:
  139. return 0x1D467;
  140. case 0x0131:
  141. return 0x1D6A4;
  142. case 0x0237:
  143. return 0x1D6A5;
  144. case 0x0391:
  145. return 0x1D6E2;
  146. case 0x0392:
  147. return 0x1D6E3;
  148. case 0x0393:
  149. return 0x1D6E4;
  150. case 0x0394:
  151. return 0x1D6E5;
  152. case 0x0395:
  153. return 0x1D6E6;
  154. case 0x0396:
  155. return 0x1D6E7;
  156. case 0x0397:
  157. return 0x1D6E8;
  158. case 0x0398:
  159. return 0x1D6E9;
  160. case 0x0399:
  161. return 0x1D6EA;
  162. case 0x039A:
  163. return 0x1D6EB;
  164. case 0x039B:
  165. return 0x1D6EC;
  166. case 0x039C:
  167. return 0x1D6ED;
  168. case 0x039D:
  169. return 0x1D6EE;
  170. case 0x039E:
  171. return 0x1D6EF;
  172. case 0x039F:
  173. return 0x1D6F0;
  174. case 0x03A0:
  175. return 0x1D6F1;
  176. case 0x03A1:
  177. return 0x1D6F2;
  178. case 0x03F4:
  179. return 0x1D6F3;
  180. case 0x03A3:
  181. return 0x1D6F4;
  182. case 0x03A4:
  183. return 0x1D6F5;
  184. case 0x03A5:
  185. return 0x1D6F6;
  186. case 0x03A6:
  187. return 0x1D6F7;
  188. case 0x03A7:
  189. return 0x1D6F8;
  190. case 0x03A8:
  191. return 0x1D6F9;
  192. case 0x03A9:
  193. return 0x1D6FA;
  194. case 0x2207:
  195. return 0x1D6FB;
  196. case 0x03B1:
  197. return 0x1D6FC;
  198. case 0x03B2:
  199. return 0x1D6FD;
  200. case 0x03B3:
  201. return 0x1D6FE;
  202. case 0x03B4:
  203. return 0x1D6FF;
  204. case 0x03B5:
  205. return 0x1D700;
  206. case 0x03B6:
  207. return 0x1D701;
  208. case 0x03B7:
  209. return 0x1D702;
  210. case 0x03B8:
  211. return 0x1D703;
  212. case 0x03B9:
  213. return 0x1D704;
  214. case 0x03BA:
  215. return 0x1D705;
  216. case 0x03BB:
  217. return 0x1D706;
  218. case 0x03BC:
  219. return 0x1D707;
  220. case 0x03BD:
  221. return 0x1D708;
  222. case 0x03BE:
  223. return 0x1D709;
  224. case 0x03BF:
  225. return 0x1D70A;
  226. case 0x03C0:
  227. return 0x1D70B;
  228. case 0x03C1:
  229. return 0x1D70C;
  230. case 0x03C2:
  231. return 0x1D70D;
  232. case 0x03C3:
  233. return 0x1D70E;
  234. case 0x03C4:
  235. return 0x1D70F;
  236. case 0x03C5:
  237. return 0x1D710;
  238. case 0x03C6:
  239. return 0x1D711;
  240. case 0x03C7:
  241. return 0x1D712;
  242. case 0x03C8:
  243. return 0x1D713;
  244. case 0x03C9:
  245. return 0x1D714;
  246. case 0x2202:
  247. return 0x1D715;
  248. case 0x03F5:
  249. return 0x1D716;
  250. case 0x03D1:
  251. return 0x1D717;
  252. case 0x03F0:
  253. return 0x1D718;
  254. case 0x03D5:
  255. return 0x1D719;
  256. case 0x03F1:
  257. return 0x1D71A;
  258. case 0x03D6:
  259. return 0x1D71B;
  260. default:
  261. return code_point;
  262. }
  263. };
  264. StringBuilder builder(string.bytes().size());
  265. for (auto code_point : string.code_points())
  266. builder.append_code_point(map_code_point_to_italic(code_point));
  267. return MUST(builder.to_string());
  268. }
  269. static ErrorOr<String> apply_text_transform(String const& string, CSS::TextTransform text_transform)
  270. {
  271. switch (text_transform) {
  272. case CSS::TextTransform::Uppercase:
  273. return string.to_uppercase();
  274. case CSS::TextTransform::Lowercase:
  275. return string.to_lowercase();
  276. case CSS::TextTransform::None:
  277. return string;
  278. case CSS::TextTransform::MathAuto:
  279. return apply_math_auto_text_transform(string);
  280. case CSS::TextTransform::Capitalize: {
  281. return string.to_titlecase({}, TrailingCodePointTransformation::PreserveExisting);
  282. }
  283. case CSS::TextTransform::FullSizeKana:
  284. case CSS::TextTransform::FullWidth:
  285. // FIXME: Implement these!
  286. return string;
  287. }
  288. VERIFY_NOT_REACHED();
  289. }
  290. void TextNode::invalidate_text_for_rendering()
  291. {
  292. m_text_for_rendering = {};
  293. m_grapheme_segmenter.clear();
  294. }
  295. String const& TextNode::text_for_rendering() const
  296. {
  297. if (!m_text_for_rendering.has_value())
  298. const_cast<TextNode*>(this)->compute_text_for_rendering();
  299. return *m_text_for_rendering;
  300. }
  301. // NOTE: This collapses whitespace into a single ASCII space if the CSS white-space property tells us to.
  302. void TextNode::compute_text_for_rendering()
  303. {
  304. bool collapse = [](CSS::WhiteSpace white_space) {
  305. switch (white_space) {
  306. case CSS::WhiteSpace::Normal:
  307. case CSS::WhiteSpace::Nowrap:
  308. case CSS::WhiteSpace::PreLine:
  309. return true;
  310. case CSS::WhiteSpace::Pre:
  311. case CSS::WhiteSpace::PreWrap:
  312. return false;
  313. }
  314. VERIFY_NOT_REACHED();
  315. }(computed_values().white_space());
  316. if (dom_node().is_editable() && !dom_node().is_uninteresting_whitespace_node())
  317. collapse = false;
  318. auto data = apply_text_transform(dom_node().data(), computed_values().text_transform()).release_value_but_fixme_should_propagate_errors();
  319. auto data_view = data.bytes_as_string_view();
  320. if (dom_node().is_password_input()) {
  321. m_text_for_rendering = MUST(String::repeated('*', data_view.length()));
  322. return;
  323. }
  324. if (!collapse || data.is_empty()) {
  325. m_text_for_rendering = data;
  326. return;
  327. }
  328. // NOTE: A couple fast returns to avoid unnecessarily allocating a StringBuilder.
  329. if (data_view.length() == 1) {
  330. if (is_ascii_space(data_view[0])) {
  331. static String s_single_space_string = " "_string;
  332. m_text_for_rendering = s_single_space_string;
  333. } else {
  334. m_text_for_rendering = data;
  335. }
  336. return;
  337. }
  338. bool contains_space = false;
  339. for (auto c : data_view) {
  340. if (is_ascii_space(c)) {
  341. contains_space = true;
  342. break;
  343. }
  344. }
  345. if (!contains_space) {
  346. m_text_for_rendering = data;
  347. return;
  348. }
  349. StringBuilder builder(data_view.length());
  350. size_t index = 0;
  351. auto skip_over_whitespace = [&index, &data_view] {
  352. while (index < data_view.length() && is_ascii_space(data_view[index]))
  353. ++index;
  354. };
  355. while (index < data_view.length()) {
  356. if (is_ascii_space(data_view[index])) {
  357. builder.append(' ');
  358. ++index;
  359. skip_over_whitespace();
  360. } else {
  361. builder.append(data_view[index]);
  362. ++index;
  363. }
  364. }
  365. m_text_for_rendering = MUST(builder.to_string());
  366. }
  367. Unicode::Segmenter& TextNode::grapheme_segmenter() const
  368. {
  369. if (!m_grapheme_segmenter) {
  370. m_grapheme_segmenter = document().grapheme_segmenter().clone();
  371. m_grapheme_segmenter->set_segmented_text(text_for_rendering());
  372. }
  373. return *m_grapheme_segmenter;
  374. }
  375. TextNode::ChunkIterator::ChunkIterator(TextNode const& text_node, bool wrap_lines, bool respect_linebreaks)
  376. : m_wrap_lines(wrap_lines)
  377. , m_respect_linebreaks(respect_linebreaks)
  378. , m_utf8_view(text_node.text_for_rendering())
  379. , m_font_cascade_list(text_node.computed_values().font_list())
  380. , m_grapheme_segmenter(text_node.grapheme_segmenter())
  381. {
  382. }
  383. static Gfx::GlyphRun::TextType text_type_for_code_point(u32 code_point)
  384. {
  385. switch (Unicode::bidirectional_class(code_point)) {
  386. case Unicode::BidiClass::WhiteSpaceNeutral:
  387. case Unicode::BidiClass::BlockSeparator:
  388. case Unicode::BidiClass::SegmentSeparator:
  389. case Unicode::BidiClass::CommonNumberSeparator:
  390. case Unicode::BidiClass::DirNonSpacingMark:
  391. case Unicode::BidiClass::ArabicNumber:
  392. case Unicode::BidiClass::EuropeanNumber:
  393. case Unicode::BidiClass::EuropeanNumberSeparator:
  394. case Unicode::BidiClass::EuropeanNumberTerminator:
  395. return Gfx::GlyphRun::TextType::ContextDependent;
  396. case Unicode::BidiClass::BoundaryNeutral:
  397. case Unicode::BidiClass::OtherNeutral:
  398. case Unicode::BidiClass::FirstStrongIsolate:
  399. case Unicode::BidiClass::PopDirectionalFormat:
  400. case Unicode::BidiClass::PopDirectionalIsolate:
  401. return Gfx::GlyphRun::TextType::Common;
  402. case Unicode::BidiClass::LeftToRight:
  403. case Unicode::BidiClass::LeftToRightEmbedding:
  404. case Unicode::BidiClass::LeftToRightIsolate:
  405. case Unicode::BidiClass::LeftToRightOverride:
  406. return Gfx::GlyphRun::TextType::Ltr;
  407. case Unicode::BidiClass::RightToLeft:
  408. case Unicode::BidiClass::RightToLeftArabic:
  409. case Unicode::BidiClass::RightToLeftEmbedding:
  410. case Unicode::BidiClass::RightToLeftIsolate:
  411. case Unicode::BidiClass::RightToLeftOverride:
  412. return Gfx::GlyphRun::TextType::Rtl;
  413. default:
  414. VERIFY_NOT_REACHED();
  415. }
  416. }
  417. Optional<TextNode::Chunk> TextNode::ChunkIterator::next()
  418. {
  419. if (!m_peek_queue.is_empty())
  420. return m_peek_queue.take_first();
  421. return next_without_peek();
  422. }
  423. Optional<TextNode::Chunk> TextNode::ChunkIterator::peek(size_t count)
  424. {
  425. while (m_peek_queue.size() <= count) {
  426. auto next = next_without_peek();
  427. if (!next.has_value())
  428. return {};
  429. m_peek_queue.append(*next);
  430. }
  431. return m_peek_queue[count];
  432. }
  433. Optional<TextNode::Chunk> TextNode::ChunkIterator::next_without_peek()
  434. {
  435. if (m_current_index >= m_utf8_view.byte_length())
  436. return {};
  437. auto current_code_point = [this]() {
  438. return *m_utf8_view.iterator_at_byte_offset_without_validation(m_current_index);
  439. };
  440. auto next_grapheme_boundary = [this]() {
  441. return m_grapheme_segmenter.next_boundary(m_current_index).value_or(m_utf8_view.byte_length());
  442. };
  443. auto code_point = current_code_point();
  444. auto start_of_chunk = m_current_index;
  445. Gfx::Font const& font = m_font_cascade_list.font_for_code_point(code_point);
  446. auto text_type = text_type_for_code_point(code_point);
  447. auto broken_on_tab = false;
  448. while (m_current_index < m_utf8_view.byte_length()) {
  449. code_point = current_code_point();
  450. if (code_point == '\t') {
  451. if (auto result = try_commit_chunk(start_of_chunk, m_current_index, false, broken_on_tab, font, text_type); result.has_value())
  452. return result.release_value();
  453. broken_on_tab = true;
  454. // consume any consecutive tabs
  455. while (m_current_index < m_utf8_view.byte_length() && current_code_point() == '\t') {
  456. m_current_index = next_grapheme_boundary();
  457. }
  458. }
  459. if (&font != &m_font_cascade_list.font_for_code_point(code_point)) {
  460. if (auto result = try_commit_chunk(start_of_chunk, m_current_index, false, broken_on_tab, font, text_type); result.has_value())
  461. return result.release_value();
  462. }
  463. if (m_respect_linebreaks && code_point == '\n') {
  464. // Newline encountered, and we're supposed to preserve them.
  465. // If we have accumulated some code points in the current chunk, commit them now and continue with the newline next time.
  466. if (auto result = try_commit_chunk(start_of_chunk, m_current_index, false, broken_on_tab, font, text_type); result.has_value())
  467. return result.release_value();
  468. // Otherwise, commit the newline!
  469. m_current_index = next_grapheme_boundary();
  470. auto result = try_commit_chunk(start_of_chunk, m_current_index, true, broken_on_tab, font, text_type);
  471. VERIFY(result.has_value());
  472. return result.release_value();
  473. }
  474. if (m_wrap_lines) {
  475. if (text_type != text_type_for_code_point(code_point)) {
  476. if (auto result = try_commit_chunk(start_of_chunk, m_current_index, false, broken_on_tab, font, text_type); result.has_value()) {
  477. return result.release_value();
  478. }
  479. }
  480. if (is_ascii_space(code_point)) {
  481. // Whitespace encountered, and we're allowed to break on whitespace.
  482. // If we have accumulated some code points in the current chunk, commit them now and continue with the whitespace next time.
  483. if (auto result = try_commit_chunk(start_of_chunk, m_current_index, false, broken_on_tab, font, text_type); result.has_value()) {
  484. return result.release_value();
  485. }
  486. // Otherwise, commit the whitespace!
  487. m_current_index = next_grapheme_boundary();
  488. if (auto result = try_commit_chunk(start_of_chunk, m_current_index, false, broken_on_tab, font, text_type); result.has_value())
  489. return result.release_value();
  490. continue;
  491. }
  492. }
  493. m_current_index
  494. = next_grapheme_boundary();
  495. }
  496. if (start_of_chunk != m_utf8_view.byte_length()) {
  497. // Try to output whatever's left at the end of the text node.
  498. if (auto result = try_commit_chunk(start_of_chunk, m_utf8_view.byte_length(), false, broken_on_tab, font, text_type); result.has_value())
  499. return result.release_value();
  500. }
  501. return {};
  502. }
  503. Optional<TextNode::Chunk> TextNode::ChunkIterator::try_commit_chunk(size_t start, size_t end, bool has_breaking_newline, bool has_breaking_tab, Gfx::Font const& font, Gfx::GlyphRun::TextType text_type) const
  504. {
  505. if (auto byte_length = end - start; byte_length > 0) {
  506. auto chunk_view = m_utf8_view.substring_view(start, byte_length);
  507. return Chunk {
  508. .view = chunk_view,
  509. .font = font,
  510. .start = start,
  511. .length = byte_length,
  512. .has_breaking_newline = has_breaking_newline,
  513. .has_breaking_tab = has_breaking_tab,
  514. .is_all_whitespace = is_all_whitespace(chunk_view.as_string()),
  515. .text_type = text_type,
  516. };
  517. }
  518. return {};
  519. }
  520. JS::GCPtr<Painting::Paintable> TextNode::create_paintable() const
  521. {
  522. return Painting::TextPaintable::create(*this, text_for_rendering());
  523. }
  524. }