Browse Source

LibGfx: Add basic support for bidirectional text rendering

This adds a *very* simplified version of the UNICODE BIDIRECTIONAL
ALGORITHM (https://www.unicode.org/reports/tr9/) that can render most
bidirectional text but also produces awkward results in a large number
of edge cases; as such, it should probably be replaced with a
fully spec-compliant implementation at some point.
Idan Horowitz 4 years ago
parent
commit
115b445dab

+ 1 - 0
Userland/Libraries/LibGfx/CMakeLists.txt

@@ -29,6 +29,7 @@ set(SOURCES
     Size.cpp
     Size.cpp
     StylePainter.cpp
     StylePainter.cpp
     SystemTheme.cpp
     SystemTheme.cpp
+    TextDirection.cpp
     Triangle.cpp
     Triangle.cpp
     Typeface.cpp
     Typeface.cpp
     WindowTheme.cpp
     WindowTheme.cpp

+ 193 - 4
Userland/Libraries/LibGfx/Painter.cpp

@@ -1,5 +1,6 @@
 /*
 /*
  * Copyright (c) 2018-2021, Andreas Kling <kling@serenityos.org>
  * Copyright (c) 2018-2021, Andreas Kling <kling@serenityos.org>
+ * Copyright (c) 2021, Idan Horowitz <idan.horowitz@serenityos.org>
  *
  *
  * SPDX-License-Identifier: BSD-2-Clause
  * SPDX-License-Identifier: BSD-2-Clause
  */
  */
@@ -23,6 +24,7 @@
 #include <LibGfx/CharacterBitmap.h>
 #include <LibGfx/CharacterBitmap.h>
 #include <LibGfx/Palette.h>
 #include <LibGfx/Palette.h>
 #include <LibGfx/Path.h>
 #include <LibGfx/Path.h>
+#include <LibGfx/TextDirection.h>
 #include <math.h>
 #include <math.h>
 #include <stdio.h>
 #include <stdio.h>
 
 
@@ -1017,12 +1019,12 @@ struct ElidedText<Utf32View> {
 };
 };
 
 
 template<typename TextType, typename DrawGlyphFunction>
 template<typename TextType, typename DrawGlyphFunction>
-void draw_text_line(const IntRect& a_rect, const TextType& text, const Font& font, TextAlignment alignment, TextElision elision, DrawGlyphFunction draw_glyph)
+void draw_text_line(const IntRect& a_rect, const TextType& text, const Font& font, TextAlignment alignment, TextElision elision, TextDirection direction, DrawGlyphFunction draw_glyph)
 {
 {
     auto rect = a_rect;
     auto rect = a_rect;
     TextType final_text(text);
     TextType final_text(text);
     typename ElidedText<TextType>::Type elided_text;
     typename ElidedText<TextType>::Type elided_text;
-    if (elision == TextElision::Right) {
+    if (elision == TextElision::Right) { // FIXME: This needs to be specialized for bidirectional text
         int text_width = font.width(final_text);
         int text_width = font.width(final_text);
         if (font.width(final_text) > rect.width()) {
         if (font.width(final_text) > rect.width()) {
             int glyph_spacing = font.glyph_spacing();
             int glyph_spacing = font.glyph_spacing();
@@ -1074,14 +1076,22 @@ void draw_text_line(const IntRect& a_rect, const TextType& text, const Font& fon
     auto point = rect.location();
     auto point = rect.location();
     int space_width = font.glyph_width(' ') + font.glyph_spacing();
     int space_width = font.glyph_width(' ') + font.glyph_spacing();
 
 
+    if (direction == TextDirection::RTL) {
+        point.move_by(rect.width(), 0); // Start drawing from the end
+        space_width = -space_width;     // Draw spaces backwards
+    }
+
     for (u32 code_point : final_text) {
     for (u32 code_point : final_text) {
         if (code_point == ' ') {
         if (code_point == ' ') {
             point.move_by(space_width, 0);
             point.move_by(space_width, 0);
             continue;
             continue;
         }
         }
         IntSize glyph_size(font.glyph_or_emoji_width(code_point) + font.glyph_spacing(), font.glyph_height());
         IntSize glyph_size(font.glyph_or_emoji_width(code_point) + font.glyph_spacing(), font.glyph_height());
+        if (direction == TextDirection::RTL)
+            point.move_by(-glyph_size.width(), 0); // If we are drawing right to left, we have to move backwards before drawing the glyph
         draw_glyph({ point, glyph_size }, code_point);
         draw_glyph({ point, glyph_size }, code_point);
-        point.move_by(glyph_size.width(), 0);
+        if (direction == TextDirection::LTR)
+            point.move_by(glyph_size.width(), 0);
     }
     }
 }
 }
 
 
@@ -1105,9 +1115,170 @@ static inline size_t draw_text_get_length(const Utf32View& text)
     return text.length();
     return text.length();
 }
 }
 
 
+// Splits `text` into runs of consecutive code points that share one embedding level.
+//
+// `initial_direction` is the paragraph direction; it selects the base embedding level (0 for LTR, 1 otherwise)
+// and the class that unresolved weak/neutral characters fall back to.
+//
+// The returned runs are ordered along the paragraph direction: left-to-right for an LTR paragraph and
+// right-to-left for an RTL paragraph, so a caller can lay them out one after another starting from the
+// paragraph's leading edge. Code points inside RTL runs have already been passed through get_mirror_char().
+// An empty `text` produces no runs.
+template<typename TextType>
+Vector<DirectionalRun> split_text_into_directional_runs(const TextType& text, TextDirection initial_direction)
+{
+    // FIXME: This is a *very* simplified version of the UNICODE BIDIRECTIONAL ALGORITHM (https://www.unicode.org/reports/tr9/), that can render most bidirectional text
+    //  but also produces awkward results in a large number of edge cases. This should probably be replaced with a fully spec compliant implementation at some point.
+
+    // The run splitting below reads embedding_levels[0], which needs at least one code point.
+    if (text.length() == 0)
+        return {};
+
+    // FIXME: Support HTML "dir" attribute (how?)
+    u8 paragraph_embedding_level = initial_direction == TextDirection::LTR ? 0 : 1;
+    Vector<u8> embedding_levels;
+    embedding_levels.ensure_capacity(text.length());
+    for (size_t i = 0; i < text.length(); i++)
+        embedding_levels.unchecked_append(paragraph_embedding_level);
+
+    // FIXME: Support Explicit Directional Formatting Characters
+
+    Vector<BidirectionalClass> character_classes;
+    character_classes.ensure_capacity(text.length());
+    for (u32 code_point : text)
+        character_classes.unchecked_append(get_char_bidi_class(code_point));
+
+    // resolving weak types
+    // Every separator takes the class of the nearest preceding strong character, or the paragraph class if there is none.
+    BidirectionalClass paragraph_class = initial_direction == TextDirection::LTR ? BidirectionalClass::STRONG_LTR : BidirectionalClass::STRONG_RTL;
+    for (size_t i = 0; i < character_classes.size(); i++) {
+        if (character_classes[i] != BidirectionalClass::WEAK_SEPARATORS)
+            continue;
+        for (ssize_t j = i - 1; j >= 0; j--) {
+            auto character_class = character_classes[j];
+            if (character_class != BidirectionalClass::STRONG_RTL && character_class != BidirectionalClass::STRONG_LTR)
+                continue;
+            character_classes[i] = character_class;
+            break;
+        }
+        if (character_classes[i] == BidirectionalClass::WEAK_SEPARATORS)
+            character_classes[i] = paragraph_class;
+    }
+
+    // resolving neutral types
+    // Leading and trailing neutrals take the paragraph class. A neutral sequence between two characters of the
+    // same direction takes that direction; between characters of differing direction it takes the paragraph class.
+    auto left_side = BidirectionalClass::NEUTRAL;
+    auto sequence_length = 0;
+    for (size_t i = 0; i < character_classes.size(); i++) {
+        auto character_class = character_classes[i];
+        if (left_side == BidirectionalClass::NEUTRAL) {
+            if (character_class != BidirectionalClass::NEUTRAL)
+                left_side = character_class;
+            else
+                character_classes[i] = paragraph_class;
+            continue;
+        }
+        if (character_class != BidirectionalClass::NEUTRAL) {
+            BidirectionalClass sequence_class;
+            if (bidi_class_to_direction(left_side) == bidi_class_to_direction(character_class)) {
+                sequence_class = left_side == BidirectionalClass::STRONG_RTL ? BidirectionalClass::STRONG_RTL : BidirectionalClass::STRONG_LTR;
+            } else {
+                sequence_class = paragraph_class;
+            }
+            for (auto j = 0; j < sequence_length; j++) {
+                character_classes[i - j - 1] = sequence_class;
+            }
+            sequence_length = 0;
+            left_side = character_class;
+        } else {
+            sequence_length++;
+        }
+    }
+    for (auto i = 0; i < sequence_length; i++)
+        character_classes[character_classes.size() - i - 1] = paragraph_class;
+
+    // resolving implicit levels
+    for (size_t i = 0; i < character_classes.size(); i++) {
+        auto character_class = character_classes[i];
+        if ((embedding_levels[i] % 2) == 0) {
+            if (character_class == BidirectionalClass::STRONG_RTL)
+                embedding_levels[i] += 1;
+            else if (character_class == BidirectionalClass::WEAK_NUMBERS || character_class == BidirectionalClass::WEAK_SEPARATORS)
+                embedding_levels[i] += 2;
+        } else {
+            if (character_class == BidirectionalClass::STRONG_LTR || character_class == BidirectionalClass::WEAK_NUMBERS || character_class == BidirectionalClass::WEAK_SEPARATORS)
+                embedding_levels[i] += 1;
+        }
+    }
+
+    // splitting into runs
+    // The iterator is shared across calls, so each slice continues where the previous one ended.
+    auto run_code_points_start = text.begin();
+    auto next_code_points_slice = [&](auto length) {
+        Vector<u32> run_code_points;
+        run_code_points.ensure_capacity(length);
+        for (size_t j = 0; j < length; ++j, ++run_code_points_start)
+            run_code_points.unchecked_append(*run_code_points_start);
+        return run_code_points;
+    };
+    Vector<DirectionalRun> runs;
+    size_t start = 0;
+    u8 level = embedding_levels[0];
+    for (size_t i = 1; i < embedding_levels.size(); ++i) {
+        if (embedding_levels[i] == level)
+            continue;
+        auto code_points_slice = next_code_points_slice(i - start);
+        runs.append({ move(code_points_slice), level });
+        start = i;
+        level = embedding_levels[i];
+    }
+    auto code_points_slice = next_code_points_slice(embedding_levels.size() - start);
+    runs.append({ move(code_points_slice), level });
+
+    // reordering resolved levels
+    // FIXME: missing special cases for trailing whitespace characters
+    // Rule L2 of UAX #9 reverses, from the highest level down to the lowest odd level, every contiguous sequence
+    // of runs at that level or higher, which yields left-to-right visual order. We stop just above the paragraph
+    // level instead: for an LTR paragraph that is the same thing, and for an RTL paragraph it leaves out the final
+    // whole-line reversal, so the runs stay in right-to-left order.
+    u8 maximum_level = 0;
+    for (auto& run : runs)
+        maximum_level = max(maximum_level, run.embedding_level());
+    // The counter is a signed int so that counting down can never wrap around.
+    for (int current_level = maximum_level; current_level > paragraph_embedding_level; --current_level) {
+        size_t run_index = 0;
+        while (run_index < runs.size()) {
+            // Skip the runs that sit below the level being processed.
+            while (run_index < runs.size() && runs[run_index].embedding_level() < current_level)
+                run_index++;
+            auto reverse_start = run_index;
+            while (run_index < runs.size() && runs[run_index].embedding_level() >= current_level)
+                run_index++;
+            // [reverse_start, reverse_end) is a maximal sequence at or above the level; reverse it in place.
+            auto reverse_end = run_index;
+            while (reverse_start + 1 < reverse_end) {
+                reverse_end--;
+                swap(runs[reverse_start], runs[reverse_end]);
+                reverse_start++;
+            }
+        }
+    }
+
+    // mirroring RTL mirror characters
+    for (auto& run : runs) {
+        if (run.direction() == TextDirection::LTR)
+            continue;
+        for (auto& code_point : run.code_points()) {
+            code_point = get_mirror_char(code_point);
+        }
+    }
+
+    return runs;
+}
+
+// Reports whether `text` holds at least one non-neutral code point whose direction differs from `initial_direction`.
+// Neutral code points are ignored; text made up only of neutrals yields false.
+template<typename TextType>
+bool text_contains_bidirectional_text(const TextType& text, TextDirection initial_direction)
+{
+    for (u32 code_point : text) {
+        const auto bidi_class = get_char_bidi_class(code_point);
+        // The neutral check has to come first: bidi_class_to_direction() rejects neutrals.
+        const bool opposes_initial_direction = bidi_class != BidirectionalClass::NEUTRAL
+            && bidi_class_to_direction(bidi_class) != initial_direction;
+        if (opposes_initial_direction)
+            return true;
+    }
+    return false;
+}
+
 template<typename TextType, typename DrawGlyphFunction>
 template<typename TextType, typename DrawGlyphFunction>
 void do_draw_text(const IntRect& rect, const TextType& text, const Font& font, TextAlignment alignment, TextElision elision, DrawGlyphFunction draw_glyph)
 void do_draw_text(const IntRect& rect, const TextType& text, const Font& font, TextAlignment alignment, TextElision elision, DrawGlyphFunction draw_glyph)
 {
 {
+    if (draw_text_get_length(text) == 0)
+        return;
+
     Vector<TextType, 32> lines;
     Vector<TextType, 32> lines;
 
 
     size_t start_of_current_line = 0;
     size_t start_of_current_line = 0;
@@ -1161,9 +1332,27 @@ void do_draw_text(const IntRect& rect, const TextType& text, const Font& font, T
 
 
     for (size_t i = 0; i < lines.size(); ++i) {
     for (size_t i = 0; i < lines.size(); ++i) {
         auto& line = lines[i];
         auto& line = lines[i];
+
         IntRect line_rect { bounding_rect.x(), bounding_rect.y() + static_cast<int>(i) * line_height, bounding_rect.width(), line_height };
         IntRect line_rect { bounding_rect.x(), bounding_rect.y() + static_cast<int>(i) * line_height, bounding_rect.width(), line_height };
         line_rect.intersect(rect);
         line_rect.intersect(rect);
-        draw_text_line(line_rect, line, font, alignment, elision, draw_glyph);
+
+        TextDirection line_direction = get_text_direction(line);
+        if (text_contains_bidirectional_text(line, line_direction)) { // Slow Path: The line contains mixed BiDi classes
+            auto directional_runs = split_text_into_directional_runs(line, line_direction);
+            auto current_dx = line_direction == TextDirection::LTR ? 0 : line_rect.width();
+            for (auto& directional_run : directional_runs) {
+                auto run_width = font.width(directional_run.text());
+                if (line_direction == TextDirection::RTL)
+                    current_dx -= run_width;
+                auto run_rect = line_rect.translated(current_dx, 0);
+                run_rect.set_width(run_width);
+                draw_text_line(run_rect, directional_run.text(), font, alignment, elision, directional_run.direction(), draw_glyph);
+                if (line_direction == TextDirection::LTR)
+                    current_dx += run_width;
+            }
+        } else {
+            draw_text_line(line_rect, line, font, alignment, elision, line_direction, draw_glyph);
+        }
     }
     }
 }
 }
 
 

+ 34 - 0
Userland/Libraries/LibGfx/TextDirection.cpp

@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2021, Idan Horowitz <idan.horowitz@serenityos.org>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include <AK/Array.h>
+#include <LibGfx/TextDirection.h>
+
+namespace Gfx {
+
+// FIXME: These should be parsed from the official UnicodeData.txt that specifies the class for each character (this function doesn't take into account a large number of characters)
+// Builds, during constant evaluation, the table that maps every code point below 0x1F000 to its bidirectional class.
+static consteval Array<BidirectionalClass, 0x1F000> generate_char_bidi_class_lookup_table()
+{
+    Array<BidirectionalClass, 0x1F000> lookup_table {};
+    for (u32 ch = 0; ch < 0x1F000; ch++) {
+        const bool is_arabic_rtl = (ch >= 0x600 && ch <= 0x7BF) || (ch >= 0x8A0 && ch <= 0x8FF) || (ch >= 0xFB50 && ch <= 0xFDCF) || (ch >= 0xFDF0 && ch <= 0xFDFF) || (ch >= 0xFE70 && ch <= 0xFEFF) || (ch >= 0x1EE00 && ch <= 0x1EEFF);
+        const bool is_hebrew_rtl = (ch >= 0x590 && ch <= 0x5FF) || (ch >= 0x7C0 && ch <= 0x89F) || (ch == 0x200F) || (ch >= 0xFB1D && ch <= 0xFB4F) || (ch >= 0x10800 && ch <= 0x10FFF) || (ch >= 0x1E800 && ch <= 0x1EDFF) || (ch >= 0x1EF00 && ch <= 0x1EFFF);
+        const bool is_numeral = (ch >= 0x30 && ch <= 0x39) || (ch >= 0x660 && ch <= 0x669) || (ch >= 0x10D30 && ch <= 0x10E7E);
+        const bool is_separator = (ch >= 0x23 && ch <= 0x25) || (ch >= 0x2B && ch <= 0x2F) || (ch == 0x3A);
+        const bool is_neutral = (ch >= 0x9 && ch <= 0xD) || (ch >= 0x1C && ch <= 0x22) || (ch >= 0x26 && ch <= 0x2A) || (ch >= 0x3B && ch <= 0x40) || (ch >= 0x5B && ch <= 0x60) || (ch >= 0x7B && ch <= 0x7E);
+
+        // Some ranges overlap (e.g. the numerals at 0x660-0x669 lie inside an Arabic range), so the order matters.
+        // Precedence, highest first: neutral, separators, numerals, RTL scripts; everything else is LTR.
+        if (is_neutral)
+            lookup_table[ch] = BidirectionalClass::NEUTRAL;
+        else if (is_separator)
+            lookup_table[ch] = BidirectionalClass::WEAK_SEPARATORS;
+        else if (is_numeral)
+            lookup_table[ch] = BidirectionalClass::WEAK_NUMBERS;
+        else if (is_hebrew_rtl || is_arabic_rtl)
+            lookup_table[ch] = BidirectionalClass::STRONG_RTL;
+        else
+            lookup_table[ch] = BidirectionalClass::STRONG_LTR;
+    }
+    return lookup_table;
+}
+// Definition of the table declared `extern` in TextDirection.h; its contents come from the consteval generator above.
+constexpr Array<BidirectionalClass, 0x1F000> char_bidi_class_lookup_table = generate_char_bidi_class_lookup_table();
+
+}

+ 103 - 0
Userland/Libraries/LibGfx/TextDirection.h

@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2021, Idan Horowitz <idan.horowitz@serenityos.org>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#pragma once
+
+#include <AK/Utf32View.h>
+#include <AK/Vector.h>
+
+namespace Gfx {
+
+// Simplified set of bidirectional character classes used by the text direction helpers in this header.
+enum class BidirectionalClass {
+    STRONG_LTR,      // Left-to-right; also what get_char_bidi_class() reports for code points outside the lookup table
+    STRONG_RTL,      // Right-to-left
+    WEAK_NUMBERS,    // Numerals; bidi_class_to_direction() maps them to LTR
+    WEAK_SEPARATORS, // Separator punctuation; bidi_class_to_direction() maps them to LTR
+    NEUTRAL,         // No direction of its own; must not be passed to bidi_class_to_direction()
+};
+
+// Bidirectional class of every code point below 0x1F000, indexed by code point. Defined in TextDirection.cpp.
+// NOTE(review): this header does not include <AK/Array.h> itself - presumably it arrives transitively; confirm.
+extern const Array<BidirectionalClass, 0x1F000> char_bidi_class_lookup_table;
+
+// Returns the bidirectional class of `ch`. Code points beyond the end of the lookup table are reported as STRONG_LTR.
+constexpr BidirectionalClass get_char_bidi_class(u32 ch)
+{
+    const bool is_covered_by_table = ch < char_bidi_class_lookup_table.size();
+    return is_covered_by_table ? char_bidi_class_lookup_table[ch] : BidirectionalClass::STRONG_LTR;
+}
+
+// FIXME: These should be parsed from the official BidiMirroring.txt that specifies the mirroring character for each character (this function doesn't take into account a large number of characters)
+// Returns the mirrored counterpart of `ch` for the handful of paired characters known here, or `ch` itself otherwise.
+// Every pair is listed in both directions, so applying the function twice gives back the original code point.
+constexpr u32 get_mirror_char(u32 ch)
+{
+    switch (ch) {
+    case 0x28: // (
+        return 0x29;
+    case 0x29: // )
+        return 0x28;
+    case 0x3C: // <
+        return 0x3E;
+    case 0x3E: // >
+        return 0x3C;
+    case 0x5B: // [
+        return 0x5D;
+    case 0x5D: // ] (closing bracket needs the reverse mapping too)
+        return 0x5B;
+    case 0x7B: // {
+        return 0x7D;
+    case 0x7D: // }
+        return 0x7B;
+    case 0xAB: // left-pointing double angle quotation mark
+        return 0xBB;
+    case 0xBB: // right-pointing double angle quotation mark
+        return 0xAB;
+    case 0x2039: // single left-pointing angle quotation mark
+        return 0x203A;
+    case 0x203A: // single right-pointing angle quotation mark
+        return 0x2039;
+    default:
+        return ch;
+    }
+}
+
+// Direction in which a piece of text runs.
+enum class TextDirection {
+    LTR, // Left-to-right
+    RTL, // Right-to-left
+};
+
+// Maps a non-neutral bidirectional class to a text direction.
+// Strong LTR characters, numerals and separators count as LTR; anything else is RTL.
+// Passing NEUTRAL is a programming error and trips the VERIFY.
+constexpr TextDirection bidi_class_to_direction(BidirectionalClass class_)
+{
+    VERIFY(class_ != BidirectionalClass::NEUTRAL);
+    switch (class_) {
+    case BidirectionalClass::STRONG_LTR:
+    case BidirectionalClass::WEAK_NUMBERS:
+    case BidirectionalClass::WEAK_SEPARATORS:
+        return TextDirection::LTR;
+    default:
+        return TextDirection::RTL;
+    }
+}
+
+// Determines the direction of `text` from its first non-neutral code point.
+// Assumes the text has a homogeneous direction; text without any non-neutral code point counts as LTR.
+template<typename TextType>
+constexpr TextDirection get_text_direction(TextType text)
+{
+    for (u32 code_point : text) {
+        const auto bidi_class = get_char_bidi_class(code_point);
+        if (bidi_class == BidirectionalClass::NEUTRAL)
+            continue;
+        // The first character that has a direction decides for the whole text.
+        return bidi_class_to_direction(bidi_class);
+    }
+    return TextDirection::LTR;
+}
+
+// A sequence of code points together with the embedding level assigned to all of them.
+class DirectionalRun {
+public:
+    // Takes ownership of `code_points`; `embedding_level` applies to the whole run.
+    DirectionalRun(Vector<u32> code_points, u8 embedding_level)
+        : m_code_points(move(code_points))
+        , m_embedding_level(embedding_level)
+    {
+    }
+
+    // View built over this run's code point buffer.
+    [[nodiscard]] Utf32View text() const { return Utf32View { m_code_points.data(), m_code_points.size() }; }
+
+    [[nodiscard]] u8 embedding_level() const { return m_embedding_level; }
+
+    // Odd embedding levels are right-to-left, even ones left-to-right.
+    [[nodiscard]] TextDirection direction() const
+    {
+        const bool has_odd_level = (m_embedding_level % 2) != 0;
+        return has_odd_level ? TextDirection::RTL : TextDirection::LTR;
+    }
+
+    // Mutable access to the stored code points (used to mirror characters in place).
+    Vector<u32>& code_points() { return m_code_points; }
+
+private:
+    Vector<u32> m_code_points;
+    u8 m_embedding_level;
+};
+
+}