Browse Source

LibVT: Add incremental UTF-8 parsing to the terminal input handler

Instead of relying on the GUI code to handle UTF-8, we now decode the
incoming byte stream into 32-bit codepoints ourselves, one byte at a time.

This means that you can now show emojis in the terminal and they will
only take up one character cell each. :^)
Andreas Kling 5 years ago
parent
commit
b8498dc55e
2 changed files with 70 additions and 14 deletions
  1. 62 12
      Libraries/LibVT/Terminal.cpp
  2. 8 2
      Libraries/LibVT/Terminal.h

+ 62 - 12
Libraries/LibVT/Terminal.cpp

@@ -809,6 +809,23 @@ void Terminal::on_input(u8 ch)
 #ifdef TERMINAL_DEBUG
 #ifdef TERMINAL_DEBUG
     dbgprintf("Terminal::on_char: %b (%c), fg=%u, bg=%u\n", ch, ch, m_current_attribute.foreground_color, m_current_attribute.background_color);
     dbgprintf("Terminal::on_char: %b (%c), fg=%u, bg=%u\n", ch, ch, m_current_attribute.foreground_color, m_current_attribute.background_color);
 #endif
 #endif
+
+    auto fail_utf8_parse = [this] {
+        m_parser_state = Normal;
+        on_codepoint('%');
+    };
+
+    auto advance_utf8_parse = [this, ch] {
+        m_parser_codepoint <<= 6;
+        m_parser_codepoint |= ch & 0x3f;
+        if (m_parser_state == UTF8Needs1Byte) {
+            on_codepoint(m_parser_codepoint);
+            m_parser_state = Normal;
+        } else {
+            m_parser_state = (ParserState)(m_parser_state + 1);
+        }
+    };
+
     switch (m_parser_state) {
     switch (m_parser_state) {
     case GotEscape:
     case GotEscape:
         if (ch == '[') {
         if (ch == '[') {
@@ -888,8 +905,36 @@ void Terminal::on_input(u8 ch)
         m_parser_state = Normal;
         m_parser_state = Normal;
         m_swallow_current = false;
         m_swallow_current = false;
         return;
         return;
+    case UTF8Needs1Byte:
+    case UTF8Needs2Bytes:
+    case UTF8Needs3Bytes:
+        if ((ch & 0xc0) != 0x80) {
+            fail_utf8_parse();
+        } else {
+            advance_utf8_parse();
+        }
+        return;
+
     case Normal:
     case Normal:
-        break;
+        if (!(ch & 0x80))
+            break;
+        if ((ch & 0xe0) == 0xc0) {
+            m_parser_state = UTF8Needs1Byte;
+            m_parser_codepoint = ch & 0x1f;
+            return;
+        }
+        if ((ch & 0xf0) == 0xe0) {
+            m_parser_state = UTF8Needs2Bytes;
+            m_parser_codepoint = ch & 0x0f;
+            return;
+        }
+        if ((ch & 0xf8) == 0xf0) {
+            m_parser_state = UTF8Needs3Bytes;
+            m_parser_codepoint = ch & 0x07;
+            return;
+        }
+        fail_utf8_parse();
+        return;
     }
     }
 
 
     switch (ch) {
     switch (ch) {
@@ -925,21 +970,26 @@ void Terminal::on_input(u8 ch)
         return;
         return;
     }
     }
 
 
+    on_codepoint(ch);
+}
+
+void Terminal::on_codepoint(u32 codepoint)
+{
     auto new_column = m_cursor_column + 1;
     auto new_column = m_cursor_column + 1;
     if (new_column < columns()) {
     if (new_column < columns()) {
-        put_character_at(m_cursor_row, m_cursor_column, ch);
+        put_character_at(m_cursor_row, m_cursor_column, codepoint);
         set_cursor(m_cursor_row, new_column);
         set_cursor(m_cursor_row, new_column);
+        return;
+    }
+    if (m_stomp) {
+        m_stomp = false;
+        newline();
+        put_character_at(m_cursor_row, m_cursor_column, codepoint);
+        set_cursor(m_cursor_row, 1);
     } else {
     } else {
-        if (m_stomp) {
-            m_stomp = false;
-            newline();
-            put_character_at(m_cursor_row, m_cursor_column, ch);
-            set_cursor(m_cursor_row, 1);
-        } else {
-            // Curious: We wait once on the right-hand side
-            m_stomp = true;
-            put_character_at(m_cursor_row, m_cursor_column, ch);
-        }
+        // Curious: We wait once on the right-hand side
+        m_stomp = true;
+        put_character_at(m_cursor_row, m_cursor_column, codepoint);
     }
     }
 }
 }
 
 

+ 8 - 2
Libraries/LibVT/Terminal.h

@@ -101,6 +101,8 @@ public:
 private:
 private:
     typedef Vector<unsigned, 4> ParamVector;
     typedef Vector<unsigned, 4> ParamVector;
 
 
+    void on_codepoint(u32);
+
     void scroll_up();
     void scroll_up();
     void scroll_down();
     void scroll_down();
     void newline();
     void newline();
@@ -171,7 +173,7 @@ private:
     void execute_xterm_command();
     void execute_xterm_command();
     void execute_hashtag(u8);
     void execute_hashtag(u8);
 
 
-    enum EscapeState {
+    enum ParserState {
         Normal,
         Normal,
         GotEscape,
         GotEscape,
         ExpectParameter,
         ExpectParameter,
@@ -180,9 +182,13 @@ private:
         ExpectHashtagDigit,
         ExpectHashtagDigit,
         ExpectXtermParameter,
         ExpectXtermParameter,
         ExpectStringTerminator,
         ExpectStringTerminator,
+        UTF8Needs3Bytes,
+        UTF8Needs2Bytes,
+        UTF8Needs1Byte,
     };
     };
 
 
-    EscapeState m_parser_state { Normal };
+    ParserState m_parser_state { Normal };
+    u32 m_parser_codepoint { 0 };
     Vector<u8> m_parameters;
     Vector<u8> m_parameters;
     Vector<u8> m_intermediates;
     Vector<u8> m_intermediates;
     Vector<u8> m_xterm_parameters;
     Vector<u8> m_xterm_parameters;