浏览代码

LibLine: Handle unicode correctly

This commit also fixes a problem with us throwing out data that was
inserted while a command was running.
AnotherTest 5 年之前
父节点
当前提交
3bc3f36cfe
共有 4 个文件被更改，包括 90 次插入和 29 次删除
  1. 80 21
      Libraries/LibLine/Editor.cpp
  2. 7 5
      Libraries/LibLine/Editor.h
  3. 1 1
      Shell/Shell.cpp
  4. 2 2
      Userland/js.cpp

+ 80 - 21
Libraries/LibLine/Editor.cpp

@@ -26,6 +26,8 @@
 
 
 #include "Editor.h"
 #include "Editor.h"
 #include <AK/StringBuilder.h>
 #include <AK/StringBuilder.h>
+#include <AK/Utf32View.h>
+#include <AK/Utf8View.h>
 #include <ctype.h>
 #include <ctype.h>
 #include <stdio.h>
 #include <stdio.h>
 #include <sys/ioctl.h>
 #include <sys/ioctl.h>
@@ -76,21 +78,25 @@ void Editor::clear_line()
 
 
 void Editor::insert(const String& string)
 void Editor::insert(const String& string)
 {
 {
-    for (auto ch : string)
+    for (auto ch : Utf8View { string })
         insert(ch);
         insert(ch);
 }
 }
 
 
-void Editor::insert(const char ch)
+void Editor::insert(const u32 cp)
 {
 {
-    m_pending_chars.append(&ch, 1);
+    StringBuilder builder;
+    builder.append(Utf32View(&cp, 1));
+    auto str = builder.build();
+    m_pending_chars.append(str.characters(), str.length());
+
     if (m_cursor == m_buffer.size()) {
     if (m_cursor == m_buffer.size()) {
-        m_buffer.append(ch);
+        m_buffer.append(cp);
         m_cursor = m_buffer.size();
         m_cursor = m_buffer.size();
         m_inline_search_cursor = m_cursor;
         m_inline_search_cursor = m_cursor;
         return;
         return;
     }
     }
 
 
-    m_buffer.insert(m_cursor, ch);
+    m_buffer.insert(m_cursor, cp);
     ++m_chars_inserted_in_the_middle;
     ++m_chars_inserted_in_the_middle;
     ++m_cursor;
     ++m_cursor;
     m_inline_search_cursor = m_cursor;
     m_inline_search_cursor = m_cursor;
@@ -143,17 +149,18 @@ String Editor::get_line(const String& prompt)
             m_finish = false;
             m_finish = false;
             printf("\n");
             printf("\n");
             fflush(stdout);
             fflush(stdout);
-            auto string = String::copy(m_buffer);
+            auto string = line();
             m_buffer.clear();
             m_buffer.clear();
             m_is_editing = false;
             m_is_editing = false;
             restore();
             restore();
             return string;
             return string;
         }
         }
         char keybuf[16];
         char keybuf[16];
-        ssize_t nread = read(0, keybuf, sizeof(keybuf));
-        // FIXME: exit()ing here is a bit off. Should communicate failure to caller somehow instead.
-        if (nread == 0)
-            exit(0);
+        ssize_t nread = 0;
+
+        if (!m_incomplete_data.size())
+            nread = read(0, keybuf, sizeof(keybuf));
+
         if (nread < 0) {
         if (nread < 0) {
             if (errno == EINTR) {
             if (errno == EINTR) {
                 if (!m_was_interrupted) {
                 if (!m_was_interrupted) {
@@ -183,6 +190,13 @@ String Editor::get_line(const String& prompt)
             exit(2);
             exit(2);
         }
         }
 
 
+        m_incomplete_data.append(keybuf, nread);
+        nread = m_incomplete_data.size();
+
+        // FIXME: exit()ing here is a bit off. Should communicate failure to caller somehow instead.
+        if (nread == 0)
+            exit(0);
+
         auto reverse_tab = false;
         auto reverse_tab = false;
         auto increment_suggestion_index = [&] {
         auto increment_suggestion_index = [&] {
             if (m_suggestions.size())
             if (m_suggestions.size())
@@ -196,8 +210,20 @@ String Editor::get_line(const String& prompt)
             m_next_suggestion_index--;
             m_next_suggestion_index--;
         };
         };
         auto ctrl_held = false;
         auto ctrl_held = false;
-        for (ssize_t i = 0; i < nread; ++i) {
-            char ch = keybuf[i];
+
+        // discard starting bytes until they make sense as utf-8
+        size_t valid_bytes = 0;
+        while (nread) {
+            Utf8View { StringView { m_incomplete_data.data(), (size_t)nread } }.validate(valid_bytes);
+            if (valid_bytes)
+                break;
+            m_incomplete_data.take_first();
+            --nread;
+        }
+
+        Utf8View input_view { StringView { m_incomplete_data.data(), valid_bytes } };
+
+        for (auto ch : input_view) {
             if (ch == 0)
             if (ch == 0)
                 continue;
                 continue;
 
 
@@ -219,7 +245,9 @@ String Editor::get_line(const String& prompt)
                 {
                 {
                     m_searching_backwards = true;
                     m_searching_backwards = true;
                     auto inline_search_cursor = m_inline_search_cursor;
                     auto inline_search_cursor = m_inline_search_cursor;
-                    String search_phrase { m_buffer.data(), inline_search_cursor };
+                    StringBuilder builder;
+                    builder.append(Utf32View { m_buffer.data(), inline_search_cursor });
+                    String search_phrase = builder.to_string();
                     if (search(search_phrase, true, true)) {
                     if (search(search_phrase, true, true)) {
                         ++m_search_offset;
                         ++m_search_offset;
                     } else {
                     } else {
@@ -233,7 +261,9 @@ String Editor::get_line(const String& prompt)
                 case 'B': // down
                 case 'B': // down
                 {
                 {
                     auto inline_search_cursor = m_inline_search_cursor;
                     auto inline_search_cursor = m_inline_search_cursor;
-                    String search_phrase { m_buffer.data(), inline_search_cursor };
+                    StringBuilder builder;
+                    builder.append(Utf32View { m_buffer.data(), inline_search_cursor });
+                    String search_phrase = builder.to_string();
                     auto search_changed_directions = m_searching_backwards;
                     auto search_changed_directions = m_searching_backwards;
                     m_searching_backwards = false;
                     m_searching_backwards = false;
                     if (m_search_offset > 0) {
                     if (m_search_offset > 0) {
@@ -390,7 +420,9 @@ String Editor::get_line(const String& prompt)
                     }
                     }
                 }
                 }
 
 
-                String token = is_empty_token ? String() : String(&m_buffer[token_start], m_cursor - token_start);
+                StringBuilder builder;
+                builder.append(Utf32View { m_buffer.data() + token_start, m_cursor - token_start });
+                String token = is_empty_token ? String() : builder.to_string();
 
 
                 // ask for completions only on the first tab
                 // ask for completions only on the first tab
                 // and scan for the largest common prefix to display
                 // and scan for the largest common prefix to display
@@ -688,7 +720,9 @@ String Editor::get_line(const String& prompt)
                     m_pre_search_cursor = m_cursor;
                     m_pre_search_cursor = m_cursor;
                     m_search_editor = make<Editor>(Configuration { Configuration::Eager, m_configuration.split_mechanism }); // Has anyone seen 'Inception'?
                     m_search_editor = make<Editor>(Configuration { Configuration::Eager, m_configuration.split_mechanism }); // Has anyone seen 'Inception'?
                     m_search_editor->on_display_refresh = [this](Editor& search_editor) {
                     m_search_editor->on_display_refresh = [this](Editor& search_editor) {
-                        search(StringView { search_editor.buffer().data(), search_editor.buffer().size() });
+                        StringBuilder builder;
+                        builder.append(Utf32View { search_editor.buffer().data(), search_editor.buffer().size() });
+                        search(builder.build());
                         refresh_display();
                         refresh_display();
                         return;
                         return;
                     };
                     };
@@ -792,6 +826,14 @@ String Editor::get_line(const String& prompt)
 
 
             insert(ch);
             insert(ch);
         }
         }
+
+        if (valid_bytes == m_incomplete_data.size()) {
+            m_incomplete_data.clear();
+        } else {
+            ASSERT_NOT_REACHED();
+            for (size_t i = 0; i < valid_bytes; ++i)
+                m_incomplete_data.take_first();
+        }
     }
     }
 }
 }
 
 
@@ -918,6 +960,7 @@ void Editor::refresh_display()
 
 
     vt_clear_to_end_of_line();
     vt_clear_to_end_of_line();
     HashMap<u32, Style> empty_styles {};
     HashMap<u32, Style> empty_styles {};
+    StringBuilder builder;
     for (size_t i = 0; i < m_buffer.size(); ++i) {
     for (size_t i = 0; i < m_buffer.size(); ++i) {
         auto ends = m_spans_ending.get(i).value_or(empty_styles);
         auto ends = m_spans_ending.get(i).value_or(empty_styles);
         auto starts = m_spans_starting.get(i).value_or(empty_styles);
         auto starts = m_spans_starting.get(i).value_or(empty_styles);
@@ -929,7 +972,9 @@ void Editor::refresh_display()
             // set new options
             // set new options
             vt_apply_style(starts.begin()->value); // apply some random style that starts here
             vt_apply_style(starts.begin()->value); // apply some random style that starts here
         }
         }
-        fputc(m_buffer[i], stdout);
+        builder.clear();
+        builder.append(Utf32View { &m_buffer[i], 1 });
+        fputs(builder.to_string().characters(), stdout);
     }
     }
     vt_apply_style({}); // don't bleed to EOL
     vt_apply_style({}); // don't bleed to EOL
     m_pending_chars.clear();
     m_pending_chars.clear();
@@ -1061,8 +1106,11 @@ size_t Editor::actual_rendered_string_length(const StringView& string) const
         BracketArgsSemi = 7,
         BracketArgsSemi = 7,
         Title = 9,
         Title = 9,
     } state { Free };
     } state { Free };
-    for (size_t i = 0; i < string.length(); ++i) {
-        auto c = string[i];
+    Utf8View view { string };
+    auto it = view.begin();
+
+    for (size_t i = 0; i < view.length_in_codepoints(); ++i, ++it) {
+        auto c = *it;
         switch (state) {
         switch (state) {
         case Free:
         case Free:
             if (c == '\x1b') {
             if (c == '\x1b') {
@@ -1080,7 +1128,9 @@ size_t Editor::actual_rendered_string_length(const StringView& string) const
             break;
             break;
         case Escape:
         case Escape:
             if (c == ']') {
             if (c == ']') {
-                if (string.length() > i && string[i + 1] == '0')
+                ++i;
+                ++it;
+                if (*it == '0')
                     state = Title;
                     state = Title;
                 continue;
                 continue;
             }
             }
@@ -1119,6 +1169,7 @@ Vector<size_t, 2> Editor::vt_dsr()
     u32 length { 0 };
     u32 length { 0 };
 
 
     // read whatever junk there is before talking to the terminal
     // read whatever junk there is before talking to the terminal
+    // and insert them later when we're reading user input
     bool more_junk_to_read { false };
     bool more_junk_to_read { false };
     timeval timeout { 0, 0 };
     timeval timeout { 0, 0 };
     fd_set readfds;
     fd_set readfds;
@@ -1130,7 +1181,7 @@ Vector<size_t, 2> Editor::vt_dsr()
         (void)select(1, &readfds, nullptr, nullptr, &timeout);
         (void)select(1, &readfds, nullptr, nullptr, &timeout);
         if (FD_ISSET(0, &readfds)) {
         if (FD_ISSET(0, &readfds)) {
             auto nread = read(0, buf, 16);
             auto nread = read(0, buf, 16);
-            (void)nread;
+            m_incomplete_data.append(buf, nread);
             more_junk_to_read = true;
             more_junk_to_read = true;
         }
         }
     } while (more_junk_to_read);
     } while (more_junk_to_read);
@@ -1170,4 +1221,12 @@ Vector<size_t, 2> Editor::vt_dsr()
     }
     }
     return { x, y };
     return { x, y };
 }
 }
+
+String Editor::line() const
+{
+    StringBuilder builder;
+    builder.append(Utf32View { m_buffer.data(), m_buffer.size() });
+    return builder.build();
+}
+
 }
 }

+ 7 - 5
Libraries/LibLine/Editor.h

@@ -147,8 +147,9 @@ public:
     void resized() { m_was_resized = true; }
     void resized() { m_was_resized = true; }
 
 
     size_t cursor() const { return m_cursor; }
     size_t cursor() const { return m_cursor; }
-    const Vector<char, 1024>& buffer() const { return m_buffer; }
-    char buffer_at(size_t pos) const { return m_buffer.at(pos); }
+    const Vector<u32, 1024>& buffer() const { return m_buffer; }
+    u32 buffer_at(size_t pos) const { return m_buffer.at(pos); }
+    String line() const;
 
 
     // only makes sense inside a char_input callback or on_* callback
     // only makes sense inside a char_input callback or on_* callback
     void set_prompt(const String& prompt)
     void set_prompt(const String& prompt)
@@ -162,7 +163,7 @@ public:
 
 
     void clear_line();
     void clear_line();
     void insert(const String&);
     void insert(const String&);
-    void insert(const char);
+    void insert(const u32);
     void stylize(const Span&, const Style&);
     void stylize(const Span&, const Style&);
     void strip_styles()
     void strip_styles()
     {
     {
@@ -275,9 +276,10 @@ private:
     size_t m_search_offset { 0 };
     size_t m_search_offset { 0 };
     bool m_searching_backwards { true };
     bool m_searching_backwards { true };
     size_t m_pre_search_cursor { 0 };
     size_t m_pre_search_cursor { 0 };
-    Vector<char, 1024> m_pre_search_buffer;
+    Vector<u32, 1024> m_pre_search_buffer;
 
 
-    Vector<char, 1024> m_buffer;
+    Vector<u32, 1024> m_buffer;
+    Vector<char, 512> m_incomplete_data;
     ByteBuffer m_pending_chars;
     ByteBuffer m_pending_chars;
     size_t m_cursor { 0 };
     size_t m_cursor { 0 };
     size_t m_drawn_cursor { 0 };
     size_t m_drawn_cursor { 0 };

+ 1 - 1
Shell/Shell.cpp

@@ -1389,7 +1389,7 @@ void Shell::highlight(Line::Editor&) const
     if (m_should_continue == ExitCodeOrContinuationRequest::SingleQuotedString) {
     if (m_should_continue == ExitCodeOrContinuationRequest::SingleQuotedString) {
         builder.append('\'');
         builder.append('\'');
     }
     }
-    builder.append(StringView { editor.buffer().data(), editor.buffer().size() });
+    builder.append(editor.line());
     auto commands = Parser { builder.string_view() }.parse();
     auto commands = Parser { builder.string_view() }.parse();
     auto first_command { true };
     auto first_command { true };
     for (auto& command : commands) {
     for (auto& command : commands) {

+ 2 - 2
Userland/js.cpp

@@ -518,7 +518,7 @@ int main(int argc, char** argv)
             };
             };
             editor.strip_styles();
             editor.strip_styles();
             StringBuilder builder;
             StringBuilder builder;
-            builder.append({ editor.buffer().data(), editor.buffer().size() });
+            builder.append(editor.line());
             // FIXME: The lexer returns weird position information without this
             // FIXME: The lexer returns weird position information without this
             builder.append(" ");
             builder.append(" ");
             String str = builder.build();
             String str = builder.build();
@@ -659,7 +659,7 @@ int main(int argc, char** argv)
             if (token.length() == 0)
             if (token.length() == 0)
                 return {}; // nyeh
                 return {}; // nyeh
 
 
-            StringView line { editor.buffer().data(), editor.cursor() };
+            auto line = editor.line();
             // we're only going to complete either
             // we're only going to complete either
             //    - <N>
             //    - <N>
             //        where N is part of the name of a variable
             //        where N is part of the name of a variable