Browse Source

Add support for full Unicode character set (#15)

* Add support for full Unicode character set

* Add server tests for unicode support
Eric Zhang 4 years ago
parent
commit
53b5f0b972

+ 2 - 0
Cargo.lock

@@ -853,6 +853,7 @@ name = "rustpad-server"
 version = "0.1.0"
 dependencies = [
  "anyhow",
+ "bytecount",
  "dashmap",
  "dotenv",
  "futures",
@@ -871,6 +872,7 @@ dependencies = [
 name = "rustpad-wasm"
 version = "0.1.0"
 dependencies = [
+ "bytecount",
  "console_error_panic_hook",
  "js-sys",
  "operational-transform",

+ 1 - 0
rustpad-server/Cargo.toml

@@ -6,6 +6,7 @@ edition = "2018"
 
 [dependencies]
 anyhow = "1.0.40"
+bytecount = "0.6"
 dashmap = "4.0.2"
 dotenv = "0.15.0"
 futures = "0.3.15"

+ 1 - 1
rustpad-server/src/ot.rs

@@ -9,7 +9,7 @@ pub fn transform_index(operation: &OperationSeq, position: u32) -> u32 {
     for op in operation.ops() {
         match op {
             &Operation::Retain(n) => index -= n as i32,
-            Operation::Insert(s) => new_index += s.len() as i32,
+            Operation::Insert(s) => new_index += bytecount::num_chars(s.as_bytes()) as i32,
             &Operation::Delete(n) => {
                 new_index -= std::cmp::min(index, n as i32);
                 index -= n as i32;

+ 235 - 0
rustpad-server/tests/unicode.rs

@@ -0,0 +1,235 @@
+//! Tests for Unicode support and correct cursor transformation.
+
+pub mod common;
+
+use anyhow::Result;
+use common::*;
+use log::info;
+use operational_transform::OperationSeq;
+use rustpad_server::server;
+use serde_json::json;
+
+#[tokio::test]
+async fn test_unicode_length() -> Result<()> { // Inserts mixed ASCII/emoji text, then deletes it using its codepoint count.
+    pretty_env_logger::try_init().ok();
+    let filter = server();
+
+    expect_text(&filter, "unicode", "").await; // the document starts out empty
+
+    let mut client = connect(&filter, "unicode").await?;
+    let msg = client.recv().await?;
+    assert_eq!(msg, json!({ "Identity": 0 })); // the first message carries this client's id
+
+    let mut operation = OperationSeq::default();
+    operation.insert("h🎉e🎉l👨‍👨‍👦‍👦lo"); // ASCII, single-codepoint emoji, and a multi-codepoint family emoji
+    let msg = json!({
+        "Edit": {
+            "revision": 0,
+            "operation": operation
+        }
+    });
+    info!("sending ClientMsg {}", msg);
+    client.send(&msg).await;
+
+    let msg = client.recv().await?; // the applied edit comes back as a History message
+    assert_eq!(
+        msg,
+        json!({
+            "History": {
+                "start": 0,
+                "operations": [
+                    { "id": 0, "operation": ["h🎉e🎉l👨‍👨‍👦‍👦lo"] }
+                ]
+            }
+        })
+    );
+
+    info!("testing that text length is equal to number of Unicode code points...");
+    let mut operation = OperationSeq::default();
+    operation.delete(14); // 14 = codepoints in the inserted text, not its UTF-8 byte or UTF-16 unit count
+    let msg = json!({
+        "Edit": {
+            "revision": 1,
+            "operation": operation
+        }
+    });
+    info!("sending ClientMsg {}", msg);
+    client.send(&msg).await;
+
+    let msg = client.recv().await?;
+    assert_eq!(
+        msg,
+        json!({
+            "History": {
+                "start": 1,
+                "operations": [
+                    { "id": 0, "operation": [-14] }
+                ]
+            }
+        })
+    );
+
+    expect_text(&filter, "unicode", "").await; // deleting 14 codepoints must leave nothing behind
+
+    Ok(())
+}
+
+#[tokio::test]
+async fn test_multiple_operations() -> Result<()> {
+    pretty_env_logger::try_init().ok();
+    let filter = server();
+
+    expect_text(&filter, "unicode", "").await;
+
+    let mut client = connect(&filter, "unicode").await?;
+    let msg = client.recv().await?;
+    assert_eq!(msg, json!({ "Identity": 0 }));
+
+    let mut operation = OperationSeq::default();
+    operation.insert("🎉😍𒀇👨‍👨‍👦‍👦"); // Emoticons and Cuneiform
+    let msg = json!({
+        "Edit": {
+            "revision": 0,
+            "operation": operation
+        }
+    });
+    info!("sending ClientMsg {}", msg);
+    client.send(&msg).await;
+
+    let msg = client.recv().await?;
+    assert_eq!(
+        msg,
+        json!({
+            "History": {
+                "start": 0,
+                "operations": [
+                    { "id": 0, "operation": ["🎉😍𒀇👨‍👨‍👦‍👦"] }
+                ]
+            }
+        })
+    );
+
+    let mut operation = OperationSeq::default();
+    operation.insert("👯‍♂️");
+    operation.retain(3);
+    operation.insert("𐅣𐅤𐅥"); // Ancient Greek numbers
+    operation.retain(7);
+    let msg = json!({
+        "Edit": {
+            "revision": 1,
+            "operation": operation
+        }
+    });
+    info!("sending ClientMsg {}", msg);
+    client.send(&msg).await;
+
+    let msg = client.recv().await?;
+    assert_eq!(
+        msg,
+        json!({
+            "History": {
+                "start": 1,
+                "operations": [
+                    { "id": 0, "operation": ["👯‍♂️", 3, "𐅣𐅤𐅥", 7] }
+                ]
+            }
+        })
+    );
+
+    expect_text(&filter, "unicode", "👯‍♂️🎉😍𒀇𐅣𐅤𐅥👨‍👨‍👦‍👦").await;
+
+    let mut operation = OperationSeq::default();
+    operation.retain(2);
+    operation.insert("h̷̙̤̏͊̑̍̆̃̉͝ĕ̶̠̌̓̃̓̽̃̚l̸̥̊̓̓͝͠l̸̨̠̣̟̥͠ỏ̴̳̖̪̟̱̰̥̞̙̏̓́͗̽̀̈́͛͐̚̕͝͝ ̶̡͍͙͚̞͙̣̘͙̯͇̙̠̀w̷̨̨̪͚̤͙͖̝͕̜̭̯̝̋̋̿̿̀̾͛̐̏͘͘̕͝ǒ̴̙͉͈̗̖͍̘̥̤̒̈́̒͠r̶̨̡̢̦͔̙̮̦͖͔̩͈̗̖̂̀l̶̡̢͚̬̤͕̜̀͛̌̈́̈́͑͋̈̍̇͊͝͠ď̵̛̛̯͕̭̩͖̝̙͎̊̏̈́̎͊̐̏͊̕͜͝͠͝"); // Lots of ligatures
+    operation.retain(8);
+    let msg = json!({
+        "Edit": {
+            "revision": 1,
+            "operation": operation
+        }
+    });
+    info!("sending ClientMsg {}", msg);
+    client.send(&msg).await;
+
+    let msg = client.recv().await?;
+    assert_eq!(
+        msg,
+        json!({
+            "History": {
+                "start": 2,
+                "operations": [
+                    { "id": 0, "operation": [6, "h̷̙̤̏͊̑̍̆̃̉͝ĕ̶̠̌̓̃̓̽̃̚l̸̥̊̓̓͝͠l̸̨̠̣̟̥͠ỏ̴̳̖̪̟̱̰̥̞̙̏̓́͗̽̀̈́͛͐̚̕͝͝ ̶̡͍͙͚̞͙̣̘͙̯͇̙̠̀w̷̨̨̪͚̤͙͖̝͕̜̭̯̝̋̋̿̿̀̾͛̐̏͘͘̕͝ǒ̴̙͉͈̗̖͍̘̥̤̒̈́̒͠r̶̨̡̢̦͔̙̮̦͖͔̩͈̗̖̂̀l̶̡̢͚̬̤͕̜̀͛̌̈́̈́͑͋̈̍̇͊͝͠ď̵̛̛̯͕̭̩͖̝̙͎̊̏̈́̎͊̐̏͊̕͜͝͠͝", 11] }
+                ]
+            }
+        })
+    );
+
+    expect_text(&filter, "unicode", "👯‍♂️🎉😍h̷̙̤̏͊̑̍̆̃̉͝ĕ̶̠̌̓̃̓̽̃̚l̸̥̊̓̓͝͠l̸̨̠̣̟̥͠ỏ̴̳̖̪̟̱̰̥̞̙̏̓́͗̽̀̈́͛͐̚̕͝͝ ̶̡͍͙͚̞͙̣̘͙̯͇̙̠̀w̷̨̨̪͚̤͙͖̝͕̜̭̯̝̋̋̿̿̀̾͛̐̏͘͘̕͝ǒ̴̙͉͈̗̖͍̘̥̤̒̈́̒͠r̶̨̡̢̦͔̙̮̦͖͔̩͈̗̖̂̀l̶̡̢͚̬̤͕̜̀͛̌̈́̈́͑͋̈̍̇͊͝͠ď̵̛̛̯͕̭̩͖̝̙͎̊̏̈́̎͊̐̏͊̕͜͝͠͝𒀇𐅣𐅤𐅥👨‍👨‍👦‍👦").await;
+
+    Ok(())
+}
+
+#[tokio::test]
+async fn test_unicode_cursors() -> Result<()> { // Checks that stored cursor offsets shift by codepoints after another client's emoji insert.
+    pretty_env_logger::try_init().ok();
+    let filter = server();
+
+    let mut client = connect(&filter, "unicode").await?;
+    assert_eq!(client.recv().await?, json!({ "Identity": 0 }));
+
+    let mut operation = OperationSeq::default();
+    operation.insert("🎉🎉🎉"); // three codepoints
+    let msg = json!({
+        "Edit": {
+            "revision": 0,
+            "operation": operation
+        }
+    });
+    info!("sending ClientMsg {}", msg);
+    client.send(&msg).await;
+    client.recv().await?; // discard the server's reply to the edit (presumably its History echo)
+
+    let cursors = json!({
+        "cursors": [0, 1, 2, 3],
+        "selections": [[0, 1], [2, 3]]
+    });
+    client.send(&json!({ "CursorData": cursors })).await;
+
+    let cursors_resp = json!({
+        "UserCursor": {
+            "id": 0,
+            "data": cursors
+        }
+    });
+    assert_eq!(client.recv().await?, cursors_resp); // cursor data comes back unchanged, tagged with the sender's id
+
+    let mut client2 = connect(&filter, "unicode").await?;
+    assert_eq!(client2.recv().await?, json!({ "Identity": 1 }));
+    client2.recv().await?; // skip one message (presumably the History catch-up) before the cursor data
+    assert_eq!(client2.recv().await?, cursors_resp);
+
+    let msg = json!({
+        "Edit": {
+            "revision": 0,
+            "operation": ["🎉"]
+        }
+    });
+    client2.send(&msg).await; // a one-codepoint insert submitted against revision 0
+
+    let mut client3 = connect(&filter, "unicode").await?;
+    assert_eq!(client3.recv().await?, json!({ "Identity": 2 }));
+    client3.recv().await?; // skip one message (presumably the History catch-up) before the cursor data
+
+    let transformed_cursors_resp = json!({
+        "UserCursor": {
+            "id": 0,
+            "data": {
+                "cursors": [1, 2, 3, 4],
+                "selections": [[1, 2], [3, 4]]
+            }
+        }
+    });
+    assert_eq!(client3.recv().await?, transformed_cursors_resp); // every stored offset moved right by exactly 1
+
+    Ok(())
+}

+ 1 - 0
rustpad-wasm/Cargo.toml

@@ -11,6 +11,7 @@ crate-type = ["cdylib", "rlib"]
 default = ["console_error_panic_hook"]
 
 [dependencies]
+bytecount = "0.6"
 console_error_panic_hook = { version = "0.1", optional = true }
 operational-transform = { version = "0.6.0", features = ["serde"] }
 serde = { version = "1.0.126", features = ["derive"] }

+ 1 - 1
rustpad-wasm/src/lib.rs

@@ -141,7 +141,7 @@ impl OpSeq {
             use operational_transform::Operation::*;
             match op {
                 &Retain(n) => index -= n as i32,
-                Insert(s) => new_index += s.len() as i32,
+                Insert(s) => new_index += bytecount::num_chars(s.as_bytes()) as i32,
                 &Delete(n) => {
                     new_index -= std::cmp::min(index, n as i32);
                     index -= n as i32;

+ 52 - 14
src/rustpad.ts

@@ -2,6 +2,7 @@ import { OpSeq } from "rustpad-wasm";
 import type {
   editor,
   IDisposable,
+  IPosition,
 } from "monaco-editor/esm/vs/editor/editor.api";
 
 /** Options passed in to the Rustpad constructor. */
@@ -260,8 +261,8 @@ class Rustpad {
     for (const op of ops) {
       if (typeof op === "string") {
         // Insert
-        const pos = this.model.getPositionAt(index);
-        index += op.length;
+        const pos = unicodePosition(this.model, index);
+        index += unicodeLength(op);
         this.model.pushEditOperations(
           this.options.editor.getSelections(),
           [
@@ -284,8 +285,8 @@ class Rustpad {
       } else {
         // Delete
         const chars = -op;
-        var from = this.model.getPositionAt(index);
-        var to = this.model.getPositionAt(index + chars);
+        var from = unicodePosition(this.model, index);
+        var to = unicodePosition(this.model, index + chars);
         this.model.pushEditOperations(
           this.options.editor.getSelections(),
           [
@@ -331,7 +332,7 @@ class Rustpad {
         generateCssStyles(hue);
 
         for (const cursor of data.cursors) {
-          const position = this.model.getPositionAt(cursor);
+          const position = unicodePosition(this.model, cursor);
           decorations.push({
             options: {
               className: `remote-cursor-${hue}`,
@@ -347,8 +348,8 @@ class Rustpad {
           });
         }
         for (const selection of data.selections) {
-          const position = this.model.getPositionAt(selection[0]);
-          const positionEnd = this.model.getPositionAt(selection[1]);
+          const position = unicodePosition(this.model, selection[0]);
+          const positionEnd = unicodePosition(this.model, selection[1]);
           decorations.push({
             options: {
               className: `remote-selection-${hue}`,
@@ -378,17 +379,26 @@ class Rustpad {
   private onChange(event: editor.IModelContentChangedEvent) {
     if (!this.ignoreChanges) {
       const content = this.lastValue;
+      const contentLength = unicodeLength(content);
       let offset = 0;
 
       let operation = OpSeq.new();
-      operation.retain(content.length);
+      operation.retain(contentLength);
       event.changes.sort((a, b) => b.rangeOffset - a.rangeOffset);
       for (const change of event.changes) {
+        // The following dance is necessary to convert from UTF-16 indices (evil
+        // encoding-dependent JavaScript representation) to portable Unicode
+        // codepoint indices.
         const { text, rangeOffset, rangeLength } = change;
-        const restLength = content.length + offset - rangeOffset - rangeLength;
+        const initialLength = unicodeLength(content.slice(0, rangeOffset));
+        const deletedLength = unicodeLength(
+          content.slice(rangeOffset, rangeOffset + rangeLength)
+        );
+        const restLength =
+          contentLength + offset - initialLength - deletedLength;
         const changeOp = OpSeq.new();
-        changeOp.retain(rangeOffset);
-        changeOp.delete(rangeLength);
+        changeOp.retain(initialLength);
+        changeOp.delete(deletedLength);
         changeOp.insert(text);
         changeOp.retain(restLength);
         operation = operation.compose(changeOp)!;
@@ -401,15 +411,15 @@ class Rustpad {
 
   private onCursor(event: editor.ICursorPositionChangedEvent) {
     const cursors = [event.position, ...event.secondaryPositions];
-    this.cursorData.cursors = cursors.map((p) => this.model.getOffsetAt(p));
+    this.cursorData.cursors = cursors.map((p) => unicodeOffset(this.model, p));
     this.sendCursorData();
   }
 
   private onSelection(event: editor.ICursorSelectionChangedEvent) {
     const selections = [event.selection, ...event.secondarySelections];
     this.cursorData.selections = selections.map((s) => [
-      this.model.getOffsetAt(s.getStartPosition()),
-      this.model.getOffsetAt(s.getEndPosition()),
+      unicodeOffset(this.model, s.getStartPosition()),
+      unicodeOffset(this.model, s.getEndPosition()),
     ]);
     this.sendCursorData();
   }
@@ -442,6 +452,34 @@ type ServerMsg = {
   };
 };
 
+/** Counts the Unicode codepoints in `str`; a surrogate pair counts once here but twice in `str.length`. */
+function unicodeLength(str: string): number {
+  let length = 0;
+  // eslint-disable-next-line @typescript-eslint/no-unused-vars
+  for (const c of str) ++length; // string iteration advances one codepoint per step
+  return length;
+}
+
+/** Converts a model position into the number of Unicode codepoints (not UTF-16 units) that precede it. */
+function unicodeOffset(model: editor.ITextModel, pos: IPosition): number {
+  const value = model.getValue();
+  const offsetUTF16 = model.getOffsetAt(pos);
+  return unicodeLength(value.slice(0, offsetUTF16)); // count codepoints in the text before `pos`
+}
+
+/** Converts a Unicode codepoint offset into a model position; offsets past the end map to the end of the text. */
+function unicodePosition(model: editor.ITextModel, offset: number): IPosition {
+  const value = model.getValue();
+  let offsetUTF16 = 0;
+  for (const c of value) {
+    // Each step is one codepoint, which spans `c.length` (1 or 2) UTF-16 units
+    if (offset <= 0) break;
+    offsetUTF16 += c.length;
+    offset -= 1;
+  }
+  return model.getPositionAt(offsetUTF16);
+}
+
 /** Cache for private use by `generateCssStyles()`. */
 const generatedStyles = new Set<number>();