
LibJS parses JavaScript source as UTF-8, so when creating a string we must transcode it to UTF-16 in order to handle encoded surrogate pairs correctly. For example, consider the string "\ud83d\ude00". The UTF-8 encoding of the code point this surrogate pair represents (U+1F600) is: 0xf0 0x9f 0x98 0x80. However, LibJS currently stores the two surrogates individually as UTF-8 encoded bytes, rather than combining the pair: 0xed 0xa0 0xbd, 0xed 0xb8 0x80. These two byte sequences are not equivalent, so as String.prototype becomes UTF-16 aware, this encoding will no longer work for abstractions like strict equality.
38 lines
832 B
C++
38 lines
832 B
C++
/*
 * Copyright (c) 2020, Andreas Kling <kling@serenityos.org>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */
|
|
|
|
#pragma once
|
|
|
|
#include <AK/String.h>
|
|
#include <AK/Vector.h>
|
|
#include <LibJS/Heap/Cell.h>
|
|
|
|
namespace JS {
|
|
|
|
class PrimitiveString final : public Cell {
|
|
public:
|
|
explicit PrimitiveString(String);
|
|
virtual ~PrimitiveString();
|
|
|
|
String const& string() const { return m_string; }
|
|
|
|
Vector<u16> const& utf16_string() const;
|
|
Utf16View utf16_string_view() const;
|
|
|
|
private:
|
|
virtual const char* class_name() const override { return "PrimitiveString"; }
|
|
|
|
String m_string;
|
|
mutable Vector<u16> m_utf16_string;
|
|
};
|
|
|
|
// Return a PrimitiveString* for the given UTF-16 view, using either a Heap or a VM.
// NOTE(review): the definitions are not visible here; presumably the VM
// overload forwards to the Heap overload via the VM's heap — confirm.
PrimitiveString* js_string(Heap&, Utf16View const&);
PrimitiveString* js_string(VM&, Utf16View const&);

// Return a PrimitiveString* for the given String (taken by value), using either a Heap or a VM.
PrimitiveString* js_string(Heap&, String);
PrimitiveString* js_string(VM&, String);
|
|
|
|
}
|