|
@@ -5,6 +5,7 @@
|
|
|
*/
|
|
|
|
|
|
#include <AK/Utf32View.h>
|
|
|
+#include <AK/Utf8View.h>
|
|
|
#include <LibUnicode/Punycode.h>
|
|
|
|
|
|
namespace Unicode::Punycode {
|
|
@@ -30,6 +31,14 @@ static Optional<u32> digit_value_of_code_point(u32 code_point)
|
|
|
return {};
|
|
|
}
|
|
|
|
|
|
+static u32 code_point_value_of_digit(u32 digit)
|
|
|
+{
|
|
|
+ VERIFY(digit < 36);
|
|
|
+ if (digit <= 25)
|
|
|
+ return 'a' + digit;
|
|
|
+ return '0' + digit - 26;
|
|
|
+}
|
|
|
+
|
|
|
// https://www.rfc-editor.org/rfc/rfc3492.html#section-6.1
|
|
|
static u32 adapt(u32 delta, u32 num_points, bool first_time)
|
|
|
{
|
|
@@ -160,4 +169,118 @@ ErrorOr<String> decode(StringView input)
|
|
|
return builder.to_string();
|
|
|
}
|
|
|
|
|
|
+static Optional<u32> find_smallest_code_point_greater_than_or_equal(Utf32View code_points, u32 threshold)
|
|
|
+{
|
|
|
+ Optional<u32> result;
|
|
|
+ for (auto code_point : code_points) {
|
|
|
+ if (code_point >= threshold && (!result.has_value() || code_point < result.value()))
|
|
|
+ result = code_point;
|
|
|
+ }
|
|
|
+ return result;
|
|
|
+}
|
|
|
+
|
|
|
+ErrorOr<String> encode(StringView input)
|
|
|
+{
|
|
|
+ Vector<u32> code_points;
|
|
|
+ for (auto code_point : Utf8View(input))
|
|
|
+ TRY(code_points.try_append(code_point));
|
|
|
+ return encode(Utf32View(code_points.data(), code_points.size()));
|
|
|
+}
|
|
|
+
|
|
|
+// https://www.rfc-editor.org/rfc/rfc3492.html#section-6.3
|
|
|
+ErrorOr<String> encode(Utf32View input)
|
|
|
+{
|
|
|
+ Vector<u32> output;
|
|
|
+
|
|
|
+ // let n = initial_n
|
|
|
+ Checked<size_t> n = INITIAL_N;
|
|
|
+
|
|
|
+ // let delta = 0
|
|
|
+ Checked<size_t> delta = 0;
|
|
|
+
|
|
|
+ // let bias = initial_bias
|
|
|
+ u32 bias = INITIAL_BIAS;
|
|
|
+
|
|
|
+ // let h = b = the number of basic code points in the input
|
|
|
+ // copy them to the output in order, followed by a delimiter if b > 0
|
|
|
+ size_t b = 0;
|
|
|
+ for (auto code_point : input) {
|
|
|
+ if (is_ascii(code_point)) {
|
|
|
+ TRY(output.try_append(code_point));
|
|
|
+ b++;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ auto h = b;
|
|
|
+ if (b > 0)
|
|
|
+ TRY(output.try_append(DELIMITER));
|
|
|
+
|
|
|
+ // while h < length(input) do begin
|
|
|
+ while (h < input.length()) {
|
|
|
+ // let m = the minimum {non-basic} code point >= n in the input
|
|
|
+ auto m = find_smallest_code_point_greater_than_or_equal(input, n.value());
|
|
|
+ VERIFY(m.has_value());
|
|
|
+
|
|
|
+ // let delta = delta + (m - n) * (h + 1), fail on overflow
|
|
|
+ delta = delta + (Checked(static_cast<size_t>(m.value())) - n) * Checked(h + 1);
|
|
|
+ if (delta.has_overflow())
|
|
|
+ return Error::from_string_literal("Numeric overflow");
|
|
|
+
|
|
|
+ // let n = m
|
|
|
+ n = m.value();
|
|
|
+
|
|
|
+ // for each code point c in the input (in order) do begin
|
|
|
+ for (auto c : input) {
|
|
|
+ // if c < n {or c is basic} then increment delta, fail on overflow
|
|
|
+ if (c < n.value()) {
|
|
|
+ delta++;
|
|
|
+ if (delta.has_overflow())
|
|
|
+ return Error::from_string_literal("Numeric overflow");
|
|
|
+ }
|
|
|
+
|
|
|
+ // if c == n then begin
|
|
|
+ if (c == n.value()) {
|
|
|
+ // let q = delta
|
|
|
+ auto q = delta.value();
|
|
|
+
|
|
|
+ // for k = base to infinity in steps of base do begin
|
|
|
+ for (size_t k = BASE;; k += BASE) {
|
|
|
+ // let t = tmin if k <= bias {+ tmin}, or
|
|
|
+ // tmax if k >= bias + tmax, or k - bias otherwise
|
|
|
+ u32 t = k <= bias ? TMIN : (k >= bias + TMAX ? TMAX : k - bias);
|
|
|
+
|
|
|
+ // if q < t then break
|
|
|
+ if (q < t)
|
|
|
+ break;
|
|
|
+
|
|
|
+ // output the code point for digit t + ((q - t) mod (base - t))
|
|
|
+ auto digit = t + ((q - t) % (BASE - t));
|
|
|
+ TRY(output.try_append(code_point_value_of_digit(digit)));
|
|
|
+
|
|
|
+ // let q = (q - t) div (base - t)
|
|
|
+ q = (q - t) / (BASE - t);
|
|
|
+ }
|
|
|
+ // output the code point for digit q
|
|
|
+ TRY(output.try_append(code_point_value_of_digit(q)));
|
|
|
+
|
|
|
+ // let bias = adapt(delta, h + 1, test h equals b?)
|
|
|
+ bias = adapt(delta.value(), h + 1, h == b);
|
|
|
+
|
|
|
+ // let delta = 0
|
|
|
+ delta = 0;
|
|
|
+
|
|
|
+ // increment h
|
|
|
+ h++;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ // increment delta and n
|
|
|
+ delta++;
|
|
|
+ n++;
|
|
|
+ }
|
|
|
+
|
|
|
+ StringBuilder builder;
|
|
|
+ TRY(builder.try_append(Utf32View(output.data(), output.size())));
|
|
|
+ return builder.to_string();
|
|
|
+}
|
|
|
+
|
|
|
}
|