Decoder.h 3.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100
  1. /*
  2. * Copyright (c) 2020-2021, Andreas Kling <kling@serenityos.org>
  3. * Copyright (c) 2022, Jelle Raaijmakers <jelle@gmta.nl>
  4. * Copyright (c) 2023, Sam Atkins <atkinssj@serenityos.org>
  5. *
  6. * SPDX-License-Identifier: BSD-2-Clause
  7. */
  8. #pragma once
  9. #include <AK/Forward.h>
  10. #include <AK/Function.h>
  11. #include <AK/Optional.h>
  12. #include <AK/String.h>
  13. namespace TextCodec {
  // Abstract base class shared by every decoder declared in this header.
  14. class Decoder {
  15. public:
  // Pure virtual: each concrete decoder must implement this. It receives the input as a StringView and a
  // callback taking a u32 and returning ErrorOr<void>.
  // NOTE(review): presumably the callback is invoked once per decoded code point and its errors are propagated — confirm against the implementation.
  16. virtual ErrorOr<void> process(StringView, Function<ErrorOr<void>(u32)> on_code_point) = 0;
  // Not pure, so a default implementation is expected to exist outside this header; subclasses may override it.
  // NOTE(review): presumably returns the input re-encoded as a UTF-8 String — confirm against the implementation.
  17. virtual ErrorOr<String> to_utf8(StringView);
  18. protected:
  // The destructor is protected, so code outside the class hierarchy cannot destroy an object through a Decoder pointer or reference.
  19. virtual ~Decoder() = default;
  20. };
  // Final Decoder subclass that overrides both process() and to_utf8().
  // NOTE(review): judging by the name it handles UTF-8 input; the implementation is not in this header.
  21. class UTF8Decoder final : public Decoder {
  22. public:
  23. virtual ErrorOr<void> process(StringView, Function<ErrorOr<void>(u32)> on_code_point) override;
  // Replaces the base-class to_utf8() with a decoder-specific version.
  24. virtual ErrorOr<String> to_utf8(StringView) override;
  25. };
  // Final Decoder subclass that overrides both process() and to_utf8().
  // NOTE(review): judging by the name it handles big-endian UTF-16 input; the implementation is not in this header.
  26. class UTF16BEDecoder final : public Decoder {
  27. public:
  28. virtual ErrorOr<void> process(StringView, Function<ErrorOr<void>(u32)> on_code_point) override;
  // Replaces the base-class to_utf8() with a decoder-specific version.
  29. virtual ErrorOr<String> to_utf8(StringView) override;
  30. };
  // Final Decoder subclass that overrides both process() and to_utf8().
  // NOTE(review): judging by the name it handles little-endian UTF-16 input; the implementation is not in this header.
  31. class UTF16LEDecoder final : public Decoder {
  32. public:
  33. virtual ErrorOr<void> process(StringView, Function<ErrorOr<void>(u32)> on_code_point) override;
  // Replaces the base-class to_utf8() with a decoder-specific version.
  34. virtual ErrorOr<String> to_utf8(StringView) override;
  35. };
  // Final Decoder subclass that overrides only process(); to_utf8() is inherited from Decoder.
  // NOTE(review): the name suggests Latin-1 (presumably ISO-8859-1 or its windows-1252 superset) — confirm against the implementation.
  36. class Latin1Decoder final : public Decoder {
  37. public:
  38. virtual ErrorOr<void> process(StringView, Function<ErrorOr<void>(u32)> on_code_point) override;
  39. };
  // Final Decoder subclass that overrides only process(); to_utf8() is inherited from Decoder.
  // NOTE(review): the name suggests Latin-2 (presumably ISO-8859-2) — confirm against the implementation.
  40. class Latin2Decoder final : public Decoder {
  41. public:
  42. virtual ErrorOr<void> process(StringView, Function<ErrorOr<void>(u32)> on_code_point) override;
  43. };
  // Final Decoder subclass that overrides only process(); to_utf8() is inherited from Decoder.
  // NOTE(review): the name suggests a single-byte Hebrew encoding; which code page is not visible in this header — confirm against the implementation.
  44. class HebrewDecoder final : public Decoder {
  45. public:
  46. virtual ErrorOr<void> process(StringView, Function<ErrorOr<void>(u32)> on_code_point) override;
  47. };
  // Final Decoder subclass that overrides only process(); to_utf8() is inherited from Decoder.
  // NOTE(review): the name suggests a Cyrillic encoding; which code page is not visible in this header — confirm against the implementation.
  48. class CyrillicDecoder final : public Decoder {
  49. public:
  50. virtual ErrorOr<void> process(StringView, Function<ErrorOr<void>(u32)> on_code_point) override;
  51. };
  // Final Decoder subclass that overrides only process(); to_utf8() is inherited from Decoder.
  // NOTE(review): the name suggests the KOI8-R encoding — confirm against the implementation.
  52. class Koi8RDecoder final : public Decoder {
  53. public:
  54. virtual ErrorOr<void> process(StringView, Function<ErrorOr<void>(u32)> on_code_point) override;
  55. };
  // Final Decoder subclass that overrides only process(); to_utf8() is inherited from Decoder.
  // NOTE(review): the name suggests Latin-9 (presumably ISO-8859-15) — confirm against the implementation.
  56. class Latin9Decoder final : public Decoder {
  57. public:
  58. virtual ErrorOr<void> process(StringView, Function<ErrorOr<void>(u32)> on_code_point) override;
  59. };
  // Final Decoder subclass that overrides only process(); to_utf8() is inherited from Decoder.
  // NOTE(review): the name suggests the Mac OS Roman encoding — confirm against the implementation.
  60. class MacRomanDecoder final : public Decoder {
  61. public:
  62. virtual ErrorOr<void> process(StringView, Function<ErrorOr<void>(u32)> on_code_point) override;
  63. };
  // Final Decoder subclass that overrides only process(); to_utf8() is inherited from Decoder.
  // NOTE(review): the name suggests a single-byte Turkish encoding; which code page is not visible in this header — confirm against the implementation.
  64. class TurkishDecoder final : public Decoder {
  65. public:
  66. virtual ErrorOr<void> process(StringView, Function<ErrorOr<void>(u32)> on_code_point) override;
  67. };
  // Final Decoder subclass that overrides only process(); to_utf8() is inherited from Decoder.
  // NOTE(review): the name suggests the "x-user-defined" encoding from the WHATWG Encoding Standard — confirm against the implementation.
  68. class XUserDefinedDecoder final : public Decoder {
  69. public:
  70. virtual ErrorOr<void> process(StringView, Function<ErrorOr<void>(u32)> on_code_point) override;
  71. };
  // Free functions of the TextCodec namespace; only declared here, defined elsewhere.
  // NOTE(review): presumably returns the decoder matching the given encoding name, or an empty Optional when none matches — confirm against the definition.
  72. Optional<Decoder&> decoder_for(StringView encoding);
  // NOTE(review): presumably maps an encoding label to its canonical name, or an empty Optional when the label is not recognised — confirm against the definition.
  73. Optional<StringView> get_standardized_encoding(StringView encoding);
  74. // This returns the appropriate Unicode decoder for the sniffed BOM or nothing if there is no appropriate decoder.
  75. Optional<Decoder&> bom_sniff_to_decoder(StringView);
  76. // NOTE: This has an obnoxious name to discourage usage. Only use this if you absolutely must! For example, XHR in LibWeb uses this.
  77. // This will use the given decoder unless there is a byte order mark in the input, in which case we will instead use the appropriate Unicode decoder.
  78. ErrorOr<String> convert_input_to_utf8_using_given_decoder_unless_there_is_a_byte_order_mark(Decoder&, StringView);
  79. }