Decoder.h 3.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899
  1. /*
  2. * Copyright (c) 2020-2021, Andreas Kling <kling@serenityos.org>
  3. * Copyright (c) 2022, Jelle Raaijmakers <jelle@gmta.nl>
  4. * Copyright (c) 2023, Sam Atkins <atkinssj@serenityos.org>
  5. *
  6. * SPDX-License-Identifier: BSD-2-Clause
  7. */
  8. #pragma once
  9. #include <AK/Forward.h>
  10. #include <AK/Function.h>
  11. #include <AK/Optional.h>
  12. namespace TextCodec {
  13. class Decoder {
  14. public:
  15. virtual void process(StringView, Function<void(u32)> on_code_point) = 0;
  16. virtual DeprecatedString to_utf8(StringView);
  17. protected:
  18. virtual ~Decoder() = default;
  19. };
  20. class UTF8Decoder final : public Decoder {
  21. public:
  22. virtual void process(StringView, Function<void(u32)> on_code_point) override;
  23. virtual DeprecatedString to_utf8(StringView) override;
  24. };
  25. class UTF16BEDecoder final : public Decoder {
  26. public:
  27. virtual void process(StringView, Function<void(u32)> on_code_point) override;
  28. virtual DeprecatedString to_utf8(StringView) override;
  29. };
  30. class UTF16LEDecoder final : public Decoder {
  31. public:
  32. virtual void process(StringView, Function<void(u32)> on_code_point) override;
  33. virtual DeprecatedString to_utf8(StringView) override;
  34. };
  35. class Latin1Decoder final : public Decoder {
  36. public:
  37. virtual void process(StringView, Function<void(u32)> on_code_point) override;
  38. };
  39. class Latin2Decoder final : public Decoder {
  40. public:
  41. virtual void process(StringView, Function<void(u32)> on_code_point) override;
  42. };
  43. class HebrewDecoder final : public Decoder {
  44. public:
  45. virtual void process(StringView, Function<void(u32)> on_code_point) override;
  46. };
  47. class CyrillicDecoder final : public Decoder {
  48. public:
  49. virtual void process(StringView, Function<void(u32)> on_code_point) override;
  50. };
  51. class Koi8RDecoder final : public Decoder {
  52. public:
  53. virtual void process(StringView, Function<void(u32)> on_code_point) override;
  54. };
  55. class Latin9Decoder final : public Decoder {
  56. public:
  57. virtual void process(StringView, Function<void(u32)> on_code_point) override;
  58. };
  59. class MacRomanDecoder final : public Decoder {
  60. public:
  61. virtual void process(StringView, Function<void(u32)> on_code_point) override;
  62. };
  63. class TurkishDecoder final : public Decoder {
  64. public:
  65. virtual void process(StringView, Function<void(u32)> on_code_point) override;
  66. };
  67. class XUserDefinedDecoder final : public Decoder {
  68. public:
  69. virtual void process(StringView, Function<void(u32)> on_code_point) override;
  70. };
  71. Optional<Decoder&> decoder_for(StringView encoding);
  72. Optional<StringView> get_standardized_encoding(StringView encoding);
  73. // This returns the appropriate Unicode decoder for the sniffed BOM or nullptr if there is no appropriate decoder.
  74. Decoder* bom_sniff_to_decoder(StringView);
  75. // NOTE: This has an obnoxious name to discourage usage. Only use this if you absolutely must! For example, XHR in LibWeb uses this.
  76. // This will use the given decoder unless there is a byte order mark in the input, in which we will instead use the appropriate Unicode decoder.
  77. DeprecatedString convert_input_to_utf8_using_given_decoder_unless_there_is_a_byte_order_mark(Decoder&, StringView);
  78. }