Decoder.h 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111
  /*
   * Copyright (c) 2020-2021, Andreas Kling <kling@serenityos.org>
   * Copyright (c) 2022, Jelle Raaijmakers <jelle@gmta.nl>
   * Copyright (c) 2023, Sam Atkins <atkinssj@serenityos.org>
   *
   * SPDX-License-Identifier: BSD-2-Clause
   */
  8. #pragma once
  9. #include <AK/Forward.h>
  10. #include <AK/Function.h>
  11. #include <AK/Optional.h>
  12. #include <AK/String.h>
  13. namespace TextCodec {
  14. class Decoder {
  15. public:
  16. virtual ErrorOr<void> process(StringView, Function<ErrorOr<void>(u32)> on_code_point) = 0;
  17. virtual bool validate(StringView);
  18. virtual ErrorOr<String> to_utf8(StringView);
  19. protected:
  20. virtual ~Decoder() = default;
  21. };
  22. class UTF8Decoder final : public Decoder {
  23. public:
  24. virtual ErrorOr<void> process(StringView, Function<ErrorOr<void>(u32)> on_code_point) override;
  25. virtual bool validate(StringView) override;
  26. virtual ErrorOr<String> to_utf8(StringView) override;
  27. };
  28. class UTF16BEDecoder final : public Decoder {
  29. public:
  30. virtual ErrorOr<void> process(StringView, Function<ErrorOr<void>(u32)> on_code_point) override;
  31. virtual bool validate(StringView) override;
  32. virtual ErrorOr<String> to_utf8(StringView) override;
  33. };
  34. class UTF16LEDecoder final : public Decoder {
  35. public:
  36. virtual ErrorOr<void> process(StringView, Function<ErrorOr<void>(u32)> on_code_point) override;
  37. virtual bool validate(StringView) override;
  38. virtual ErrorOr<String> to_utf8(StringView) override;
  39. };
  40. class Latin1Decoder final : public Decoder {
  41. public:
  42. virtual ErrorOr<void> process(StringView, Function<ErrorOr<void>(u32)> on_code_point) override;
  43. };
  44. class Latin2Decoder final : public Decoder {
  45. public:
  46. virtual ErrorOr<void> process(StringView, Function<ErrorOr<void>(u32)> on_code_point) override;
  47. };
  48. class HebrewDecoder final : public Decoder {
  49. public:
  50. virtual ErrorOr<void> process(StringView, Function<ErrorOr<void>(u32)> on_code_point) override;
  51. };
  52. class CyrillicDecoder final : public Decoder {
  53. public:
  54. virtual ErrorOr<void> process(StringView, Function<ErrorOr<void>(u32)> on_code_point) override;
  55. };
  56. class Koi8RDecoder final : public Decoder {
  57. public:
  58. virtual ErrorOr<void> process(StringView, Function<ErrorOr<void>(u32)> on_code_point) override;
  59. };
  60. class Latin9Decoder final : public Decoder {
  61. public:
  62. virtual ErrorOr<void> process(StringView, Function<ErrorOr<void>(u32)> on_code_point) override;
  63. };
  64. class MacRomanDecoder final : public Decoder {
  65. public:
  66. virtual ErrorOr<void> process(StringView, Function<ErrorOr<void>(u32)> on_code_point) override;
  67. };
  68. class PDFDocEncodingDecoder final : public Decoder {
  69. public:
  70. virtual ErrorOr<void> process(StringView, Function<ErrorOr<void>(u32)> on_code_point) override;
  71. };
  72. class TurkishDecoder final : public Decoder {
  73. public:
  74. virtual ErrorOr<void> process(StringView, Function<ErrorOr<void>(u32)> on_code_point) override;
  75. };
  76. class XUserDefinedDecoder final : public Decoder {
  77. public:
  78. virtual ErrorOr<void> process(StringView, Function<ErrorOr<void>(u32)> on_code_point) override;
  79. };
  80. Optional<Decoder&> decoder_for(StringView encoding);
  81. Optional<StringView> get_standardized_encoding(StringView encoding);
  82. // This returns the appropriate Unicode decoder for the sniffed BOM or nothing if there is no appropriate decoder.
  83. Optional<Decoder&> bom_sniff_to_decoder(StringView);
  84. // NOTE: This has an obnoxious name to discourage usage. Only use this if you absolutely must! For example, XHR in LibWeb uses this.
  85. // This will use the given decoder unless there is a byte order mark in the input, in which we will instead use the appropriate Unicode decoder.
  86. ErrorOr<String> convert_input_to_utf8_using_given_decoder_unless_there_is_a_byte_order_mark(Decoder&, StringView);
  87. StringView get_output_encoding(StringView encoding);
  88. }