Segmenter.h 1.8 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162
/*
 * Copyright (c) 2024, Tim Flynn <trflynn89@serenityos.org>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */
  6. #pragma once
  7. #include <AK/Function.h>
  8. #include <AK/NonnullOwnPtr.h>
  9. #include <AK/Optional.h>
  10. #include <AK/StringView.h>
  11. namespace Unicode {
  // The unit of text a Segmenter is configured for; passed to Segmenter::create() and reported
  // back by Segmenter::segmenter_granularity().
  enum class SegmenterGranularity {
      Grapheme,
      Sentence,
      Word,
  };
  17. SegmenterGranularity segmenter_granularity_from_string(StringView);
  18. StringView segmenter_granularity_to_string(SegmenterGranularity);
  19. class Segmenter {
  20. public:
  21. static NonnullOwnPtr<Segmenter> create(SegmenterGranularity segmenter_granularity);
  22. static NonnullOwnPtr<Segmenter> create(StringView locale, SegmenterGranularity segmenter_granularity);
  23. virtual ~Segmenter() = default;
  24. SegmenterGranularity segmenter_granularity() const { return m_segmenter_granularity; }
  25. virtual NonnullOwnPtr<Segmenter> clone() const = 0;
  26. virtual void set_segmented_text(String) = 0;
  27. virtual void set_segmented_text(Utf16View const&) = 0;
  28. virtual size_t current_boundary() = 0;
  29. enum class Inclusive {
  30. No,
  31. Yes,
  32. };
  33. virtual Optional<size_t> previous_boundary(size_t index, Inclusive = Inclusive::No) = 0;
  34. virtual Optional<size_t> next_boundary(size_t index, Inclusive = Inclusive::No) = 0;
  35. using SegmentationCallback = Function<IterationDecision(size_t)>;
  36. virtual void for_each_boundary(String, SegmentationCallback) = 0;
  37. virtual void for_each_boundary(Utf16View const&, SegmentationCallback) = 0;
  38. virtual void for_each_boundary(Utf32View const&, SegmentationCallback) = 0;
  39. virtual bool is_current_boundary_word_like() const = 0;
  40. protected:
  41. explicit Segmenter(SegmenterGranularity segmenter_granularity)
  42. : m_segmenter_granularity(segmenter_granularity)
  43. {
  44. }
  45. SegmenterGranularity m_segmenter_granularity { SegmenterGranularity::Grapheme };
  46. };
  47. }