Segmentation.h 3.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134
  1. /*
  2. * Copyright (c) 2022, Idan Horowitz <idan.horowitz@serenityos.org>
  3. * Copyright (c) 2023, Tim Flynn <trflynn89@serenityos.org>
  4. *
  5. * SPDX-License-Identifier: BSD-2-Clause
  6. */
  7. #pragma once
  8. #include <AK/Forward.h>
  9. #include <AK/Function.h>
  10. #include <AK/IterationDecision.h>
  11. #include <AK/Optional.h>
  12. #include <AK/Types.h>
  13. namespace Unicode {
  14. using SegmentationCallback = Function<IterationDecision(size_t)>;
  15. void for_each_grapheme_segmentation_boundary(Utf8View const&, SegmentationCallback);
  16. void for_each_grapheme_segmentation_boundary(Utf16View const&, SegmentationCallback);
  17. void for_each_grapheme_segmentation_boundary(Utf32View const&, SegmentationCallback);
  18. template<typename ViewType>
  19. Optional<size_t> next_grapheme_segmentation_boundary(ViewType const& view, size_t index)
  20. {
  21. Optional<size_t> result;
  22. for_each_grapheme_segmentation_boundary(view, [&](auto boundary) {
  23. if (boundary > index) {
  24. result = boundary;
  25. return IterationDecision::Break;
  26. }
  27. return IterationDecision::Continue;
  28. });
  29. return result;
  30. }
  31. template<typename ViewType>
  32. Optional<size_t> previous_grapheme_segmentation_boundary(ViewType const& view, size_t index)
  33. {
  34. Optional<size_t> result;
  35. for_each_grapheme_segmentation_boundary(view, [&](auto boundary) {
  36. if (boundary < index) {
  37. result = boundary;
  38. return IterationDecision::Continue;
  39. }
  40. return IterationDecision::Break;
  41. });
  42. return result;
  43. }
  44. void for_each_word_segmentation_boundary(Utf8View const&, SegmentationCallback);
  45. void for_each_word_segmentation_boundary(Utf16View const&, SegmentationCallback);
  46. void for_each_word_segmentation_boundary(Utf32View const&, SegmentationCallback);
  47. template<typename ViewType>
  48. Optional<size_t> next_word_segmentation_boundary(ViewType const& view, size_t index)
  49. {
  50. Optional<size_t> result;
  51. for_each_word_segmentation_boundary(view, [&](auto boundary) {
  52. if (boundary > index) {
  53. result = boundary;
  54. return IterationDecision::Break;
  55. }
  56. return IterationDecision::Continue;
  57. });
  58. return result;
  59. }
  60. template<typename ViewType>
  61. Optional<size_t> previous_word_segmentation_boundary(ViewType const& view, size_t index)
  62. {
  63. Optional<size_t> result;
  64. for_each_word_segmentation_boundary(view, [&](auto boundary) {
  65. if (boundary < index) {
  66. result = boundary;
  67. return IterationDecision::Continue;
  68. }
  69. return IterationDecision::Break;
  70. });
  71. return result;
  72. }
  73. void for_each_sentence_segmentation_boundary(Utf8View const&, SegmentationCallback);
  74. void for_each_sentence_segmentation_boundary(Utf16View const&, SegmentationCallback);
  75. void for_each_sentence_segmentation_boundary(Utf32View const&, SegmentationCallback);
  76. template<typename ViewType>
  77. Optional<size_t> next_sentence_segmentation_boundary(ViewType const& view, size_t index)
  78. {
  79. Optional<size_t> result;
  80. for_each_sentence_segmentation_boundary(view, [&](auto boundary) {
  81. if (boundary > index) {
  82. result = boundary;
  83. return IterationDecision::Break;
  84. }
  85. return IterationDecision::Continue;
  86. });
  87. return result;
  88. }
  89. template<typename ViewType>
  90. Optional<size_t> previous_sentence_segmentation_boundary(ViewType const& view, size_t index)
  91. {
  92. Optional<size_t> result;
  93. for_each_sentence_segmentation_boundary(view, [&](auto boundary) {
  94. if (boundary < index) {
  95. result = boundary;
  96. return IterationDecision::Continue;
  97. }
  98. return IterationDecision::Break;
  99. });
  100. return result;
  101. }
  102. }