Segmentation.h 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174
  1. /*
  2. * Copyright (c) 2022, Idan Horowitz <idan.horowitz@serenityos.org>
  3. * Copyright (c) 2023, Tim Flynn <trflynn89@serenityos.org>
  4. *
  5. * SPDX-License-Identifier: BSD-2-Clause
  6. */
  7. #pragma once
  8. #include <AK/Forward.h>
  9. #include <AK/Function.h>
  10. #include <AK/IterationDecision.h>
  11. #include <AK/Optional.h>
  12. #include <AK/Types.h>
  13. #include <AK/Vector.h>
  14. namespace Unicode {
  15. using SegmentationCallback = Function<IterationDecision(size_t)>;
  16. void for_each_grapheme_segmentation_boundary(Utf8View const&, SegmentationCallback);
  17. void for_each_grapheme_segmentation_boundary(Utf16View const&, SegmentationCallback);
  18. void for_each_grapheme_segmentation_boundary(Utf32View const&, SegmentationCallback);
  19. template<typename ViewType>
  20. Vector<size_t> find_grapheme_segmentation_boundaries(ViewType const& view)
  21. {
  22. Vector<size_t> boundaries;
  23. for_each_grapheme_segmentation_boundary(view, [&](auto boundary) {
  24. boundaries.append(boundary);
  25. return IterationDecision::Continue;
  26. });
  27. return boundaries;
  28. }
  29. template<typename ViewType>
  30. Optional<size_t> next_grapheme_segmentation_boundary(ViewType const& view, size_t index)
  31. {
  32. Optional<size_t> result;
  33. for_each_grapheme_segmentation_boundary(view, [&](auto boundary) {
  34. if (boundary > index) {
  35. result = boundary;
  36. return IterationDecision::Break;
  37. }
  38. return IterationDecision::Continue;
  39. });
  40. return result;
  41. }
  42. template<typename ViewType>
  43. Optional<size_t> previous_grapheme_segmentation_boundary(ViewType const& view, size_t index)
  44. {
  45. Optional<size_t> result;
  46. for_each_grapheme_segmentation_boundary(view, [&](auto boundary) {
  47. if (boundary < index) {
  48. result = boundary;
  49. return IterationDecision::Continue;
  50. }
  51. return IterationDecision::Break;
  52. });
  53. return result;
  54. }
  55. void for_each_word_segmentation_boundary(Utf8View const&, SegmentationCallback);
  56. void for_each_word_segmentation_boundary(Utf16View const&, SegmentationCallback);
  57. void for_each_word_segmentation_boundary(Utf32View const&, SegmentationCallback);
  58. template<typename ViewType>
  59. Vector<size_t> find_word_segmentation_boundaries(ViewType const& view)
  60. {
  61. Vector<size_t> boundaries;
  62. for_each_word_segmentation_boundary(view, [&](auto boundary) {
  63. boundaries.append(boundary);
  64. return IterationDecision::Continue;
  65. });
  66. return boundaries;
  67. }
  68. template<typename ViewType>
  69. Optional<size_t> next_word_segmentation_boundary(ViewType const& view, size_t index)
  70. {
  71. Optional<size_t> result;
  72. for_each_word_segmentation_boundary(view, [&](auto boundary) {
  73. if (boundary > index) {
  74. result = boundary;
  75. return IterationDecision::Break;
  76. }
  77. return IterationDecision::Continue;
  78. });
  79. return result;
  80. }
  81. template<typename ViewType>
  82. Optional<size_t> previous_word_segmentation_boundary(ViewType const& view, size_t index)
  83. {
  84. Optional<size_t> result;
  85. for_each_word_segmentation_boundary(view, [&](auto boundary) {
  86. if (boundary < index) {
  87. result = boundary;
  88. return IterationDecision::Continue;
  89. }
  90. return IterationDecision::Break;
  91. });
  92. return result;
  93. }
  94. void for_each_sentence_segmentation_boundary(Utf8View const&, SegmentationCallback);
  95. void for_each_sentence_segmentation_boundary(Utf16View const&, SegmentationCallback);
  96. void for_each_sentence_segmentation_boundary(Utf32View const&, SegmentationCallback);
  97. template<typename ViewType>
  98. Vector<size_t> find_sentence_segmentation_boundaries(ViewType const& view)
  99. {
  100. Vector<size_t> boundaries;
  101. for_each_sentence_segmentation_boundary(view, [&](auto boundary) {
  102. boundaries.append(boundary);
  103. return IterationDecision::Continue;
  104. });
  105. return boundaries;
  106. }
  107. template<typename ViewType>
  108. Optional<size_t> next_sentence_segmentation_boundary(ViewType const& view, size_t index)
  109. {
  110. Optional<size_t> result;
  111. for_each_sentence_segmentation_boundary(view, [&](auto boundary) {
  112. if (boundary > index) {
  113. result = boundary;
  114. return IterationDecision::Break;
  115. }
  116. return IterationDecision::Continue;
  117. });
  118. return result;
  119. }
  120. template<typename ViewType>
  121. Optional<size_t> previous_sentence_segmentation_boundary(ViewType const& view, size_t index)
  122. {
  123. Optional<size_t> result;
  124. for_each_sentence_segmentation_boundary(view, [&](auto boundary) {
  125. if (boundary < index) {
  126. result = boundary;
  127. return IterationDecision::Continue;
  128. }
  129. return IterationDecision::Break;
  130. });
  131. return result;
  132. }
  133. }