TopicsService.java 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474
  1. package com.provectus.kafka.ui.service;
  2. import static java.util.stream.Collectors.toList;
  3. import static java.util.stream.Collectors.toMap;
  4. import com.google.common.collect.Sets;
  5. import com.provectus.kafka.ui.config.ClustersProperties;
  6. import com.provectus.kafka.ui.exception.TopicMetadataException;
  7. import com.provectus.kafka.ui.exception.TopicNotFoundException;
  8. import com.provectus.kafka.ui.exception.TopicRecreationException;
  9. import com.provectus.kafka.ui.exception.ValidationException;
  10. import com.provectus.kafka.ui.model.ClusterFeature;
  11. import com.provectus.kafka.ui.model.InternalLogDirStats;
  12. import com.provectus.kafka.ui.model.InternalPartition;
  13. import com.provectus.kafka.ui.model.InternalPartitionsOffsets;
  14. import com.provectus.kafka.ui.model.InternalReplica;
  15. import com.provectus.kafka.ui.model.InternalTopic;
  16. import com.provectus.kafka.ui.model.InternalTopicConfig;
  17. import com.provectus.kafka.ui.model.KafkaCluster;
  18. import com.provectus.kafka.ui.model.Metrics;
  19. import com.provectus.kafka.ui.model.PartitionsIncreaseDTO;
  20. import com.provectus.kafka.ui.model.PartitionsIncreaseResponseDTO;
  21. import com.provectus.kafka.ui.model.ReplicationFactorChangeDTO;
  22. import com.provectus.kafka.ui.model.ReplicationFactorChangeResponseDTO;
  23. import com.provectus.kafka.ui.model.Statistics;
  24. import com.provectus.kafka.ui.model.TopicCreationDTO;
  25. import com.provectus.kafka.ui.model.TopicUpdateDTO;
  26. import java.time.Duration;
  27. import java.util.Collection;
  28. import java.util.Collections;
  29. import java.util.Comparator;
  30. import java.util.List;
  31. import java.util.Map;
  32. import java.util.Optional;
  33. import java.util.function.Function;
  34. import java.util.stream.Collectors;
  35. import lombok.RequiredArgsConstructor;
  36. import org.apache.kafka.clients.admin.ConfigEntry;
  37. import org.apache.kafka.clients.admin.NewPartitionReassignment;
  38. import org.apache.kafka.clients.admin.NewPartitions;
  39. import org.apache.kafka.clients.admin.OffsetSpec;
  40. import org.apache.kafka.clients.admin.TopicDescription;
  41. import org.apache.kafka.common.Node;
  42. import org.apache.kafka.common.TopicPartition;
  43. import org.apache.kafka.common.errors.TopicExistsException;
  44. import org.springframework.beans.factory.annotation.Value;
  45. import org.springframework.stereotype.Service;
  46. import reactor.core.publisher.Mono;
  47. import reactor.util.retry.Retry;
  48. @Service
  49. @RequiredArgsConstructor
  50. public class TopicsService {
  51. private final AdminClientService adminClientService;
  52. private final StatisticsCache statisticsCache;
  53. private final ClustersProperties clustersProperties;
  54. @Value("${topic.recreate.maxRetries:15}")
  55. private int recreateMaxRetries;
  56. @Value("${topic.recreate.delay.seconds:1}")
  57. private int recreateDelayInSeconds;
  58. @Value("${topic.load.after.create.maxRetries:10}")
  59. private int loadTopicAfterCreateRetries;
  60. @Value("${topic.load.after.create.delay.ms:500}")
  61. private int loadTopicAfterCreateDelayInMs;
  62. public Mono<List<InternalTopic>> loadTopics(KafkaCluster c, List<String> topics) {
  63. if (topics.isEmpty()) {
  64. return Mono.just(List.of());
  65. }
  66. return adminClientService.get(c)
  67. .flatMap(ac ->
  68. ac.describeTopics(topics).zipWith(ac.getTopicsConfig(topics, false),
  69. (descriptions, configs) -> {
  70. statisticsCache.update(c, descriptions, configs);
  71. return getPartitionOffsets(descriptions, ac).map(offsets -> {
  72. var metrics = statisticsCache.get(c);
  73. return createList(
  74. topics,
  75. descriptions,
  76. configs,
  77. offsets,
  78. metrics.getMetrics(),
  79. metrics.getLogDirInfo()
  80. );
  81. });
  82. })).flatMap(Function.identity());
  83. }
  84. private Mono<InternalTopic> loadTopic(KafkaCluster c, String topicName) {
  85. return loadTopics(c, List.of(topicName))
  86. .flatMap(lst -> lst.stream().findFirst()
  87. .map(Mono::just)
  88. .orElse(Mono.error(TopicNotFoundException::new)));
  89. }
  90. /**
  91. * After creation topic can be invisible via API for some time.
  92. * To workaround this, we retyring topic loading until it becomes visible.
  93. */
  94. private Mono<InternalTopic> loadTopicAfterCreation(KafkaCluster c, String topicName) {
  95. return loadTopic(c, topicName)
  96. .retryWhen(
  97. Retry
  98. .fixedDelay(
  99. loadTopicAfterCreateRetries,
  100. Duration.ofMillis(loadTopicAfterCreateDelayInMs)
  101. )
  102. .filter(TopicNotFoundException.class::isInstance)
  103. .onRetryExhaustedThrow((spec, sig) ->
  104. new TopicMetadataException(
  105. String.format(
  106. "Error while loading created topic '%s' - topic is not visible via API "
  107. + "after waiting for %d ms.",
  108. topicName,
  109. loadTopicAfterCreateDelayInMs * loadTopicAfterCreateRetries)))
  110. );
  111. }
  112. private List<InternalTopic> createList(List<String> orderedNames,
  113. Map<String, TopicDescription> descriptions,
  114. Map<String, List<ConfigEntry>> configs,
  115. InternalPartitionsOffsets partitionsOffsets,
  116. Metrics metrics,
  117. InternalLogDirStats logDirInfo) {
  118. return orderedNames.stream()
  119. .filter(descriptions::containsKey)
  120. .map(t -> InternalTopic.from(
  121. descriptions.get(t),
  122. configs.getOrDefault(t, List.of()),
  123. partitionsOffsets,
  124. metrics,
  125. logDirInfo,
  126. clustersProperties.getInternalTopicPrefix()
  127. ))
  128. .collect(toList());
  129. }
  130. private Mono<InternalPartitionsOffsets> getPartitionOffsets(Map<String, TopicDescription>
  131. descriptionsMap,
  132. ReactiveAdminClient ac) {
  133. var descriptions = descriptionsMap.values();
  134. return ac.listOffsets(descriptions, OffsetSpec.earliest())
  135. .zipWith(ac.listOffsets(descriptions, OffsetSpec.latest()),
  136. (earliest, latest) ->
  137. Sets.intersection(earliest.keySet(), latest.keySet())
  138. .stream()
  139. .map(tp ->
  140. Map.entry(tp,
  141. new InternalPartitionsOffsets.Offsets(
  142. earliest.get(tp), latest.get(tp))))
  143. .collect(toMap(Map.Entry::getKey, Map.Entry::getValue)))
  144. .map(InternalPartitionsOffsets::new);
  145. }
  146. public Mono<InternalTopic> getTopicDetails(KafkaCluster cluster, String topicName) {
  147. return loadTopic(cluster, topicName);
  148. }
  149. public Mono<List<ConfigEntry>> getTopicConfigs(KafkaCluster cluster, String topicName) {
  150. // there 2 case that we cover here:
  151. // 1. topic not found/visible - describeTopic() will be empty and we will throw TopicNotFoundException
  152. // 2. topic is visible, but we don't have DESCRIBE_CONFIG permission - we should return empty list
  153. return adminClientService.get(cluster)
  154. .flatMap(ac -> ac.describeTopic(topicName)
  155. .switchIfEmpty(Mono.error(new TopicNotFoundException()))
  156. .then(ac.getTopicsConfig(List.of(topicName), true))
  157. .map(m -> m.values().stream().findFirst().orElse(List.of())));
  158. }
  159. private Mono<InternalTopic> createTopic(KafkaCluster c, ReactiveAdminClient adminClient,
  160. Mono<TopicCreationDTO> topicCreation) {
  161. return topicCreation.flatMap(topicData ->
  162. adminClient.createTopic(
  163. topicData.getName(),
  164. topicData.getPartitions(),
  165. topicData.getReplicationFactor(),
  166. topicData.getConfigs()
  167. ).thenReturn(topicData)
  168. )
  169. .onErrorMap(t -> new TopicMetadataException(t.getMessage(), t))
  170. .flatMap(topicData -> loadTopicAfterCreation(c, topicData.getName()));
  171. }
  172. public Mono<InternalTopic> createTopic(KafkaCluster cluster, Mono<TopicCreationDTO> topicCreation) {
  173. return adminClientService.get(cluster)
  174. .flatMap(ac -> createTopic(cluster, ac, topicCreation));
  175. }
  176. public Mono<InternalTopic> recreateTopic(KafkaCluster cluster, String topicName) {
  177. return loadTopic(cluster, topicName)
  178. .flatMap(t -> deleteTopic(cluster, topicName)
  179. .thenReturn(t)
  180. .delayElement(Duration.ofSeconds(recreateDelayInSeconds))
  181. .flatMap(topic ->
  182. adminClientService.get(cluster)
  183. .flatMap(ac ->
  184. ac.createTopic(
  185. topic.getName(),
  186. topic.getPartitionCount(),
  187. topic.getReplicationFactor(),
  188. topic.getTopicConfigs()
  189. .stream()
  190. .collect(Collectors.toMap(InternalTopicConfig::getName,
  191. InternalTopicConfig::getValue))
  192. )
  193. .thenReturn(topicName)
  194. )
  195. .retryWhen(
  196. Retry.fixedDelay(recreateMaxRetries, Duration.ofSeconds(recreateDelayInSeconds))
  197. .filter(TopicExistsException.class::isInstance)
  198. .onRetryExhaustedThrow((a, b) ->
  199. new TopicRecreationException(topicName,
  200. recreateMaxRetries * recreateDelayInSeconds))
  201. )
  202. .flatMap(a -> loadTopicAfterCreation(cluster, topicName))
  203. )
  204. );
  205. }
  206. private Mono<InternalTopic> updateTopic(KafkaCluster cluster,
  207. String topicName,
  208. TopicUpdateDTO topicUpdate) {
  209. return adminClientService.get(cluster)
  210. .flatMap(ac ->
  211. ac.updateTopicConfig(topicName, topicUpdate.getConfigs())
  212. .then(loadTopic(cluster, topicName)));
  213. }
  214. public Mono<InternalTopic> updateTopic(KafkaCluster cl, String topicName,
  215. Mono<TopicUpdateDTO> topicUpdate) {
  216. return topicUpdate
  217. .flatMap(t -> updateTopic(cl, topicName, t));
  218. }
  219. private Mono<InternalTopic> changeReplicationFactor(
  220. KafkaCluster cluster,
  221. ReactiveAdminClient adminClient,
  222. String topicName,
  223. Map<TopicPartition, Optional<NewPartitionReassignment>> reassignments
  224. ) {
  225. return adminClient.alterPartitionReassignments(reassignments)
  226. .then(loadTopic(cluster, topicName));
  227. }
  228. /**
  229. * Change topic replication factor, works on brokers versions 5.4.x and higher
  230. */
  231. public Mono<ReplicationFactorChangeResponseDTO> changeReplicationFactor(
  232. KafkaCluster cluster,
  233. String topicName,
  234. ReplicationFactorChangeDTO replicationFactorChange) {
  235. return loadTopic(cluster, topicName).flatMap(topic -> adminClientService.get(cluster)
  236. .flatMap(ac -> {
  237. Integer actual = topic.getReplicationFactor();
  238. Integer requested = replicationFactorChange.getTotalReplicationFactor();
  239. Integer brokersCount = statisticsCache.get(cluster).getClusterDescription()
  240. .getNodes().size();
  241. if (requested.equals(actual)) {
  242. return Mono.error(
  243. new ValidationException(
  244. String.format("Topic already has replicationFactor %s.", actual)));
  245. }
  246. if (requested <= 0) {
  247. return Mono.error(
  248. new ValidationException(
  249. String.format("Requested replication factor (%s) should be greater or equal to 1.", requested)));
  250. }
  251. if (requested > brokersCount) {
  252. return Mono.error(
  253. new ValidationException(
  254. String.format("Requested replication factor %s more than brokers count %s.",
  255. requested, brokersCount)));
  256. }
  257. return changeReplicationFactor(cluster, ac, topicName,
  258. getPartitionsReassignments(cluster, topic,
  259. replicationFactorChange));
  260. })
  261. .map(t -> new ReplicationFactorChangeResponseDTO()
  262. .topicName(t.getName())
  263. .totalReplicationFactor(t.getReplicationFactor())));
  264. }
  265. private Map<TopicPartition, Optional<NewPartitionReassignment>> getPartitionsReassignments(
  266. KafkaCluster cluster,
  267. InternalTopic topic,
  268. ReplicationFactorChangeDTO replicationFactorChange) {
  269. // Current assignment map (Partition number -> List of brokers)
  270. Map<Integer, List<Integer>> currentAssignment = getCurrentAssignment(topic);
  271. // Brokers map (Broker id -> count)
  272. Map<Integer, Integer> brokersUsage = getBrokersMap(cluster, currentAssignment);
  273. int currentReplicationFactor = topic.getReplicationFactor();
  274. // If we should to increase Replication factor
  275. if (replicationFactorChange.getTotalReplicationFactor() > currentReplicationFactor) {
  276. // For each partition
  277. for (var assignmentList : currentAssignment.values()) {
  278. // Get brokers list sorted by usage
  279. var brokers = brokersUsage.entrySet().stream()
  280. .sorted(Map.Entry.comparingByValue())
  281. .map(Map.Entry::getKey)
  282. .collect(toList());
  283. // Iterate brokers and try to add them in assignment
  284. // while partition replicas count != requested replication factor
  285. for (Integer broker : brokers) {
  286. if (!assignmentList.contains(broker)) {
  287. assignmentList.add(broker);
  288. brokersUsage.merge(broker, 1, Integer::sum);
  289. }
  290. if (assignmentList.size() == replicationFactorChange.getTotalReplicationFactor()) {
  291. break;
  292. }
  293. }
  294. if (assignmentList.size() != replicationFactorChange.getTotalReplicationFactor()) {
  295. throw new ValidationException("Something went wrong during adding replicas");
  296. }
  297. }
  298. // If we should to decrease Replication factor
  299. } else if (replicationFactorChange.getTotalReplicationFactor() < currentReplicationFactor) {
  300. for (Map.Entry<Integer, List<Integer>> assignmentEntry : currentAssignment.entrySet()) {
  301. var partition = assignmentEntry.getKey();
  302. var brokers = assignmentEntry.getValue();
  303. // Get brokers list sorted by usage in reverse order
  304. var brokersUsageList = brokersUsage.entrySet().stream()
  305. .sorted(Map.Entry.comparingByValue(Comparator.reverseOrder()))
  306. .map(Map.Entry::getKey)
  307. .collect(toList());
  308. // Iterate brokers and try to remove them from assignment
  309. // while partition replicas count != requested replication factor
  310. for (Integer broker : brokersUsageList) {
  311. // Check is the broker the leader of partition
  312. if (!topic.getPartitions().get(partition).getLeader()
  313. .equals(broker)) {
  314. brokers.remove(broker);
  315. brokersUsage.merge(broker, -1, Integer::sum);
  316. }
  317. if (brokers.size() == replicationFactorChange.getTotalReplicationFactor()) {
  318. break;
  319. }
  320. }
  321. if (brokers.size() != replicationFactorChange.getTotalReplicationFactor()) {
  322. throw new ValidationException("Something went wrong during removing replicas");
  323. }
  324. }
  325. } else {
  326. throw new ValidationException("Replication factor already equals requested");
  327. }
  328. // Return result map
  329. return currentAssignment.entrySet().stream().collect(toMap(
  330. e -> new TopicPartition(topic.getName(), e.getKey()),
  331. e -> Optional.of(new NewPartitionReassignment(e.getValue()))
  332. ));
  333. }
  334. private Map<Integer, List<Integer>> getCurrentAssignment(InternalTopic topic) {
  335. return topic.getPartitions().values().stream()
  336. .collect(toMap(
  337. InternalPartition::getPartition,
  338. p -> p.getReplicas().stream()
  339. .map(InternalReplica::getBroker)
  340. .collect(toList())
  341. ));
  342. }
  343. private Map<Integer, Integer> getBrokersMap(KafkaCluster cluster,
  344. Map<Integer, List<Integer>> currentAssignment) {
  345. Map<Integer, Integer> result = statisticsCache.get(cluster).getClusterDescription().getNodes()
  346. .stream()
  347. .map(Node::id)
  348. .collect(toMap(
  349. c -> c,
  350. c -> 0
  351. ));
  352. currentAssignment.values().forEach(brokers -> brokers
  353. .forEach(broker -> result.put(broker, result.get(broker) + 1)));
  354. return result;
  355. }
  356. public Mono<PartitionsIncreaseResponseDTO> increaseTopicPartitions(
  357. KafkaCluster cluster,
  358. String topicName,
  359. PartitionsIncreaseDTO partitionsIncrease) {
  360. return loadTopic(cluster, topicName).flatMap(topic ->
  361. adminClientService.get(cluster).flatMap(ac -> {
  362. Integer actualCount = topic.getPartitionCount();
  363. Integer requestedCount = partitionsIncrease.getTotalPartitionsCount();
  364. if (requestedCount < actualCount) {
  365. return Mono.error(
  366. new ValidationException(String.format(
  367. "Topic currently has %s partitions, which is higher than the requested %s.",
  368. actualCount, requestedCount)));
  369. }
  370. if (requestedCount.equals(actualCount)) {
  371. return Mono.error(
  372. new ValidationException(
  373. String.format("Topic already has %s partitions.", actualCount)));
  374. }
  375. Map<String, NewPartitions> newPartitionsMap = Collections.singletonMap(
  376. topicName,
  377. NewPartitions.increaseTo(partitionsIncrease.getTotalPartitionsCount())
  378. );
  379. return ac.createPartitions(newPartitionsMap)
  380. .then(loadTopic(cluster, topicName));
  381. }).map(t -> new PartitionsIncreaseResponseDTO()
  382. .topicName(t.getName())
  383. .totalPartitionsCount(t.getPartitionCount())
  384. )
  385. );
  386. }
  387. public Mono<Void> deleteTopic(KafkaCluster cluster, String topicName) {
  388. if (statisticsCache.get(cluster).getFeatures().contains(ClusterFeature.TOPIC_DELETION)) {
  389. return adminClientService.get(cluster).flatMap(c -> c.deleteTopic(topicName))
  390. .doOnSuccess(t -> statisticsCache.onTopicDelete(cluster, topicName));
  391. } else {
  392. return Mono.error(new ValidationException("Topic deletion restricted"));
  393. }
  394. }
  395. public Mono<InternalTopic> cloneTopic(
  396. KafkaCluster cluster, String topicName, String newTopicName) {
  397. return loadTopic(cluster, topicName).flatMap(topic ->
  398. adminClientService.get(cluster)
  399. .flatMap(ac ->
  400. ac.createTopic(
  401. newTopicName,
  402. topic.getPartitionCount(),
  403. topic.getReplicationFactor(),
  404. topic.getTopicConfigs()
  405. .stream()
  406. .collect(Collectors
  407. .toMap(InternalTopicConfig::getName, InternalTopicConfig::getValue))
  408. )
  409. ).thenReturn(newTopicName)
  410. .flatMap(a -> loadTopicAfterCreation(cluster, newTopicName))
  411. );
  412. }
  413. public Mono<List<InternalTopic>> getTopicsForPagination(KafkaCluster cluster) {
  414. Statistics stats = statisticsCache.get(cluster);
  415. return filterExisting(cluster, stats.getTopicDescriptions().keySet())
  416. .map(lst -> lst.stream()
  417. .map(topicName ->
  418. InternalTopic.from(
  419. stats.getTopicDescriptions().get(topicName),
  420. stats.getTopicConfigs().getOrDefault(topicName, List.of()),
  421. InternalPartitionsOffsets.empty(),
  422. stats.getMetrics(),
  423. stats.getLogDirInfo(),
  424. clustersProperties.getInternalTopicPrefix()
  425. ))
  426. .collect(toList())
  427. );
  428. }
  429. private Mono<List<String>> filterExisting(KafkaCluster cluster, Collection<String> topics) {
  430. return adminClientService.get(cluster)
  431. .flatMap(ac -> ac.listTopics(true))
  432. .map(existing -> existing
  433. .stream()
  434. .filter(topics::contains)
  435. .collect(toList()));
  436. }
  437. }