소스 검색

fix #2763 improve recursive handling

Shinsuke Sugaya 1년 전
부모
커밋
ef8b98cd83
1개의 파일이 변경되었으며, 10개의 추가와 2개의 삭제가 있습니다
  1. 10 2
      src/main/java/org/codelibs/fess/ds/callback/FileListIndexUpdateCallbackImpl.java

+ 10 - 2
src/main/java/org/codelibs/fess/ds/callback/FileListIndexUpdateCallbackImpl.java

@@ -19,9 +19,11 @@ import static org.codelibs.core.stream.StreamUtil.stream;
 
 import java.util.ArrayList;
 import java.util.Deque;
+import java.util.HashSet;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.LinkedBlockingQueue;
 import java.util.concurrent.ThreadPoolExecutor;
@@ -147,11 +149,14 @@ public class FileListIndexUpdateCallbackImpl implements IndexUpdateCallback {
             final long maxAccessCount = getMaxAccessCount(paramMap, dataMap);
             long counter = 0;
             final Deque<String> urlQueue = new LinkedList<>();
+            final Set<String> processedUrls = new HashSet<>();
             urlQueue.offer(url);
             while (!urlQueue.isEmpty() && (maxAccessCount < 0 || counter < maxAccessCount)) {
+                counter++;
                 final Map<String, Object> localDataMap =
                         dataMap.entrySet().stream().collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
                 String processingUrl = urlQueue.poll();
+                processedUrls.add(processingUrl);
                 if (deleteUrlList.contains(processingUrl)) {
                     deleteDocuments(); // delete before indexing
                 }
@@ -165,7 +170,6 @@ public class FileListIndexUpdateCallbackImpl implements IndexUpdateCallback {
                         if (processingUrl == null) {
                             break;
                         }
-                        counter++;
                         localDataMap.put(fessConfig.getIndexFieldUrl(), processingUrl);
                         crawlerStatsHelper.record(keyObj, StatsAction.REDIRECTED);
                     }
@@ -176,7 +180,11 @@ public class FileListIndexUpdateCallbackImpl implements IndexUpdateCallback {
                     crawlerStatsHelper.record(keyObj, StatsAction.ACCESS_EXCEPTION);
                     final Throwable cause = e.getCause();
                     if (cause instanceof ChildUrlsException) {
-                        ((ChildUrlsException) cause).getChildUrlList().stream().map(RequestData::getUrl).forEach(urlQueue::offer);
+                        ((ChildUrlsException) cause).getChildUrlList().stream().map(RequestData::getUrl).forEach(s -> {
+                            if (!processedUrls.contains(s)&&!urlQueue.contains(s)) {
+                                urlQueue.offer(s);
+                            }
+                        });
                     } else if (maxAccessCount != 1L) {
                         throw e;
                     } else {