瀏覽代碼

modify empty check

Shinsuke Sugaya 9 年之前
父節點
當前提交
36bcfa4046
共有 1 個文件被更改，包括 6 次插入和 3 次删除
  1. 6 3
      src/main/java/org/codelibs/fess/crawler/FessCrawlerThread.java

+ 6 - 3
src/main/java/org/codelibs/fess/crawler/FessCrawlerThread.java

@@ -23,8 +23,10 @@ import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
+import java.util.stream.Collectors;
 
 import org.apache.commons.io.IOUtils;
+import org.codelibs.core.lang.StringUtil;
 import org.codelibs.core.misc.DynamicProperties;
 import org.codelibs.fess.Constants;
 import org.codelibs.fess.crawler.builder.RequestDataBuilder;
@@ -53,7 +55,7 @@ public class FessCrawlerThread extends CrawlerThread {
     private static final Logger logger = LoggerFactory.getLogger(FessCrawlerThread.class);
 
     @Override
-    protected boolean isContentUpdated(final CrawlerClient client, final UrlQueue urlQueue) {
+    protected boolean isContentUpdated(final CrawlerClient client, final UrlQueue<?> urlQueue) {
         final DynamicProperties crawlerProperties = ComponentUtil.getCrawlerProperties();
         if (crawlerProperties.getProperty(Constants.INCREMENTAL_CRAWLING_PROPERTY, Constants.TRUE).equals(Constants.TRUE)) {
 
@@ -163,11 +165,12 @@ public class FessCrawlerThread extends CrawlerThread {
         return true;
     }
 
-    protected void storeChildUrlsToQueue(final UrlQueue urlQueue, final Set<RequestData> childUrlSet) {
+    protected void storeChildUrlsToQueue(final UrlQueue<?> urlQueue, final Set<RequestData> childUrlSet) {
         if (childUrlSet != null) {
             synchronized (crawlerContext.getAccessCountLock()) {
                 // add an url
-                storeChildUrls(childUrlSet, urlQueue.getUrl(), urlQueue.getDepth() != null ? urlQueue.getDepth() + 1 : 1);
+                storeChildUrls(childUrlSet.stream().filter(rd -> StringUtil.isNotBlank(rd.getUrl())).collect(Collectors.toSet()),
+                        urlQueue.getUrl(), urlQueue.getDepth() != null ? urlQueue.getDepth() + 1 : 1);
             }
         }
     }