diff --git a/pom.xml b/pom.xml
index 00771dbce..7b8180a7f 100644
--- a/pom.xml
+++ b/pom.xml
@@ -55,8 +55,8 @@
4.8.2
0.5.2
-
- 1.0.0-SNAPSHOT
+
+ 1.0.0-SNAPSHOT
1.6
3.11-beta2
1.8.7
@@ -466,11 +466,11 @@
2.2.1
-
+
- org.codelibs.robot
- s2robot-lasta
- ${s2robot.version}
+ org.codelibs.fess
+ fess-crawler-lasta
+ ${crawler.version}
commons-logging
@@ -479,9 +479,9 @@
- org.codelibs.robot
- s2robot-es
- ${s2robot.version}
+ org.codelibs.fess
+ fess-crawler-es
+ ${crawler.version}
org.bouncycastle
diff --git a/src/main/java/org/codelibs/fess/app/web/admin/webconfig/WebConfigEditForm.java b/src/main/java/org/codelibs/fess/app/web/admin/webconfig/WebConfigEditForm.java
index 69ff23b6d..b35a014e3 100644
--- a/src/main/java/org/codelibs/fess/app/web/admin/webconfig/WebConfigEditForm.java
+++ b/src/main/java/org/codelibs/fess/app/web/admin/webconfig/WebConfigEditForm.java
@@ -139,7 +139,7 @@ public class WebConfigEditForm implements Serializable {
sortOrder = "0";
userAgent = ComponentUtil.getUserAgentName();
if (StringUtil.isBlank(userAgent)) {
- userAgent = "Fess Robot/" + Constants.FESS_VERSION;
+ userAgent = "FessCrawler/" + Constants.FESS_VERSION;
}
numOfThread = Integer.toString(Constants.DEFAULT_NUM_OF_THREAD_FOR_WEB);
intervalTime = Integer.toString(Constants.DEFAULT_INTERVAL_TIME_FOR_WEB);
diff --git a/src/main/java/org/codelibs/fess/app/web/admin/wizard/AdminWizardAction.java b/src/main/java/org/codelibs/fess/app/web/admin/wizard/AdminWizardAction.java
index 3b32d6b47..61b9434a7 100644
--- a/src/main/java/org/codelibs/fess/app/web/admin/wizard/AdminWizardAction.java
+++ b/src/main/java/org/codelibs/fess/app/web/admin/wizard/AdminWizardAction.java
@@ -39,7 +39,7 @@ import org.codelibs.fess.helper.JobHelper;
import org.codelibs.fess.helper.SystemHelper;
import org.codelibs.fess.job.TriggeredJob;
import org.codelibs.fess.util.ComponentUtil;
-import org.codelibs.robot.util.CharUtil;
+import org.codelibs.fess.crawler.util.CharUtil;
import org.lastaflute.web.Execute;
import org.lastaflute.web.callback.ActionRuntime;
import org.lastaflute.web.response.HtmlResponse;
diff --git a/src/main/java/org/codelibs/fess/app/web/go/GoAction.java b/src/main/java/org/codelibs/fess/app/web/go/GoAction.java
index 280f65008..5a8799d1a 100644
--- a/src/main/java/org/codelibs/fess/app/web/go/GoAction.java
+++ b/src/main/java/org/codelibs/fess/app/web/go/GoAction.java
@@ -34,7 +34,7 @@ import org.codelibs.fess.helper.SearchLogHelper;
import org.codelibs.fess.helper.ViewHelper;
import org.codelibs.fess.util.ComponentUtil;
import org.codelibs.fess.util.DocumentUtil;
-import org.codelibs.robot.util.CharUtil;
+import org.codelibs.fess.crawler.util.CharUtil;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.TermQueryBuilder;
import org.lastaflute.web.Execute;
diff --git a/src/main/java/org/codelibs/fess/crawler/FessS2RobotThread.java b/src/main/java/org/codelibs/fess/crawler/FessCrawlerThread.java
similarity index 90%
rename from src/main/java/org/codelibs/fess/crawler/FessS2RobotThread.java
rename to src/main/java/org/codelibs/fess/crawler/FessCrawlerThread.java
index 25155166e..932960dc7 100644
--- a/src/main/java/org/codelibs/fess/crawler/FessS2RobotThread.java
+++ b/src/main/java/org/codelibs/fess/crawler/FessCrawlerThread.java
@@ -36,29 +36,29 @@ import org.codelibs.fess.helper.FieldHelper;
import org.codelibs.fess.helper.IndexingHelper;
import org.codelibs.fess.helper.SambaHelper;
import org.codelibs.fess.util.ComponentUtil;
-import org.codelibs.robot.S2RobotThread;
-import org.codelibs.robot.builder.RequestDataBuilder;
-import org.codelibs.robot.client.S2RobotClient;
-import org.codelibs.robot.client.smb.SmbClient;
-import org.codelibs.robot.entity.RequestData;
-import org.codelibs.robot.entity.ResponseData;
-import org.codelibs.robot.entity.UrlQueue;
-import org.codelibs.robot.log.LogType;
+import org.codelibs.fess.crawler.CrawlerThread;
+import org.codelibs.fess.crawler.builder.RequestDataBuilder;
+import org.codelibs.fess.crawler.client.CrawlerClient;
+import org.codelibs.fess.crawler.client.smb.SmbClient;
+import org.codelibs.fess.crawler.entity.RequestData;
+import org.codelibs.fess.crawler.entity.ResponseData;
+import org.codelibs.fess.crawler.entity.UrlQueue;
+import org.codelibs.fess.crawler.log.LogType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import jcifs.smb.ACE;
import jcifs.smb.SID;
-public class FessS2RobotThread extends S2RobotThread {
- private static final Logger logger = LoggerFactory.getLogger(FessS2RobotThread.class);
+public class FessCrawlerThread extends CrawlerThread {
+ private static final Logger logger = LoggerFactory.getLogger(FessCrawlerThread.class);
@Override
- protected boolean isContentUpdated(final S2RobotClient client, final UrlQueue urlQueue) {
+ protected boolean isContentUpdated(final CrawlerClient client, final UrlQueue urlQueue) {
final DynamicProperties crawlerProperties = ComponentUtil.getCrawlerProperties();
if (crawlerProperties.getProperty(Constants.DIFF_CRAWLING_PROPERTY, Constants.TRUE).equals(Constants.TRUE)) {
- log(logHelper, LogType.CHECK_LAST_MODIFIED, robotContext, urlQueue);
+ log(logHelper, LogType.CHECK_LAST_MODIFIED, crawlerContext, urlQueue);
final long startTime = System.currentTimeMillis();
final CrawlingConfigHelper crawlingConfigHelper = ComponentUtil.getCrawlingConfigHelper();
@@ -72,7 +72,7 @@ public class FessS2RobotThread extends S2RobotThread {
final String url = urlQueue.getUrl();
ResponseData responseData = null;
try {
- final CrawlingConfig crawlingConfig = crawlingConfigHelper.get(robotContext.getSessionId());
+ final CrawlingConfig crawlingConfig = crawlingConfigHelper.get(crawlerContext.getSessionId());
final Map dataMap = new HashMap();
dataMap.put(fieldHelper.urlField, url);
final List roleTypeList = new ArrayList();
@@ -140,12 +140,12 @@ public class FessS2RobotThread extends S2RobotThread {
return true;
} else if (responseData.getLastModified().getTime() <= lastModified.getTime() && httpStatusCode == 200) {
- log(logHelper, LogType.NOT_MODIFIED, robotContext, urlQueue);
+ log(logHelper, LogType.NOT_MODIFIED, crawlerContext, urlQueue);
responseData.setExecutionTime(System.currentTimeMillis() - startTime);
responseData.setParentUrl(urlQueue.getParentUrl());
- responseData.setSessionId(robotContext.getSessionId());
- responseData.setHttpStatusCode(org.codelibs.robot.Constants.NOT_MODIFIED_STATUS);
+ responseData.setSessionId(crawlerContext.getSessionId());
+ responseData.setHttpStatusCode(org.codelibs.fess.crawler.Constants.NOT_MODIFIED_STATUS);
processResponse(urlQueue, responseData);
storeChildUrlsToQueue(urlQueue, getAnchorSet(document.get(fieldHelper.anchorField)));
@@ -163,7 +163,7 @@ public class FessS2RobotThread extends S2RobotThread {
protected void storeChildUrlsToQueue(final UrlQueue urlQueue, final Set childUrlSet) {
if (childUrlSet != null) {
- synchronized (robotContext.getAccessCountLock()) {
+ synchronized (crawlerContext.getAccessCountLock()) {
// add an url
storeChildUrls(childUrlSet, urlQueue.getUrl(), urlQueue.getDepth() != null ? urlQueue.getDepth() + 1 : 1);
}
diff --git a/src/main/java/org/codelibs/fess/crawler/interval/FessIntervalController.java b/src/main/java/org/codelibs/fess/crawler/interval/FessIntervalController.java
index d9a0f68c7..25739895b 100644
--- a/src/main/java/org/codelibs/fess/crawler/interval/FessIntervalController.java
+++ b/src/main/java/org/codelibs/fess/crawler/interval/FessIntervalController.java
@@ -18,7 +18,7 @@ package org.codelibs.fess.crawler.interval;
import org.codelibs.fess.helper.IntervalControlHelper;
import org.codelibs.fess.util.ComponentUtil;
-import org.codelibs.robot.interval.impl.DefaultIntervalController;
+import org.codelibs.fess.crawler.interval.impl.DefaultIntervalController;
public class FessIntervalController extends DefaultIntervalController {
diff --git a/src/main/java/org/codelibs/fess/crawler/transformer/AbstractFessFileTransformer.java b/src/main/java/org/codelibs/fess/crawler/transformer/AbstractFessFileTransformer.java
index 29ad610cb..cdb501869 100644
--- a/src/main/java/org/codelibs/fess/crawler/transformer/AbstractFessFileTransformer.java
+++ b/src/main/java/org/codelibs/fess/crawler/transformer/AbstractFessFileTransformer.java
@@ -48,17 +48,17 @@ import org.codelibs.fess.helper.PathMappingHelper;
import org.codelibs.fess.helper.SambaHelper;
import org.codelibs.fess.helper.SystemHelper;
import org.codelibs.fess.util.ComponentUtil;
-import org.codelibs.robot.client.smb.SmbClient;
-import org.codelibs.robot.entity.AccessResult;
-import org.codelibs.robot.entity.AccessResultData;
-import org.codelibs.robot.entity.ExtractData;
-import org.codelibs.robot.entity.ResponseData;
-import org.codelibs.robot.entity.ResultData;
-import org.codelibs.robot.entity.UrlQueue;
-import org.codelibs.robot.exception.RobotCrawlAccessException;
-import org.codelibs.robot.exception.RobotSystemException;
-import org.codelibs.robot.extractor.Extractor;
-import org.codelibs.robot.util.CrawlingParameterUtil;
+import org.codelibs.fess.crawler.client.smb.SmbClient;
+import org.codelibs.fess.crawler.entity.AccessResult;
+import org.codelibs.fess.crawler.entity.AccessResultData;
+import org.codelibs.fess.crawler.entity.ExtractData;
+import org.codelibs.fess.crawler.entity.ResponseData;
+import org.codelibs.fess.crawler.entity.ResultData;
+import org.codelibs.fess.crawler.entity.UrlQueue;
+import org.codelibs.fess.crawler.exception.CrawlingAccessException;
+import org.codelibs.fess.crawler.exception.CrawlerSystemException;
+import org.codelibs.fess.crawler.extractor.Extractor;
+import org.codelibs.fess.crawler.util.CrawlingParameterUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -96,7 +96,7 @@ public abstract class AbstractFessFileTransformer extends AbstractFessXpathTrans
@Override
public ResultData transform(final ResponseData responseData) {
if (responseData == null || responseData.getResponseBody() == null) {
- throw new RobotCrawlAccessException("No response body.");
+ throw new CrawlingAccessException("No response body.");
}
final Extractor extractor = getExtractor(responseData);
@@ -144,8 +144,8 @@ public abstract class AbstractFessFileTransformer extends AbstractFessXpathTrans
}
}
} catch (final Exception e) {
- final RobotCrawlAccessException rcae = new RobotCrawlAccessException("Could not get a text from " + responseData.getUrl(), e);
- rcae.setLogLevel(RobotCrawlAccessException.WARN);
+ final CrawlingAccessException rcae = new CrawlingAccessException("Could not get a text from " + responseData.getUrl(), e);
+ rcae.setLogLevel(CrawlingAccessException.WARN);
throw rcae;
} finally {
IOUtils.closeQuietly(in);
@@ -323,7 +323,7 @@ public abstract class AbstractFessFileTransformer extends AbstractFessXpathTrans
try {
resultData.setData(SerializeUtil.fromObjectToBinary(dataMap));
} catch (final Exception e) {
- throw new RobotCrawlAccessException("Could not serialize object: " + url, e);
+ throw new CrawlingAccessException("Could not serialize object: " + url, e);
}
resultData.setEncoding(charsetName);
@@ -466,7 +466,7 @@ public abstract class AbstractFessFileTransformer extends AbstractFessXpathTrans
try {
return SerializeUtil.fromBinaryToObject(data);
} catch (final Exception e) {
- throw new RobotSystemException("Could not create an instanced from bytes.", e);
+ throw new CrawlerSystemException("Could not create an instance from bytes.", e);
}
}
return new HashMap();
diff --git a/src/main/java/org/codelibs/fess/crawler/transformer/AbstractFessXpathTransformer.java b/src/main/java/org/codelibs/fess/crawler/transformer/AbstractFessXpathTransformer.java
index 343a3e53b..7be789a4c 100644
--- a/src/main/java/org/codelibs/fess/crawler/transformer/AbstractFessXpathTransformer.java
+++ b/src/main/java/org/codelibs/fess/crawler/transformer/AbstractFessXpathTransformer.java
@@ -25,7 +25,7 @@ import org.apache.commons.lang3.StringUtils;
import org.codelibs.core.lang.StringUtil;
import org.codelibs.fess.helper.FieldHelper;
import org.codelibs.fess.util.ComponentUtil;
-import org.codelibs.robot.transformer.impl.XpathTransformer;
+import org.codelibs.fess.crawler.transformer.impl.XpathTransformer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
diff --git a/src/main/java/org/codelibs/fess/crawler/transformer/FessFileTransformer.java b/src/main/java/org/codelibs/fess/crawler/transformer/FessFileTransformer.java
index 4a538cfe5..e5c13e0d4 100644
--- a/src/main/java/org/codelibs/fess/crawler/transformer/FessFileTransformer.java
+++ b/src/main/java/org/codelibs/fess/crawler/transformer/FessFileTransformer.java
@@ -18,9 +18,9 @@ package org.codelibs.fess.crawler.transformer;
import org.codelibs.fess.exception.FessSystemException;
import org.codelibs.fess.util.ComponentUtil;
-import org.codelibs.robot.entity.ResponseData;
-import org.codelibs.robot.extractor.Extractor;
-import org.codelibs.robot.extractor.ExtractorFactory;
+import org.codelibs.fess.crawler.entity.ResponseData;
+import org.codelibs.fess.crawler.extractor.Extractor;
+import org.codelibs.fess.crawler.extractor.ExtractorFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
diff --git a/src/main/java/org/codelibs/fess/crawler/transformer/FessTikaTransformer.java b/src/main/java/org/codelibs/fess/crawler/transformer/FessTikaTransformer.java
index 86e6c8b14..b3d58154a 100644
--- a/src/main/java/org/codelibs/fess/crawler/transformer/FessTikaTransformer.java
+++ b/src/main/java/org/codelibs/fess/crawler/transformer/FessTikaTransformer.java
@@ -17,8 +17,8 @@
package org.codelibs.fess.crawler.transformer;
import org.codelibs.fess.exception.FessSystemException;
-import org.codelibs.robot.entity.ResponseData;
-import org.codelibs.robot.extractor.Extractor;
+import org.codelibs.fess.crawler.entity.ResponseData;
+import org.codelibs.fess.crawler.extractor.Extractor;
import org.lastaflute.di.core.SingletonLaContainer;
public class FessTikaTransformer extends AbstractFessFileTransformer {
diff --git a/src/main/java/org/codelibs/fess/crawler/transformer/FessXpathTransformer.java b/src/main/java/org/codelibs/fess/crawler/transformer/FessXpathTransformer.java
index ef6873444..d626061a2 100644
--- a/src/main/java/org/codelibs/fess/crawler/transformer/FessXpathTransformer.java
+++ b/src/main/java/org/codelibs/fess/crawler/transformer/FessXpathTransformer.java
@@ -50,17 +50,17 @@ import org.codelibs.fess.helper.OverlappingHostHelper;
import org.codelibs.fess.helper.PathMappingHelper;
import org.codelibs.fess.helper.SystemHelper;
import org.codelibs.fess.util.ComponentUtil;
-import org.codelibs.robot.builder.RequestDataBuilder;
-import org.codelibs.robot.entity.AccessResultData;
-import org.codelibs.robot.entity.RequestData;
-import org.codelibs.robot.entity.ResponseData;
-import org.codelibs.robot.entity.ResultData;
-import org.codelibs.robot.entity.UrlQueue;
-import org.codelibs.robot.exception.ChildUrlsException;
-import org.codelibs.robot.exception.RobotCrawlAccessException;
-import org.codelibs.robot.exception.RobotSystemException;
-import org.codelibs.robot.util.CrawlingParameterUtil;
-import org.codelibs.robot.util.ResponseDataUtil;
+import org.codelibs.fess.crawler.builder.RequestDataBuilder;
+import org.codelibs.fess.crawler.entity.AccessResultData;
+import org.codelibs.fess.crawler.entity.RequestData;
+import org.codelibs.fess.crawler.entity.ResponseData;
+import org.codelibs.fess.crawler.entity.ResultData;
+import org.codelibs.fess.crawler.entity.UrlQueue;
+import org.codelibs.fess.crawler.exception.ChildUrlsException;
+import org.codelibs.fess.crawler.exception.CrawlingAccessException;
+import org.codelibs.fess.crawler.exception.CrawlerSystemException;
+import org.codelibs.fess.crawler.util.CrawlingParameterUtil;
+import org.codelibs.fess.crawler.util.ResponseDataUtil;
import org.cyberneko.html.parsers.DOMParser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -116,7 +116,7 @@ public class FessXpathTransformer extends AbstractFessXpathTransformer {
}
parser.parse(is);
} catch (final Exception e) {
- throw new RobotCrawlAccessException("Could not parse " + responseData.getUrl(), e);
+ throw new CrawlingAccessException("Could not parse " + responseData.getUrl(), e);
} finally {
IOUtils.closeQuietly(bis);
}
@@ -172,7 +172,7 @@ public class FessXpathTransformer extends AbstractFessXpathTransformer {
try {
resultData.setData(SerializeUtil.fromObjectToBinary(dataMap));
} catch (final Exception e) {
- throw new RobotCrawlAccessException("Could not serialize object: " + responseData.getUrl(), e);
+ throw new CrawlingAccessException("Could not serialize object: " + responseData.getUrl(), e);
}
resultData.setEncoding(charsetName);
} finally {
@@ -500,7 +500,7 @@ public class FessXpathTransformer extends AbstractFessXpathTransformer {
try {
return SerializeUtil.fromBinaryToObject(data);
} catch (final Exception e) {
- throw new RobotSystemException("Could not create an instanced from bytes.", e);
+ throw new CrawlerSystemException("Could not create an instance from bytes.", e);
}
}
return new HashMap();
diff --git a/src/main/java/org/codelibs/fess/ds/DataStoreCrawlingException.java b/src/main/java/org/codelibs/fess/ds/DataStoreCrawlingException.java
index 4c76f6bc6..a06af4481 100644
--- a/src/main/java/org/codelibs/fess/ds/DataStoreCrawlingException.java
+++ b/src/main/java/org/codelibs/fess/ds/DataStoreCrawlingException.java
@@ -16,9 +16,9 @@
package org.codelibs.fess.ds;
-import org.codelibs.robot.exception.RobotCrawlAccessException;
+import org.codelibs.fess.crawler.exception.CrawlingAccessException;
-public class DataStoreCrawlingException extends RobotCrawlAccessException {
+public class DataStoreCrawlingException extends CrawlingAccessException {
private static final long serialVersionUID = 1L;
diff --git a/src/main/java/org/codelibs/fess/ds/impl/CsvDataStoreImpl.java b/src/main/java/org/codelibs/fess/ds/impl/CsvDataStoreImpl.java
index 7e6b7e08b..53f1cb38e 100644
--- a/src/main/java/org/codelibs/fess/ds/impl/CsvDataStoreImpl.java
+++ b/src/main/java/org/codelibs/fess/ds/impl/CsvDataStoreImpl.java
@@ -36,8 +36,8 @@ import org.codelibs.fess.ds.DataStoreCrawlingException;
import org.codelibs.fess.ds.DataStoreException;
import org.codelibs.fess.ds.IndexUpdateCallback;
import org.codelibs.fess.es.exentity.DataConfig;
-import org.codelibs.robot.exception.RobotCrawlAccessException;
-import org.codelibs.robot.exception.RobotMultipleCrawlAccessException;
+import org.codelibs.fess.crawler.exception.CrawlingAccessException;
+import org.codelibs.fess.crawler.exception.MultipleCrawlingAccessException;
import org.lastaflute.di.core.SingletonLaContainer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -230,10 +230,10 @@ public class CsvDataStoreImpl extends AbstractDataStoreImpl {
try {
loop = callback.store(dataMap);
- } catch (final RobotCrawlAccessException e) {
+ } catch (final CrawlingAccessException e) {
Throwable target = e;
- if (target instanceof RobotMultipleCrawlAccessException) {
- final Throwable[] causes = ((RobotMultipleCrawlAccessException) target).getCauses();
+ if (target instanceof MultipleCrawlingAccessException) {
+ final Throwable[] causes = ((MultipleCrawlingAccessException) target).getCauses();
if (causes.length > 0) {
target = causes[causes.length - 1];
}
diff --git a/src/main/java/org/codelibs/fess/ds/impl/FileListDataStoreImpl.java b/src/main/java/org/codelibs/fess/ds/impl/FileListDataStoreImpl.java
index 3ee29749d..ea3a8215c 100644
--- a/src/main/java/org/codelibs/fess/ds/impl/FileListDataStoreImpl.java
+++ b/src/main/java/org/codelibs/fess/ds/impl/FileListDataStoreImpl.java
@@ -34,17 +34,17 @@ import org.codelibs.fess.helper.CrawlingSessionHelper;
import org.codelibs.fess.helper.FieldHelper;
import org.codelibs.fess.helper.IndexingHelper;
import org.codelibs.fess.util.ComponentUtil;
-import org.codelibs.robot.builder.RequestDataBuilder;
-import org.codelibs.robot.client.S2RobotClient;
-import org.codelibs.robot.client.S2RobotClientFactory;
-import org.codelibs.robot.entity.ResponseData;
-import org.codelibs.robot.entity.ResultData;
-import org.codelibs.robot.exception.RobotSystemException;
-import org.codelibs.robot.processor.ResponseProcessor;
-import org.codelibs.robot.processor.impl.DefaultResponseProcessor;
-import org.codelibs.robot.rule.Rule;
-import org.codelibs.robot.rule.RuleManager;
-import org.codelibs.robot.transformer.Transformer;
+import org.codelibs.fess.crawler.builder.RequestDataBuilder;
+import org.codelibs.fess.crawler.client.CrawlerClient;
+import org.codelibs.fess.crawler.client.CrawlerClientFactory;
+import org.codelibs.fess.crawler.entity.ResponseData;
+import org.codelibs.fess.crawler.entity.ResultData;
+import org.codelibs.fess.crawler.exception.CrawlerSystemException;
+import org.codelibs.fess.crawler.processor.ResponseProcessor;
+import org.codelibs.fess.crawler.processor.impl.DefaultResponseProcessor;
+import org.codelibs.fess.crawler.rule.Rule;
+import org.codelibs.fess.crawler.rule.RuleManager;
+import org.codelibs.fess.crawler.transformer.Transformer;
import org.lastaflute.di.core.SingletonLaContainer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -71,7 +71,7 @@ public class FileListDataStoreImpl extends CsvDataStoreImpl {
public int maxDeleteDocumentCacheSize = 100;
- protected S2RobotClientFactory robotClientFactory;
+ protected CrawlerClientFactory crawlerClientFactory;
protected CrawlingSessionHelper crawlingSessionHelper;
@@ -92,9 +92,9 @@ public class FileListDataStoreImpl extends CsvDataStoreImpl {
@Override
public void store(final DataConfig config, final IndexUpdateCallback callback, final Map initParamMap) {
- robotClientFactory = SingletonLaContainer.getComponent(S2RobotClientFactory.class);
+ crawlerClientFactory = SingletonLaContainer.getComponent(CrawlerClientFactory.class);
- config.initializeClientFactory(robotClientFactory);
+ config.initializeClientFactory(crawlerClientFactory);
super.store(config, callback, initParamMap);
}
@@ -170,9 +170,9 @@ public class FileListDataStoreImpl extends CsvDataStoreImpl {
final String url = dataMap.get(fieldHelper.urlField).toString();
try {
- final S2RobotClient client = robotClientFactory.getClient(url);
+ final CrawlerClient client = crawlerClientFactory.getClient(url);
if (client == null) {
- logger.warn("S2RobotClient is null. Data: " + dataMap);
+ logger.warn("CrawlerClient is null. Data: " + dataMap);
return false;
}
@@ -200,7 +200,7 @@ public class FileListDataStoreImpl extends CsvDataStoreImpl {
(Map) SerializeUtil.fromBinaryToObject(data);
dataMap.putAll(responseDataMap);
} catch (final Exception e) {
- throw new RobotSystemException("Could not create an instance from bytes.", e);
+ throw new CrawlerSystemException("Could not create an instance from bytes.", e);
}
}
diff --git a/src/main/java/org/codelibs/fess/es/exentity/CrawlingConfig.java b/src/main/java/org/codelibs/fess/es/exentity/CrawlingConfig.java
index f678a99b4..be4e8beaa 100644
--- a/src/main/java/org/codelibs/fess/es/exentity/CrawlingConfig.java
+++ b/src/main/java/org/codelibs/fess/es/exentity/CrawlingConfig.java
@@ -2,7 +2,7 @@ package org.codelibs.fess.es.exentity;
import java.util.Map;
-import org.codelibs.robot.client.S2RobotClientFactory;
+import org.codelibs.fess.crawler.client.CrawlerClientFactory;
public interface CrawlingConfig {
@@ -20,7 +20,7 @@ public interface CrawlingConfig {
String getConfigId();
- void initializeClientFactory(S2RobotClientFactory s2RobotClientFactory);
+ void initializeClientFactory(CrawlerClientFactory crawlerClientFactory);
Map getConfigParameterMap(ConfigName name);
diff --git a/src/main/java/org/codelibs/fess/es/exentity/DataConfig.java b/src/main/java/org/codelibs/fess/es/exentity/DataConfig.java
index 79300d71c..b498e81ae 100644
--- a/src/main/java/org/codelibs/fess/es/exentity/DataConfig.java
+++ b/src/main/java/org/codelibs/fess/es/exentity/DataConfig.java
@@ -24,13 +24,13 @@ import org.codelibs.fess.es.exbhv.LabelTypeBhv;
import org.codelibs.fess.es.exbhv.RoleTypeBhv;
import org.codelibs.fess.util.ComponentUtil;
import org.codelibs.fess.util.ParameterUtil;
-import org.codelibs.robot.client.S2RobotClientFactory;
-import org.codelibs.robot.client.http.Authentication;
-import org.codelibs.robot.client.http.HcHttpClient;
-import org.codelibs.robot.client.http.impl.AuthenticationImpl;
-import org.codelibs.robot.client.http.ntlm.JcifsEngine;
-import org.codelibs.robot.client.smb.SmbAuthentication;
-import org.codelibs.robot.client.smb.SmbClient;
+import org.codelibs.fess.crawler.client.CrawlerClientFactory;
+import org.codelibs.fess.crawler.client.http.Authentication;
+import org.codelibs.fess.crawler.client.http.HcHttpClient;
+import org.codelibs.fess.crawler.client.http.impl.AuthenticationImpl;
+import org.codelibs.fess.crawler.client.http.ntlm.JcifsEngine;
+import org.codelibs.fess.crawler.client.smb.SmbAuthentication;
+import org.codelibs.fess.crawler.client.smb.SmbClient;
import org.dbflute.cbean.result.ListResultBean;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -44,15 +44,15 @@ public class DataConfig extends BsDataConfig implements CrawlingConfig {
private static final Logger logger = LoggerFactory.getLogger(DataConfig.class);
- private static final String S2ROBOT_WEB_HEADER_PREFIX = "s2robot.web.header.";
+ private static final String S2ROBOT_WEB_HEADER_PREFIX = "crawler.web.header.";
- private static final String S2ROBOT_WEB_AUTH = "s2robot.web.auth";
+ private static final String S2ROBOT_WEB_AUTH = "crawler.web.auth";
- private static final String S2ROBOT_USERAGENT = "s2robot.useragent";
+ private static final String S2ROBOT_USERAGENT = "crawler.useragent";
- private static final String S2ROBOT_PARAM_PREFIX = "s2robot.param.";
+ private static final String S2ROBOT_PARAM_PREFIX = "crawler.param.";
- private static final Object S2ROBOT_FILE_AUTH = "s2robot.file.auth";
+ private static final Object S2ROBOT_FILE_AUTH = "crawler.file.auth";
private String[] labelTypeIds;
@@ -211,11 +211,11 @@ public class DataConfig extends BsDataConfig implements CrawlingConfig {
}
@Override
- public void initializeClientFactory(final S2RobotClientFactory robotClientFactory) {
+ public void initializeClientFactory(final CrawlerClientFactory crawlerClientFactory) {
final Map paramMap = getHandlerParameterMap();
final Map factoryParamMap = new HashMap();
- robotClientFactory.setInitParameterMap(factoryParamMap);
+ crawlerClientFactory.setInitParameterMap(factoryParamMap);
// parameters
for (final Map.Entry entry : paramMap.entrySet()) {
@@ -301,18 +301,19 @@ public class DataConfig extends BsDataConfig implements CrawlingConfig {
}
// request header
- final List rhList = new ArrayList();
+ final List rhList =
+ new ArrayList();
int count = 1;
String headerName = paramMap.get(S2ROBOT_WEB_HEADER_PREFIX + count + ".name");
while (StringUtil.isNotBlank(headerName)) {
final String headerValue = paramMap.get(S2ROBOT_WEB_HEADER_PREFIX + count + ".value");
- rhList.add(new org.codelibs.robot.client.http.RequestHeader(headerName, headerValue));
+ rhList.add(new org.codelibs.fess.crawler.client.http.RequestHeader(headerName, headerValue));
count++;
headerName = paramMap.get(S2ROBOT_WEB_HEADER_PREFIX + count + ".name");
}
if (!rhList.isEmpty()) {
factoryParamMap.put(HcHttpClient.REQUERT_HEADERS_PROPERTY,
- rhList.toArray(new org.codelibs.robot.client.http.RequestHeader[rhList.size()]));
+ rhList.toArray(new org.codelibs.fess.crawler.client.http.RequestHeader[rhList.size()]));
}
// file auth
diff --git a/src/main/java/org/codelibs/fess/es/exentity/FileConfig.java b/src/main/java/org/codelibs/fess/es/exentity/FileConfig.java
index 7ef16f246..be64db551 100644
--- a/src/main/java/org/codelibs/fess/es/exentity/FileConfig.java
+++ b/src/main/java/org/codelibs/fess/es/exentity/FileConfig.java
@@ -18,9 +18,9 @@ import org.codelibs.fess.es.exbhv.RoleTypeBhv;
import org.codelibs.fess.helper.SystemHelper;
import org.codelibs.fess.util.ComponentUtil;
import org.codelibs.fess.util.ParameterUtil;
-import org.codelibs.robot.client.S2RobotClientFactory;
-import org.codelibs.robot.client.smb.SmbAuthentication;
-import org.codelibs.robot.client.smb.SmbClient;
+import org.codelibs.fess.crawler.client.CrawlerClientFactory;
+import org.codelibs.fess.crawler.client.smb.SmbAuthentication;
+import org.codelibs.fess.crawler.client.smb.SmbClient;
import org.dbflute.cbean.result.ListResultBean;
import org.lastaflute.di.core.SingletonLaContainer;
@@ -228,7 +228,7 @@ public class FileConfig extends BsFileConfig implements CrawlingConfig {
}
@Override
- public void initializeClientFactory(final S2RobotClientFactory clientFactory) {
+ public void initializeClientFactory(final CrawlerClientFactory clientFactory) {
final FileAuthenticationService fileAuthenticationService = SingletonLaContainer.getComponent(FileAuthenticationService.class);
// Parameters
diff --git a/src/main/java/org/codelibs/fess/es/exentity/RequestHeader.java b/src/main/java/org/codelibs/fess/es/exentity/RequestHeader.java
index 263640e25..e67153778 100644
--- a/src/main/java/org/codelibs/fess/es/exentity/RequestHeader.java
+++ b/src/main/java/org/codelibs/fess/es/exentity/RequestHeader.java
@@ -30,8 +30,8 @@ public class RequestHeader extends BsRequestHeader {
asDocMeta().version(version);
}
- public org.codelibs.robot.client.http.RequestHeader getS2RobotRequestHeader() {
- return new org.codelibs.robot.client.http.RequestHeader(getName(), getValue());
+ public org.codelibs.fess.crawler.client.http.RequestHeader getCrawlerRequestHeader() {
+ return new org.codelibs.fess.crawler.client.http.RequestHeader(getName(), getValue());
}
public WebConfig getWebConfig() {
diff --git a/src/main/java/org/codelibs/fess/es/exentity/WebAuthentication.java b/src/main/java/org/codelibs/fess/es/exentity/WebAuthentication.java
index 481db5aed..fd220bacb 100644
--- a/src/main/java/org/codelibs/fess/es/exentity/WebAuthentication.java
+++ b/src/main/java/org/codelibs/fess/es/exentity/WebAuthentication.java
@@ -16,10 +16,10 @@ import org.codelibs.fess.app.service.WebConfigService;
import org.codelibs.fess.es.bsentity.BsWebAuthentication;
import org.codelibs.fess.util.ComponentUtil;
import org.codelibs.fess.util.ParameterUtil;
-import org.codelibs.robot.client.http.Authentication;
-import org.codelibs.robot.client.http.impl.AuthenticationImpl;
-import org.codelibs.robot.client.http.ntlm.JcifsEngine;
-import org.codelibs.robot.exception.RobotSystemException;
+import org.codelibs.fess.crawler.client.http.Authentication;
+import org.codelibs.fess.crawler.client.http.impl.AuthenticationImpl;
+import org.codelibs.fess.crawler.client.http.ntlm.JcifsEngine;
+import org.codelibs.fess.crawler.exception.CrawlerSystemException;
/**
* @author FreeGen
@@ -71,7 +71,7 @@ public class WebAuthentication extends BsWebAuthentication {
private Credentials getCredentials() {
if (StringUtil.isEmpty(getUsername())) {
- throw new RobotSystemException("username is empty.");
+ throw new CrawlerSystemException("username is empty.");
}
if (Constants.NTLM.equals(getProtocolScheme())) {
diff --git a/src/main/java/org/codelibs/fess/es/exentity/WebConfig.java b/src/main/java/org/codelibs/fess/es/exentity/WebConfig.java
index feee2c79b..6bd97f37f 100644
--- a/src/main/java/org/codelibs/fess/es/exentity/WebConfig.java
+++ b/src/main/java/org/codelibs/fess/es/exentity/WebConfig.java
@@ -18,9 +18,9 @@ import org.codelibs.fess.es.exbhv.WebConfigToLabelBhv;
import org.codelibs.fess.es.exbhv.WebConfigToRoleBhv;
import org.codelibs.fess.util.ComponentUtil;
import org.codelibs.fess.util.ParameterUtil;
-import org.codelibs.robot.client.S2RobotClientFactory;
-import org.codelibs.robot.client.http.Authentication;
-import org.codelibs.robot.client.http.HcHttpClient;
+import org.codelibs.fess.crawler.client.CrawlerClientFactory;
+import org.codelibs.fess.crawler.client.http.Authentication;
+import org.codelibs.fess.crawler.client.http.HcHttpClient;
import org.dbflute.cbean.result.ListResultBean;
import org.lastaflute.di.core.SingletonLaContainer;
@@ -232,7 +232,7 @@ public class WebConfig extends BsWebConfig implements CrawlingConfig {
}
@Override
- public void initializeClientFactory(final S2RobotClientFactory clientFactory) {
+ public void initializeClientFactory(final CrawlerClientFactory clientFactory) {
final WebAuthenticationService webAuthenticationService = SingletonLaContainer.getComponent(WebAuthenticationService.class);
final RequestHeaderService requestHeaderService = SingletonLaContainer.getComponent(RequestHeaderService.class);
@@ -259,11 +259,13 @@ public class WebConfig extends BsWebConfig implements CrawlingConfig {
// request header
final List requestHeaderList = requestHeaderService.getRequestHeaderList(getId());
- final List rhList = new ArrayList();
+ final List rhList =
+ new ArrayList();
for (final RequestHeader requestHeader : requestHeaderList) {
- rhList.add(requestHeader.getS2RobotRequestHeader());
+ rhList.add(requestHeader.getCrawlerRequestHeader());
}
- paramMap.put(HcHttpClient.REQUERT_HEADERS_PROPERTY, rhList.toArray(new org.codelibs.robot.client.http.RequestHeader[rhList.size()]));
+ paramMap.put(HcHttpClient.REQUERT_HEADERS_PROPERTY,
+ rhList.toArray(new org.codelibs.fess.crawler.client.http.RequestHeader[rhList.size()]));
}
diff --git a/src/main/java/org/codelibs/fess/exec/Crawler.java b/src/main/java/org/codelibs/fess/exec/Crawler.java
index 049a52fd8..5573a2ba7 100644
--- a/src/main/java/org/codelibs/fess/exec/Crawler.java
+++ b/src/main/java/org/codelibs/fess/exec/Crawler.java
@@ -46,7 +46,7 @@ import org.codelibs.fess.helper.PathMappingHelper;
import org.codelibs.fess.helper.WebFsIndexHelper;
import org.codelibs.fess.util.ComponentUtil;
import org.codelibs.fess.util.ResourceUtil;
-import org.codelibs.robot.client.EsClient;
+import org.codelibs.fess.crawler.client.EsClient;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.kohsuke.args4j.CmdLineException;
diff --git a/src/main/java/org/codelibs/fess/helper/RobotLogHelper.java b/src/main/java/org/codelibs/fess/helper/CrawlerLogHelper.java
similarity index 71%
rename from src/main/java/org/codelibs/fess/helper/RobotLogHelper.java
rename to src/main/java/org/codelibs/fess/helper/CrawlerLogHelper.java
index 664a98415..8f6570a3d 100644
--- a/src/main/java/org/codelibs/fess/helper/RobotLogHelper.java
+++ b/src/main/java/org/codelibs/fess/helper/CrawlerLogHelper.java
@@ -19,29 +19,29 @@ package org.codelibs.fess.helper;
import org.codelibs.fess.app.service.FailureUrlService;
import org.codelibs.fess.es.exentity.CrawlingConfig;
import org.codelibs.fess.util.ComponentUtil;
-import org.codelibs.robot.S2RobotContext;
-import org.codelibs.robot.entity.UrlQueue;
-import org.codelibs.robot.exception.RobotMultipleCrawlAccessException;
-import org.codelibs.robot.helper.impl.LogHelperImpl;
-import org.codelibs.robot.log.LogType;
+import org.codelibs.fess.crawler.CrawlerContext;
+import org.codelibs.fess.crawler.entity.UrlQueue;
+import org.codelibs.fess.crawler.exception.MultipleCrawlingAccessException;
+import org.codelibs.fess.crawler.helper.impl.LogHelperImpl;
+import org.codelibs.fess.crawler.log.LogType;
import org.lastaflute.di.core.SingletonLaContainer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-public class RobotLogHelper extends LogHelperImpl {
+public class CrawlerLogHelper extends LogHelperImpl {
private static final Logger logger = LoggerFactory // NOPMD
- .getLogger(RobotLogHelper.class);
+ .getLogger(CrawlerLogHelper.class);
@Override
public void log(final LogType key, final Object... objs) {
try {
switch (key) {
case CRAWLING_ACCESS_EXCEPTION: {
- final S2RobotContext robotContext = (S2RobotContext) objs[0];
+ final CrawlerContext crawlerContext = (CrawlerContext) objs[0];
final UrlQueue urlQueue = (UrlQueue) objs[1];
Throwable e = (Throwable) objs[2];
- if (e instanceof RobotMultipleCrawlAccessException) {
- final Throwable[] causes = ((RobotMultipleCrawlAccessException) e).getCauses();
+ if (e instanceof MultipleCrawlingAccessException) {
+ final Throwable[] causes = ((MultipleCrawlingAccessException) e).getCauses();
if (causes.length > 0) {
e = causes[causes.length - 1];
}
@@ -54,15 +54,15 @@ public class RobotLogHelper extends LogHelperImpl {
} else {
errorName = e.getClass().getCanonicalName();
}
- storeFailureUrl(robotContext, urlQueue, errorName, e);
+ storeFailureUrl(crawlerContext, urlQueue, errorName, e);
break;
}
case CRAWLING_EXCETPION: {
- final S2RobotContext robotContext = (S2RobotContext) objs[0];
+ final CrawlerContext crawlerContext = (CrawlerContext) objs[0];
final UrlQueue urlQueue = (UrlQueue) objs[1];
final Throwable e = (Throwable) objs[2];
- storeFailureUrl(robotContext, urlQueue, e.getClass().getCanonicalName(), e);
+ storeFailureUrl(crawlerContext, urlQueue, e.getClass().getCanonicalName(), e);
break;
}
default:
@@ -75,9 +75,9 @@ public class RobotLogHelper extends LogHelperImpl {
super.log(key, objs);
}
- private void storeFailureUrl(final S2RobotContext robotContext, final UrlQueue urlQueue, final String errorName, final Throwable e) {
+ private void storeFailureUrl(final CrawlerContext crawlerContext, final UrlQueue urlQueue, final String errorName, final Throwable e) {
- final CrawlingConfig crawlingConfig = getCrawlingConfig(robotContext.getSessionId());
+ final CrawlingConfig crawlingConfig = getCrawlingConfig(crawlerContext.getSessionId());
final String url = urlQueue.getUrl();
final FailureUrlService failureUrlService = SingletonLaContainer.getComponent(FailureUrlService.class);
diff --git a/src/main/java/org/codelibs/fess/helper/DataIndexHelper.java b/src/main/java/org/codelibs/fess/helper/DataIndexHelper.java
index 0f36f73d4..7c3505c81 100644
--- a/src/main/java/org/codelibs/fess/helper/DataIndexHelper.java
+++ b/src/main/java/org/codelibs/fess/helper/DataIndexHelper.java
@@ -132,8 +132,8 @@ public class DataIndexHelper implements Serializable {
while (startedCrawlerNum < dataCrawlingThreadList.size()) {
// Force to stop crawl
if (systemHelper.isForceStop()) {
- for (final DataCrawlingThread s2Robot : dataCrawlingThreadList) {
- s2Robot.stopCrawling();
+ for (final DataCrawlingThread crawlerThread : dataCrawlingThreadList) {
+ crawlerThread.stopCrawling();
}
break;
}
diff --git a/src/main/java/org/codelibs/fess/helper/SystemHelper.java b/src/main/java/org/codelibs/fess/helper/SystemHelper.java
index 4bfd6e217..485c87672 100644
--- a/src/main/java/org/codelibs/fess/helper/SystemHelper.java
+++ b/src/main/java/org/codelibs/fess/helper/SystemHelper.java
@@ -42,7 +42,7 @@ import org.codelibs.fess.Constants;
import org.codelibs.fess.app.service.RoleTypeService;
import org.codelibs.fess.es.exentity.RoleType;
import org.codelibs.fess.util.ComponentUtil;
-import org.codelibs.robot.util.CharUtil;
+import org.codelibs.fess.crawler.util.CharUtil;
import org.lastaflute.di.core.SingletonLaContainer;
import org.lastaflute.web.util.LaRequestUtil;
diff --git a/src/main/java/org/codelibs/fess/helper/ViewHelper.java b/src/main/java/org/codelibs/fess/helper/ViewHelper.java
index f04bc5d20..95e95f9af 100644
--- a/src/main/java/org/codelibs/fess/helper/ViewHelper.java
+++ b/src/main/java/org/codelibs/fess/helper/ViewHelper.java
@@ -56,11 +56,11 @@ import org.codelibs.fess.helper.UserAgentHelper.UserAgentType;
import org.codelibs.fess.util.ComponentUtil;
import org.codelibs.fess.util.DocumentUtil;
import org.codelibs.fess.util.ResourceUtil;
-import org.codelibs.robot.builder.RequestDataBuilder;
-import org.codelibs.robot.client.S2RobotClient;
-import org.codelibs.robot.client.S2RobotClientFactory;
-import org.codelibs.robot.entity.ResponseData;
-import org.codelibs.robot.util.CharUtil;
+import org.codelibs.fess.crawler.builder.RequestDataBuilder;
+import org.codelibs.fess.crawler.client.CrawlerClient;
+import org.codelibs.fess.crawler.client.CrawlerClientFactory;
+import org.codelibs.fess.crawler.entity.ResponseData;
+import org.codelibs.fess.crawler.util.CharUtil;
import org.lastaflute.di.core.SingletonLaContainer;
import org.lastaflute.taglib.function.LaFunctions;
import org.lastaflute.web.response.StreamResponse;
@@ -508,11 +508,11 @@ public class ViewHelper implements Serializable {
throw new FessSystemException("No crawlingConfig: " + configIdObj);
}
final String url = (String) doc.get(fieldHelper.urlField);
- final S2RobotClientFactory robotClientFactory = SingletonLaContainer.getComponent(S2RobotClientFactory.class);
- config.initializeClientFactory(robotClientFactory);
- final S2RobotClient client = robotClientFactory.getClient(url);
+ final CrawlerClientFactory crawlerClientFactory = SingletonLaContainer.getComponent(CrawlerClientFactory.class);
+ config.initializeClientFactory(crawlerClientFactory);
+ final CrawlerClient client = crawlerClientFactory.getClient(url);
if (client == null) {
- throw new FessSystemException("No S2RobotClient: " + configIdObj + ", url: " + url);
+ throw new FessSystemException("No CrawlerClient: " + configIdObj + ", url: " + url);
}
final ResponseData responseData = client.execute(RequestDataBuilder.newRequestData().get().url(url).build());
final StreamResponse response = new StreamResponse(StringUtil.EMPTY);
diff --git a/src/main/java/org/codelibs/fess/helper/WebFsIndexHelper.java b/src/main/java/org/codelibs/fess/helper/WebFsIndexHelper.java
index d5cd755d1..bf2f7d008 100644
--- a/src/main/java/org/codelibs/fess/helper/WebFsIndexHelper.java
+++ b/src/main/java/org/codelibs/fess/helper/WebFsIndexHelper.java
@@ -37,11 +37,11 @@ import org.codelibs.fess.es.exentity.FileConfig;
import org.codelibs.fess.es.exentity.WebConfig;
import org.codelibs.fess.indexer.IndexUpdater;
import org.codelibs.fess.util.ComponentUtil;
-import org.codelibs.robot.S2Robot;
-import org.codelibs.robot.S2RobotContext;
-import org.codelibs.robot.service.DataService;
-import org.codelibs.robot.service.UrlFilterService;
-import org.codelibs.robot.service.UrlQueueService;
+import org.codelibs.fess.crawler.Crawler;
+import org.codelibs.fess.crawler.CrawlerContext;
+import org.codelibs.fess.crawler.service.DataService;
+import org.codelibs.fess.crawler.service.UrlFilterService;
+import org.codelibs.fess.crawler.service.UrlQueueService;
import org.lastaflute.di.core.SingletonLaContainer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -81,7 +81,7 @@ public class WebFsIndexHelper implements Serializable {
public int crawlerPriority = Thread.NORM_PRIORITY;
- private final List s2RobotList = Collections.synchronizedList(new ArrayList());
+ private final List crawlerList = Collections.synchronizedList(new ArrayList());
// needed?
@Deprecated
@@ -140,15 +140,15 @@ public class WebFsIndexHelper implements Serializable {
final long startTime = System.currentTimeMillis();
final List sessionIdList = new ArrayList();
- s2RobotList.clear();
- final List s2RobotStatusList = new ArrayList();
+ crawlerList.clear();
+ final List crawlerStatusList = new ArrayList();
// Web
for (final WebConfig webConfig : webConfigList) {
final String sid = crawlingConfigHelper.store(sessionId, webConfig);
- // create s2robot
- final S2Robot s2Robot = SingletonLaContainer.getComponent(S2Robot.class);
- s2Robot.setSessionId(sid);
+ // create crawler
+ final Crawler crawler = SingletonLaContainer.getComponent(Crawler.class);
+ crawler.setSessionId(sid);
sessionIdList.add(sid);
final String urlsStr = webConfig.getUrls();
@@ -160,26 +160,26 @@ public class WebFsIndexHelper implements Serializable {
// interval time
final int intervalTime =
webConfig.getIntervalTime() != null ? webConfig.getIntervalTime() : Constants.DEFAULT_INTERVAL_TIME_FOR_WEB;
- ((FessIntervalController) s2Robot.getIntervalController()).setDelayMillisForWaitingNewUrl(intervalTime);
+ ((FessIntervalController) crawler.getIntervalController()).setDelayMillisForWaitingNewUrl(intervalTime);
final String includedUrlsStr = webConfig.getIncludedUrls() != null ? webConfig.getIncludedUrls() : StringUtil.EMPTY;
final String excludedUrlsStr = webConfig.getExcludedUrls() != null ? webConfig.getExcludedUrls() : StringUtil.EMPTY;
// num of threads
- final S2RobotContext robotContext = s2Robot.getRobotContext();
+ final CrawlerContext crawlerContext = crawler.getCrawlerContext();
final int numOfThread =
webConfig.getNumOfThread() != null ? webConfig.getNumOfThread() : Constants.DEFAULT_NUM_OF_THREAD_FOR_WEB;
- robotContext.setNumOfThread(numOfThread);
+ crawlerContext.setNumOfThread(numOfThread);
// depth
final int depth = webConfig.getDepth() != null ? webConfig.getDepth() : -1;
- robotContext.setMaxDepth(depth);
+ crawlerContext.setMaxDepth(depth);
// max count
final long maxCount = webConfig.getMaxAccessCount() != null ? webConfig.getMaxAccessCount() : maxAccessCount;
- robotContext.setMaxAccessCount(maxCount);
+ crawlerContext.setMaxAccessCount(maxCount);
- webConfig.initializeClientFactory(s2Robot.getClientFactory());
+ webConfig.initializeClientFactory(crawler.getClientFactory());
// set urls
final String[] urls = urlsStr.split("[\r\n]");
@@ -187,7 +187,7 @@ public class WebFsIndexHelper implements Serializable {
if (StringUtil.isNotBlank(u)) {
final String urlValue = u.trim();
if (!urlValue.startsWith("#")) {
- s2Robot.addUrl(urlValue);
+ crawler.addUrl(urlValue);
if (logger.isInfoEnabled()) {
logger.info("Target URL: " + urlValue);
}
@@ -201,7 +201,7 @@ public class WebFsIndexHelper implements Serializable {
if (StringUtil.isNotBlank(u)) {
final String urlValue = u.trim();
if (!urlValue.startsWith("#")) {
- s2Robot.addIncludeFilter(urlValue);
+ crawler.addIncludeFilter(urlValue);
if (logger.isInfoEnabled()) {
logger.info("Included URL: " + urlValue);
}
@@ -215,7 +215,7 @@ public class WebFsIndexHelper implements Serializable {
if (StringUtil.isNotBlank(u)) {
final String urlValue = u.trim();
if (!urlValue.startsWith("#")) {
- s2Robot.addExcludeFilter(urlValue);
+ crawler.addExcludeFilter(urlValue);
if (logger.isInfoEnabled()) {
logger.info("Excluded URL: " + urlValue);
}
@@ -229,7 +229,7 @@ public class WebFsIndexHelper implements Serializable {
for (final String u : excludedUrlList) {
if (StringUtil.isNotBlank(u)) {
final String urlValue = u.trim();
- s2Robot.addExcludeFilter(urlValue);
+ crawler.addExcludeFilter(urlValue);
if (logger.isInfoEnabled()) {
logger.info("Excluded URL from failures: " + urlValue);
}
@@ -241,20 +241,20 @@ public class WebFsIndexHelper implements Serializable {
logger.debug("Crawling " + urlsStr);
}
- s2Robot.setBackground(true);
- s2Robot.setThreadPriority(crawlerPriority);
+ crawler.setBackground(true);
+ crawler.setThreadPriority(crawlerPriority);
- s2RobotList.add(s2Robot);
- s2RobotStatusList.add(Constants.READY);
+ crawlerList.add(crawler);
+ crawlerStatusList.add(Constants.READY);
}
// File
for (final FileConfig fileConfig : fileConfigList) {
final String sid = crawlingConfigHelper.store(sessionId, fileConfig);
- // create s2robot
- final S2Robot s2Robot = SingletonLaContainer.getComponent(S2Robot.class);
- s2Robot.setSessionId(sid);
+ // create crawler
+ final Crawler crawler = SingletonLaContainer.getComponent(Crawler.class);
+ crawler.setSessionId(sid);
sessionIdList.add(sid);
final String pathsStr = fileConfig.getPaths();
@@ -265,26 +265,26 @@ public class WebFsIndexHelper implements Serializable {
final int intervalTime =
fileConfig.getIntervalTime() != null ? fileConfig.getIntervalTime() : Constants.DEFAULT_INTERVAL_TIME_FOR_FS;
- ((FessIntervalController) s2Robot.getIntervalController()).setDelayMillisForWaitingNewUrl(intervalTime);
+ ((FessIntervalController) crawler.getIntervalController()).setDelayMillisForWaitingNewUrl(intervalTime);
final String includedPathsStr = fileConfig.getIncludedPaths() != null ? fileConfig.getIncludedPaths() : StringUtil.EMPTY;
final String excludedPathsStr = fileConfig.getExcludedPaths() != null ? fileConfig.getExcludedPaths() : StringUtil.EMPTY;
// num of threads
- final S2RobotContext robotContext = s2Robot.getRobotContext();
+ final CrawlerContext crawlerContext = crawler.getCrawlerContext();
final int numOfThread =
fileConfig.getNumOfThread() != null ? fileConfig.getNumOfThread() : Constants.DEFAULT_NUM_OF_THREAD_FOR_FS;
- robotContext.setNumOfThread(numOfThread);
+ crawlerContext.setNumOfThread(numOfThread);
// depth
final int depth = fileConfig.getDepth() != null ? fileConfig.getDepth() : -1;
- robotContext.setMaxDepth(depth);
+ crawlerContext.setMaxDepth(depth);
// max count
final long maxCount = fileConfig.getMaxAccessCount() != null ? fileConfig.getMaxAccessCount() : maxAccessCount;
- robotContext.setMaxAccessCount(maxCount);
+ crawlerContext.setMaxAccessCount(maxCount);
- fileConfig.initializeClientFactory(s2Robot.getClientFactory());
+ fileConfig.initializeClientFactory(crawler.getClientFactory());
// set paths
final String[] paths = pathsStr.split("[\r\n]");
@@ -299,7 +299,7 @@ public class WebFsIndexHelper implements Serializable {
u = "file:/" + u;
}
}
- s2Robot.addUrl(u);
+ crawler.addUrl(u);
if (logger.isInfoEnabled()) {
logger.info("Target Path: " + u);
}
@@ -321,7 +321,7 @@ public class WebFsIndexHelper implements Serializable {
} else {
urlValue = systemHelper.encodeUrlFilter(line);
}
- s2Robot.addIncludeFilter(urlValue);
+ crawler.addIncludeFilter(urlValue);
if (logger.isInfoEnabled()) {
logger.info("Included Path: " + urlValue);
}
@@ -345,7 +345,7 @@ public class WebFsIndexHelper implements Serializable {
} else {
urlValue = systemHelper.encodeUrlFilter(line);
}
- s2Robot.addExcludeFilter(urlValue);
+ crawler.addExcludeFilter(urlValue);
if (logger.isInfoEnabled()) {
logger.info("Excluded Path: " + urlValue);
}
@@ -361,7 +361,7 @@ public class WebFsIndexHelper implements Serializable {
for (final String u : excludedUrlList) {
if (StringUtil.isNotBlank(u)) {
final String urlValue = u.trim();
- s2Robot.addExcludeFilter(urlValue);
+ crawler.addExcludeFilter(urlValue);
if (logger.isInfoEnabled()) {
logger.info("Excluded Path from failures: " + urlValue);
}
@@ -373,11 +373,11 @@ public class WebFsIndexHelper implements Serializable {
logger.debug("Crawling " + pathsStr);
}
- s2Robot.setBackground(true);
- s2Robot.setThreadPriority(crawlerPriority);
+ crawler.setBackground(true);
+ crawler.setThreadPriority(crawlerPriority);
- s2RobotList.add(s2Robot);
- s2RobotStatusList.add(Constants.READY);
+ crawlerList.add(crawler);
+ crawlerStatusList.add(Constants.READY);
}
// run index update
@@ -386,7 +386,7 @@ public class WebFsIndexHelper implements Serializable {
indexUpdater.setPriority(indexUpdaterPriority);
indexUpdater.setSessionIdList(sessionIdList);
indexUpdater.setDaemon(true);
- indexUpdater.setS2RobotList(s2RobotList);
+ indexUpdater.setCrawlerList(crawlerList);
for (final BoostDocumentRule rule : boostDocumentRuleService.getAvailableBoostDocumentRuleList()) {
indexUpdater.addBoostDocumentRule(new org.codelibs.fess.indexer.BoostDocumentRule(rule));
}
@@ -394,19 +394,19 @@ public class WebFsIndexHelper implements Serializable {
int startedCrawlerNum = 0;
int activeCrawlerNum = 0;
- while (startedCrawlerNum < s2RobotList.size()) {
+ while (startedCrawlerNum < crawlerList.size()) {
// Force to stop crawl
if (systemHelper.isForceStop()) {
- for (final S2Robot s2Robot : s2RobotList) {
- s2Robot.stop();
+ for (final Crawler crawler : crawlerList) {
+ crawler.stop();
}
break;
}
if (activeCrawlerNum < multiprocessCrawlingCount) {
// start crawling
- s2RobotList.get(startedCrawlerNum).execute();
- s2RobotStatusList.set(startedCrawlerNum, Constants.RUNNING);
+ crawlerList.get(startedCrawlerNum).execute();
+ crawlerStatusList.set(startedCrawlerNum, Constants.RUNNING);
startedCrawlerNum++;
activeCrawlerNum++;
try {
@@ -419,10 +419,10 @@ public class WebFsIndexHelper implements Serializable {
// check status
for (int i = 0; i < startedCrawlerNum; i++) {
- if (!s2RobotList.get(i).getRobotContext().isRunning() && s2RobotStatusList.get(i).equals(Constants.RUNNING)) {
- s2RobotList.get(i).awaitTermination();
- s2RobotStatusList.set(i, Constants.DONE);
- final String sid = s2RobotList.get(i).getRobotContext().getSessionId();
+ if (!crawlerList.get(i).getCrawlerContext().isRunning() && crawlerStatusList.get(i).equals(Constants.RUNNING)) {
+ crawlerList.get(i).awaitTermination();
+ crawlerStatusList.set(i, Constants.DONE);
+ final String sid = crawlerList.get(i).getCrawlerContext().getSessionId();
indexUpdater.addFinishedSessionId(sid);
activeCrawlerNum--;
}
@@ -437,20 +437,20 @@ public class WebFsIndexHelper implements Serializable {
boolean finishedAll = false;
while (!finishedAll) {
finishedAll = true;
- for (int i = 0; i < s2RobotList.size(); i++) {
- s2RobotList.get(i).awaitTermination(crawlingExecutionInterval);
- if (!s2RobotList.get(i).getRobotContext().isRunning() && !s2RobotStatusList.get(i).equals(Constants.DONE)) {
- s2RobotStatusList.set(i, Constants.DONE);
- final String sid = s2RobotList.get(i).getRobotContext().getSessionId();
+ for (int i = 0; i < crawlerList.size(); i++) {
+ crawlerList.get(i).awaitTermination(crawlingExecutionInterval);
+ if (!crawlerList.get(i).getCrawlerContext().isRunning() && !crawlerStatusList.get(i).equals(Constants.DONE)) {
+ crawlerStatusList.set(i, Constants.DONE);
+ final String sid = crawlerList.get(i).getCrawlerContext().getSessionId();
indexUpdater.addFinishedSessionId(sid);
}
- if (!s2RobotStatusList.get(i).equals(Constants.DONE)) {
+ if (!crawlerStatusList.get(i).equals(Constants.DONE)) {
finishedAll = false;
}
}
}
- s2RobotList.clear();
- s2RobotStatusList.clear();
+ crawlerList.clear();
+ crawlerStatusList.clear();
// put cralwing info
final CrawlingSessionHelper crawlingSessionHelper = ComponentUtil.getCrawlingSessionHelper();
diff --git a/src/main/java/org/codelibs/fess/indexer/BoostDocumentRule.java b/src/main/java/org/codelibs/fess/indexer/BoostDocumentRule.java
index 576e4bdb8..0e9335429 100644
--- a/src/main/java/org/codelibs/fess/indexer/BoostDocumentRule.java
+++ b/src/main/java/org/codelibs/fess/indexer/BoostDocumentRule.java
@@ -53,7 +53,11 @@ public class BoostDocumentRule {
return ((Boolean) value).booleanValue();
}
} catch (final Exception e) {
- logger.warn("Failed to parse a doc for boost: " + map, e);
+ if (logger.isDebugEnabled()) {
+ logger.debug("Failed to evaluate \"" + matchExpression + "\" for " + map, e);
+ } else {
+ logger.warn("Failed to evaluate \"" + matchExpression + "\".");
+ }
}
return false;
diff --git a/src/main/java/org/codelibs/fess/indexer/IndexUpdater.java b/src/main/java/org/codelibs/fess/indexer/IndexUpdater.java
index c512415b6..2010e3e61 100644
--- a/src/main/java/org/codelibs/fess/indexer/IndexUpdater.java
+++ b/src/main/java/org/codelibs/fess/indexer/IndexUpdater.java
@@ -36,17 +36,17 @@ import org.codelibs.fess.helper.IntervalControlHelper;
import org.codelibs.fess.helper.SearchLogHelper;
import org.codelibs.fess.helper.SystemHelper;
import org.codelibs.fess.util.ComponentUtil;
-import org.codelibs.robot.S2Robot;
-import org.codelibs.robot.entity.AccessResult;
-import org.codelibs.robot.entity.AccessResultData;
-import org.codelibs.robot.entity.EsAccessResult;
-import org.codelibs.robot.entity.EsUrlQueue;
-import org.codelibs.robot.service.DataService;
-import org.codelibs.robot.service.UrlFilterService;
-import org.codelibs.robot.service.UrlQueueService;
-import org.codelibs.robot.service.impl.EsDataService;
-import org.codelibs.robot.transformer.Transformer;
-import org.codelibs.robot.util.EsResultList;
+import org.codelibs.fess.crawler.Crawler;
+import org.codelibs.fess.crawler.entity.AccessResult;
+import org.codelibs.fess.crawler.entity.AccessResultData;
+import org.codelibs.fess.crawler.entity.EsAccessResult;
+import org.codelibs.fess.crawler.entity.EsUrlQueue;
+import org.codelibs.fess.crawler.service.DataService;
+import org.codelibs.fess.crawler.service.UrlFilterService;
+import org.codelibs.fess.crawler.service.UrlQueueService;
+import org.codelibs.fess.crawler.service.impl.EsDataService;
+import org.codelibs.fess.crawler.transformer.Transformer;
+import org.codelibs.fess.crawler.util.EsResultList;
import org.elasticsearch.action.search.SearchRequestBuilder;
import org.elasticsearch.index.query.FilterBuilders;
import org.elasticsearch.index.query.QueryBuilder;
@@ -122,7 +122,7 @@ public class IndexUpdater extends Thread {
private final Map docValueMap = new HashMap();
- private List s2RobotList;
+ private List crawlerList;
public IndexUpdater() {
// nothing
@@ -176,7 +176,7 @@ public class IndexUpdater extends Thread {
.boolFilter()
.must(FilterBuilders.termsFilter(EsAccessResult.SESSION_ID, sessionIdList))
.must(FilterBuilders.termFilter(EsAccessResult.STATUS,
- org.codelibs.robot.Constants.OK_STATUS)));
+ org.codelibs.fess.crawler.Constants.OK_STATUS)));
builder.setQuery(queryBuilder);
builder.setFrom(0);
if (maxDocumentCacheSize <= 0) {
@@ -507,8 +507,8 @@ public class IndexUpdater extends Thread {
private void forceStop() {
systemHelper.setForceStop(true);
- for (final S2Robot s2Robot : s2RobotList) {
- s2Robot.stop();
+ for (final Crawler crawler : crawlerList) {
+ crawler.stop();
}
}
@@ -557,7 +557,7 @@ public class IndexUpdater extends Thread {
docValueMap.put(fieldName, value);
}
- public void setS2RobotList(final List s2RobotList) {
- this.s2RobotList = s2RobotList;
+ public void setCrawlerList(final List crawlerList) {
+ this.crawlerList = crawlerList;
}
}
diff --git a/src/main/java/org/codelibs/fess/util/ComponentUtil.java b/src/main/java/org/codelibs/fess/util/ComponentUtil.java
index 2f8901bba..13c4b015b 100644
--- a/src/main/java/org/codelibs/fess/util/ComponentUtil.java
+++ b/src/main/java/org/codelibs/fess/util/ComponentUtil.java
@@ -45,9 +45,9 @@ import org.codelibs.fess.helper.UserAgentHelper;
import org.codelibs.fess.helper.ViewHelper;
import org.codelibs.fess.indexer.IndexUpdater;
import org.codelibs.fess.job.JobExecutor;
-import org.codelibs.robot.entity.EsAccessResult;
-import org.codelibs.robot.extractor.ExtractorFactory;
-import org.codelibs.robot.service.DataService;
+import org.codelibs.fess.crawler.entity.EsAccessResult;
+import org.codelibs.fess.crawler.extractor.ExtractorFactory;
+import org.codelibs.fess.crawler.service.DataService;
import org.lastaflute.core.message.MessageManager;
import org.lastaflute.di.core.SingletonLaContainer;
import org.lastaflute.di.core.factory.SingletonLaContainerFactory;
diff --git a/src/main/java/org/codelibs/fess/util/ResourceUtil.java b/src/main/java/org/codelibs/fess/util/ResourceUtil.java
index 586fff005..959655914 100644
--- a/src/main/java/org/codelibs/fess/util/ResourceUtil.java
+++ b/src/main/java/org/codelibs/fess/util/ResourceUtil.java
@@ -73,19 +73,19 @@ public class ResourceUtil {
} catch (final Throwable e) { // NOSONAR
// ignore
}
- Path path = Paths.get(".", names);
- if (Files.exists(path)) {
- return path;
+ final Path defaultPath = Paths.get("WEB-INF/" + base, names);
+ if (Files.exists(defaultPath)) {
+ return defaultPath;
}
- path = Paths.get("src/main/webapps/WEB-INF/" + base, names);
- if (Files.exists(path)) {
- return path;
+ final Path srcBasePath = Paths.get("src/main/webapps/WEB-INF/" + base, names);
+ if (Files.exists(srcBasePath)) {
+ return srcBasePath;
}
- path = Paths.get("target/fess/WEB-INF/" + base, names);
- if (Files.exists(path)) {
- return path;
+ final Path targetBasePath = Paths.get("target/fess/WEB-INF/" + base, names);
+ if (Files.exists(targetBasePath)) {
+ return targetBasePath;
}
- return path;
+ return defaultPath;
}
public static File[] getJarFiles(final String namePrefix) {
diff --git a/src/main/resources/app.xml b/src/main/resources/app.xml
index 393e06e7e..4abe69586 100644
--- a/src/main/resources/app.xml
+++ b/src/main/resources/app.xml
@@ -10,8 +10,8 @@
-
-
+
+
diff --git a/src/main/resources/crawler.xml b/src/main/resources/crawler.xml
new file mode 100644
index 000000000..007040aae
--- /dev/null
+++ b/src/main/resources/crawler.xml
@@ -0,0 +1,52 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ".crawler"
+ "queue"
+
+
+ ".crawler"
+ "data"
+
+
+ ".crawler"
+ "filter"
+
+
+
diff --git a/src/main/resources/s2robot/contentlength.xml b/src/main/resources/crawler/contentlength.xml
similarity index 70%
rename from src/main/resources/s2robot/contentlength.xml
rename to src/main/resources/crawler/contentlength.xml
index 7c314d1d6..d155cbb86 100644
--- a/src/main/resources/s2robot/contentlength.xml
+++ b/src/main/resources/crawler/contentlength.xml
@@ -1,11 +1,11 @@
-
-
+
+
+ class="org.codelibs.fess.crawler.helper.ContentLengthHelper" instance="singleton">
10485760
"text/html"
diff --git a/src/main/resources/s2robot/es.xml b/src/main/resources/crawler/es.xml
similarity index 80%
rename from src/main/resources/s2robot/es.xml
rename to src/main/resources/crawler/es.xml
index 4d9d756b9..1bc8475aa 100644
--- a/src/main/resources/s2robot/es.xml
+++ b/src/main/resources/crawler/es.xml
@@ -3,6 +3,6 @@
"http://dbflute.org/meta/lastadi10.dtd">
+ class="org.codelibs.fess.crawler.client.EsClient">
diff --git a/src/main/resources/s2robot/interval.xml b/src/main/resources/crawler/interval.xml
similarity index 79%
rename from src/main/resources/s2robot/interval.xml
rename to src/main/resources/crawler/interval.xml
index 33eceda48..fb4df5452 100644
--- a/src/main/resources/s2robot/interval.xml
+++ b/src/main/resources/crawler/interval.xml
@@ -1,8 +1,8 @@
-
-
+
+
-
-
+
+
+ class="org.codelibs.fess.helper.CrawlerLogHelper">
diff --git a/src/main/resources/s2robot/rule.xml b/src/main/resources/crawler/rule.xml
similarity index 75%
rename from src/main/resources/s2robot/rule.xml
rename to src/main/resources/crawler/rule.xml
index 4c905265c..22315b477 100644
--- a/src/main/resources/s2robot/rule.xml
+++ b/src/main/resources/crawler/rule.xml
@@ -1,11 +1,11 @@
-
-
-
+
+
+
-
+
sitemapsRule
@@ -23,10 +23,10 @@
-
+
"sitemapsRule"
-
+
@@ -35,10 +35,10 @@
-
+
"webHtmlRule"
-
+
fessXpathTransformer
(int[])[200]
(int[])[304]
@@ -56,10 +56,10 @@
-
+
"webFileRule"
-
+
fessFileTransformer
(int[])[200]
(int[])[304]
@@ -85,10 +85,10 @@
-
+
"fsFileRule"
-
+
fessFileTransformer
(int[])[200]
(int[])[304]
@@ -116,10 +116,10 @@
-
+
"defaultRule"
-
+
fessTikaTransformer
(int[])[200]
(int[])[304]
diff --git a/src/main/resources/s2robot/transformer.xml b/src/main/resources/crawler/transformer.xml
similarity index 97%
rename from src/main/resources/s2robot/transformer.xml
rename to src/main/resources/crawler/transformer.xml
index a2e68741f..67d274c26 100644
--- a/src/main/resources/s2robot/transformer.xml
+++ b/src/main/resources/crawler/transformer.xml
@@ -1,8 +1,8 @@
-
-
+
+
diff --git a/src/main/resources/s2robot_es.xml b/src/main/resources/s2robot_es.xml
deleted file mode 100644
index 3dfdb1708..000000000
--- a/src/main/resources/s2robot_es.xml
+++ /dev/null
@@ -1,52 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- ".robot"
- "queue"
-
-
- ".robot"
- "data"
-
-
- ".robot"
- "filter"
-
-
-
diff --git a/src/main/webapp/WEB-INF/cmd/resources/app.xml b/src/main/webapp/WEB-INF/cmd/resources/app.xml
index 86e3d2d6b..6e7d0a54f 100644
--- a/src/main/webapp/WEB-INF/cmd/resources/app.xml
+++ b/src/main/webapp/WEB-INF/cmd/resources/app.xml
@@ -5,7 +5,7 @@
-
+