diff --git a/pom.xml b/pom.xml index 00771dbce..7b8180a7f 100644 --- a/pom.xml +++ b/pom.xml @@ -55,8 +55,8 @@ 4.8.2 0.5.2 - - 1.0.0-SNAPSHOT + + 1.0.0-SNAPSHOT 1.6 3.11-beta2 1.8.7 @@ -466,11 +466,11 @@ 2.2.1 - + - org.codelibs.robot - s2robot-lasta - ${s2robot.version} + org.codelibs.fess + fess-crawler-lasta + ${crawler.version} commons-logging @@ -479,9 +479,9 @@ - org.codelibs.robot - s2robot-es - ${s2robot.version} + org.codelibs.fess + fess-crawler-es + ${crawler.version} org.bouncycastle diff --git a/src/main/java/org/codelibs/fess/app/web/admin/webconfig/WebConfigEditForm.java b/src/main/java/org/codelibs/fess/app/web/admin/webconfig/WebConfigEditForm.java index 69ff23b6d..b35a014e3 100644 --- a/src/main/java/org/codelibs/fess/app/web/admin/webconfig/WebConfigEditForm.java +++ b/src/main/java/org/codelibs/fess/app/web/admin/webconfig/WebConfigEditForm.java @@ -139,7 +139,7 @@ public class WebConfigEditForm implements Serializable { sortOrder = "0"; userAgent = ComponentUtil.getUserAgentName(); if (StringUtil.isBlank(userAgent)) { - userAgent = "Fess Robot/" + Constants.FESS_VERSION; + userAgent = "FessCrawler/" + Constants.FESS_VERSION; } numOfThread = Integer.toString(Constants.DEFAULT_NUM_OF_THREAD_FOR_WEB); intervalTime = Integer.toString(Constants.DEFAULT_INTERVAL_TIME_FOR_WEB); diff --git a/src/main/java/org/codelibs/fess/app/web/admin/wizard/AdminWizardAction.java b/src/main/java/org/codelibs/fess/app/web/admin/wizard/AdminWizardAction.java index 3b32d6b47..61b9434a7 100644 --- a/src/main/java/org/codelibs/fess/app/web/admin/wizard/AdminWizardAction.java +++ b/src/main/java/org/codelibs/fess/app/web/admin/wizard/AdminWizardAction.java @@ -39,7 +39,7 @@ import org.codelibs.fess.helper.JobHelper; import org.codelibs.fess.helper.SystemHelper; import org.codelibs.fess.job.TriggeredJob; import org.codelibs.fess.util.ComponentUtil; -import org.codelibs.robot.util.CharUtil; +import org.codelibs.fess.crawler.util.CharUtil; import org.lastaflute.web.Execute; import org.lastaflute.web.callback.ActionRuntime; import org.lastaflute.web.response.HtmlResponse; diff --git a/src/main/java/org/codelibs/fess/app/web/go/GoAction.java b/src/main/java/org/codelibs/fess/app/web/go/GoAction.java index 280f65008..5a8799d1a 100644 --- a/src/main/java/org/codelibs/fess/app/web/go/GoAction.java +++ b/src/main/java/org/codelibs/fess/app/web/go/GoAction.java @@ -34,7 +34,7 @@ import org.codelibs.fess.helper.SearchLogHelper; import org.codelibs.fess.helper.ViewHelper; import org.codelibs.fess.util.ComponentUtil; import org.codelibs.fess.util.DocumentUtil; -import org.codelibs.robot.util.CharUtil; +import org.codelibs.fess.crawler.util.CharUtil; import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.index.query.TermQueryBuilder; import org.lastaflute.web.Execute; diff --git a/src/main/java/org/codelibs/fess/crawler/FessS2RobotThread.java b/src/main/java/org/codelibs/fess/crawler/FessCrawlerThread.java similarity index 90% rename from src/main/java/org/codelibs/fess/crawler/FessS2RobotThread.java rename to src/main/java/org/codelibs/fess/crawler/FessCrawlerThread.java index 25155166e..932960dc7 100644 --- a/src/main/java/org/codelibs/fess/crawler/FessS2RobotThread.java +++ b/src/main/java/org/codelibs/fess/crawler/FessCrawlerThread.java @@ -36,29 +36,29 @@ import org.codelibs.fess.helper.FieldHelper; import org.codelibs.fess.helper.IndexingHelper; import org.codelibs.fess.helper.SambaHelper; import org.codelibs.fess.util.ComponentUtil; -import org.codelibs.robot.S2RobotThread; -import org.codelibs.robot.builder.RequestDataBuilder; -import org.codelibs.robot.client.S2RobotClient; -import org.codelibs.robot.client.smb.SmbClient; -import org.codelibs.robot.entity.RequestData; -import org.codelibs.robot.entity.ResponseData; -import org.codelibs.robot.entity.UrlQueue; -import org.codelibs.robot.log.LogType; +import org.codelibs.fess.crawler.CrawlerThread; +import org.codelibs.fess.crawler.builder.RequestDataBuilder; +import org.codelibs.fess.crawler.client.CrawlerClient; +import org.codelibs.fess.crawler.client.smb.SmbClient; +import org.codelibs.fess.crawler.entity.RequestData; +import org.codelibs.fess.crawler.entity.ResponseData; +import org.codelibs.fess.crawler.entity.UrlQueue; +import org.codelibs.fess.crawler.log.LogType; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import jcifs.smb.ACE; import jcifs.smb.SID; -public class FessS2RobotThread extends S2RobotThread { - private static final Logger logger = LoggerFactory.getLogger(FessS2RobotThread.class); +public class FessCrawlerThread extends CrawlerThread { + private static final Logger logger = LoggerFactory.getLogger(FessCrawlerThread.class); @Override - protected boolean isContentUpdated(final S2RobotClient client, final UrlQueue urlQueue) { + protected boolean isContentUpdated(final CrawlerClient client, final UrlQueue urlQueue) { final DynamicProperties crawlerProperties = ComponentUtil.getCrawlerProperties(); if (crawlerProperties.getProperty(Constants.DIFF_CRAWLING_PROPERTY, Constants.TRUE).equals(Constants.TRUE)) { - log(logHelper, LogType.CHECK_LAST_MODIFIED, robotContext, urlQueue); + log(logHelper, LogType.CHECK_LAST_MODIFIED, crawlerContext, urlQueue); final long startTime = System.currentTimeMillis(); final CrawlingConfigHelper crawlingConfigHelper = ComponentUtil.getCrawlingConfigHelper(); @@ -72,7 +72,7 @@ public class FessS2RobotThread extends S2RobotThread { final String url = urlQueue.getUrl(); ResponseData responseData = null; try { - final CrawlingConfig crawlingConfig = crawlingConfigHelper.get(robotContext.getSessionId()); + final CrawlingConfig crawlingConfig = crawlingConfigHelper.get(crawlerContext.getSessionId()); final Map dataMap = new HashMap(); dataMap.put(fieldHelper.urlField, url); final List roleTypeList = new ArrayList(); @@ -140,12 +140,12 @@ public class FessS2RobotThread extends S2RobotThread { return true; } else if (responseData.getLastModified().getTime() <= lastModified.getTime() && httpStatusCode == 200) { - log(logHelper, LogType.NOT_MODIFIED, robotContext, urlQueue); + log(logHelper, LogType.NOT_MODIFIED, crawlerContext, urlQueue); responseData.setExecutionTime(System.currentTimeMillis() - startTime); responseData.setParentUrl(urlQueue.getParentUrl()); - responseData.setSessionId(robotContext.getSessionId()); - responseData.setHttpStatusCode(org.codelibs.robot.Constants.NOT_MODIFIED_STATUS); + responseData.setSessionId(crawlerContext.getSessionId()); + responseData.setHttpStatusCode(org.codelibs.fess.crawler.Constants.NOT_MODIFIED_STATUS); processResponse(urlQueue, responseData); storeChildUrlsToQueue(urlQueue, getAnchorSet(document.get(fieldHelper.anchorField))); @@ -163,7 +163,7 @@ public class FessS2RobotThread extends S2RobotThread { protected void storeChildUrlsToQueue(final UrlQueue urlQueue, final Set childUrlSet) { if (childUrlSet != null) { - synchronized (robotContext.getAccessCountLock()) { + synchronized (crawlerContext.getAccessCountLock()) { // add an url storeChildUrls(childUrlSet, urlQueue.getUrl(), urlQueue.getDepth() != null ? urlQueue.getDepth() + 1 : 1); } diff --git a/src/main/java/org/codelibs/fess/crawler/interval/FessIntervalController.java b/src/main/java/org/codelibs/fess/crawler/interval/FessIntervalController.java index d9a0f68c7..25739895b 100644 --- a/src/main/java/org/codelibs/fess/crawler/interval/FessIntervalController.java +++ b/src/main/java/org/codelibs/fess/crawler/interval/FessIntervalController.java @@ -18,7 +18,7 @@ package org.codelibs.fess.crawler.interval; import org.codelibs.fess.helper.IntervalControlHelper; import org.codelibs.fess.util.ComponentUtil; -import org.codelibs.robot.interval.impl.DefaultIntervalController; +import org.codelibs.fess.crawler.interval.impl.DefaultIntervalController; public class FessIntervalController extends DefaultIntervalController { diff --git a/src/main/java/org/codelibs/fess/crawler/transformer/AbstractFessFileTransformer.java b/src/main/java/org/codelibs/fess/crawler/transformer/AbstractFessFileTransformer.java index 29ad610cb..cdb501869 100644 --- a/src/main/java/org/codelibs/fess/crawler/transformer/AbstractFessFileTransformer.java +++ b/src/main/java/org/codelibs/fess/crawler/transformer/AbstractFessFileTransformer.java @@ -48,17 +48,17 @@ import org.codelibs.fess.helper.PathMappingHelper; import org.codelibs.fess.helper.SambaHelper; import org.codelibs.fess.helper.SystemHelper; import org.codelibs.fess.util.ComponentUtil; -import org.codelibs.robot.client.smb.SmbClient; -import org.codelibs.robot.entity.AccessResult; -import org.codelibs.robot.entity.AccessResultData; -import org.codelibs.robot.entity.ExtractData; -import org.codelibs.robot.entity.ResponseData; -import org.codelibs.robot.entity.ResultData; -import org.codelibs.robot.entity.UrlQueue; -import org.codelibs.robot.exception.RobotCrawlAccessException; -import org.codelibs.robot.exception.RobotSystemException; -import org.codelibs.robot.extractor.Extractor; -import org.codelibs.robot.util.CrawlingParameterUtil; +import org.codelibs.fess.crawler.client.smb.SmbClient; +import org.codelibs.fess.crawler.entity.AccessResult; +import org.codelibs.fess.crawler.entity.AccessResultData; +import org.codelibs.fess.crawler.entity.ExtractData; +import org.codelibs.fess.crawler.entity.ResponseData; +import org.codelibs.fess.crawler.entity.ResultData; +import org.codelibs.fess.crawler.entity.UrlQueue; +import org.codelibs.fess.crawler.exception.CrawlingAccessException; +import org.codelibs.fess.crawler.exception.CrawlerSystemException; +import org.codelibs.fess.crawler.extractor.Extractor; +import org.codelibs.fess.crawler.util.CrawlingParameterUtil; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -96,7 +96,7 @@ public abstract class AbstractFessFileTransformer extends AbstractFessXpathTrans @Override public ResultData transform(final ResponseData responseData) { if (responseData == null || responseData.getResponseBody() == null) { - throw new RobotCrawlAccessException("No response body."); + throw new CrawlingAccessException("No response body."); } final Extractor extractor = getExtractor(responseData); @@ -144,8 +144,8 @@ public abstract class AbstractFessFileTransformer extends AbstractFessXpathTrans } } } catch (final Exception e) { - final RobotCrawlAccessException rcae = new RobotCrawlAccessException("Could not get a text from " + responseData.getUrl(), e); - rcae.setLogLevel(RobotCrawlAccessException.WARN); + final CrawlingAccessException rcae = new CrawlingAccessException("Could not get a text from " + responseData.getUrl(), e); + rcae.setLogLevel(CrawlingAccessException.WARN); throw rcae; } finally { IOUtils.closeQuietly(in); @@ -323,7 +323,7 @@ public abstract class AbstractFessFileTransformer extends AbstractFessXpathTrans try { resultData.setData(SerializeUtil.fromObjectToBinary(dataMap)); } catch (final Exception e) { - throw new RobotCrawlAccessException("Could not serialize object: " + url, e); + throw new CrawlingAccessException("Could not serialize object: " + url, e); } resultData.setEncoding(charsetName); @@ -466,7 +466,7 @@ public abstract class AbstractFessFileTransformer extends AbstractFessXpathTrans try { return SerializeUtil.fromBinaryToObject(data); } catch (final Exception e) { - throw new RobotSystemException("Could not create an instanced from bytes.", e); + throw new CrawlerSystemException("Could not create an instanced from bytes.", e); } } return new HashMap(); diff --git a/src/main/java/org/codelibs/fess/crawler/transformer/AbstractFessXpathTransformer.java b/src/main/java/org/codelibs/fess/crawler/transformer/AbstractFessXpathTransformer.java index 343a3e53b..7be789a4c 100644 --- a/src/main/java/org/codelibs/fess/crawler/transformer/AbstractFessXpathTransformer.java +++ b/src/main/java/org/codelibs/fess/crawler/transformer/AbstractFessXpathTransformer.java @@ -25,7 +25,7 @@ import org.apache.commons.lang3.StringUtils; import org.codelibs.core.lang.StringUtil; import org.codelibs.fess.helper.FieldHelper; import org.codelibs.fess.util.ComponentUtil; -import org.codelibs.robot.transformer.impl.XpathTransformer; +import org.codelibs.fess.crawler.transformer.impl.XpathTransformer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/src/main/java/org/codelibs/fess/crawler/transformer/FessFileTransformer.java b/src/main/java/org/codelibs/fess/crawler/transformer/FessFileTransformer.java index 4a538cfe5..e5c13e0d4 100644 --- a/src/main/java/org/codelibs/fess/crawler/transformer/FessFileTransformer.java +++ b/src/main/java/org/codelibs/fess/crawler/transformer/FessFileTransformer.java @@ -18,9 +18,9 @@ package org.codelibs.fess.crawler.transformer; import org.codelibs.fess.exception.FessSystemException; import org.codelibs.fess.util.ComponentUtil; -import org.codelibs.robot.entity.ResponseData; -import org.codelibs.robot.extractor.Extractor; -import org.codelibs.robot.extractor.ExtractorFactory; +import org.codelibs.fess.crawler.entity.ResponseData; +import org.codelibs.fess.crawler.extractor.Extractor; +import org.codelibs.fess.crawler.extractor.ExtractorFactory; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/src/main/java/org/codelibs/fess/crawler/transformer/FessTikaTransformer.java b/src/main/java/org/codelibs/fess/crawler/transformer/FessTikaTransformer.java index 86e6c8b14..b3d58154a 100644 --- a/src/main/java/org/codelibs/fess/crawler/transformer/FessTikaTransformer.java +++ b/src/main/java/org/codelibs/fess/crawler/transformer/FessTikaTransformer.java @@ -17,8 +17,8 @@ package org.codelibs.fess.crawler.transformer; import org.codelibs.fess.exception.FessSystemException; -import org.codelibs.robot.entity.ResponseData; -import org.codelibs.robot.extractor.Extractor; +import org.codelibs.fess.crawler.entity.ResponseData; +import org.codelibs.fess.crawler.extractor.Extractor; import org.lastaflute.di.core.SingletonLaContainer; public class FessTikaTransformer extends AbstractFessFileTransformer { diff --git a/src/main/java/org/codelibs/fess/crawler/transformer/FessXpathTransformer.java b/src/main/java/org/codelibs/fess/crawler/transformer/FessXpathTransformer.java index ef6873444..d626061a2 100644 --- a/src/main/java/org/codelibs/fess/crawler/transformer/FessXpathTransformer.java +++ b/src/main/java/org/codelibs/fess/crawler/transformer/FessXpathTransformer.java @@ -50,17 +50,17 @@ import org.codelibs.fess.helper.OverlappingHostHelper; import org.codelibs.fess.helper.PathMappingHelper; import org.codelibs.fess.helper.SystemHelper; import org.codelibs.fess.util.ComponentUtil; -import org.codelibs.robot.builder.RequestDataBuilder; -import org.codelibs.robot.entity.AccessResultData; -import org.codelibs.robot.entity.RequestData; -import org.codelibs.robot.entity.ResponseData; -import org.codelibs.robot.entity.ResultData; -import org.codelibs.robot.entity.UrlQueue; -import org.codelibs.robot.exception.ChildUrlsException; -import org.codelibs.robot.exception.RobotCrawlAccessException; -import org.codelibs.robot.exception.RobotSystemException; -import org.codelibs.robot.util.CrawlingParameterUtil; -import org.codelibs.robot.util.ResponseDataUtil; +import org.codelibs.fess.crawler.builder.RequestDataBuilder; +import org.codelibs.fess.crawler.entity.AccessResultData; +import org.codelibs.fess.crawler.entity.RequestData; +import org.codelibs.fess.crawler.entity.ResponseData; +import org.codelibs.fess.crawler.entity.ResultData; +import org.codelibs.fess.crawler.entity.UrlQueue; +import org.codelibs.fess.crawler.exception.ChildUrlsException; +import org.codelibs.fess.crawler.exception.CrawlingAccessException; +import org.codelibs.fess.crawler.exception.CrawlerSystemException; +import org.codelibs.fess.crawler.util.CrawlingParameterUtil; +import org.codelibs.fess.crawler.util.ResponseDataUtil; import org.cyberneko.html.parsers.DOMParser; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -116,7 +116,7 @@ public class FessXpathTransformer extends AbstractFessXpathTransformer { } parser.parse(is); } catch (final Exception e) { - throw new RobotCrawlAccessException("Could not parse " + responseData.getUrl(), e); + throw new CrawlingAccessException("Could not parse " + responseData.getUrl(), e); } finally { IOUtils.closeQuietly(bis); } @@ -172,7 +172,7 @@ public class FessXpathTransformer extends AbstractFessXpathTransformer { try { resultData.setData(SerializeUtil.fromObjectToBinary(dataMap)); } catch (final Exception e) { - throw new RobotCrawlAccessException("Could not serialize object: " + responseData.getUrl(), e); + throw new CrawlingAccessException("Could not serialize object: " + responseData.getUrl(), e); } resultData.setEncoding(charsetName); } finally { @@ -500,7 +500,7 @@ public class FessXpathTransformer extends AbstractFessXpathTransformer { try { return SerializeUtil.fromBinaryToObject(data); } catch (final Exception e) { - throw new RobotSystemException("Could not create an instanced from bytes.", e); + throw new CrawlerSystemException("Could not create an instanced from bytes.", e); } } return new HashMap(); diff --git a/src/main/java/org/codelibs/fess/ds/DataStoreCrawlingException.java b/src/main/java/org/codelibs/fess/ds/DataStoreCrawlingException.java index 4c76f6bc6..a06af4481 100644 --- a/src/main/java/org/codelibs/fess/ds/DataStoreCrawlingException.java +++ b/src/main/java/org/codelibs/fess/ds/DataStoreCrawlingException.java @@ -16,9 +16,9 @@ package org.codelibs.fess.ds; -import org.codelibs.robot.exception.RobotCrawlAccessException; +import org.codelibs.fess.crawler.exception.CrawlingAccessException; -public class DataStoreCrawlingException extends RobotCrawlAccessException { +public class DataStoreCrawlingException extends CrawlingAccessException { private static final long serialVersionUID = 1L; diff --git a/src/main/java/org/codelibs/fess/ds/impl/CsvDataStoreImpl.java b/src/main/java/org/codelibs/fess/ds/impl/CsvDataStoreImpl.java index 7e6b7e08b..53f1cb38e 100644 --- a/src/main/java/org/codelibs/fess/ds/impl/CsvDataStoreImpl.java +++ b/src/main/java/org/codelibs/fess/ds/impl/CsvDataStoreImpl.java @@ -36,8 +36,8 @@ import org.codelibs.fess.ds.DataStoreCrawlingException; import org.codelibs.fess.ds.DataStoreException; import org.codelibs.fess.ds.IndexUpdateCallback; import org.codelibs.fess.es.exentity.DataConfig; -import org.codelibs.robot.exception.RobotCrawlAccessException; -import org.codelibs.robot.exception.RobotMultipleCrawlAccessException; +import org.codelibs.fess.crawler.exception.CrawlingAccessException; +import org.codelibs.fess.crawler.exception.MultipleCrawlingAccessException; import org.lastaflute.di.core.SingletonLaContainer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -230,10 +230,10 @@ public class CsvDataStoreImpl extends AbstractDataStoreImpl { try { loop = callback.store(dataMap); - } catch (final RobotCrawlAccessException e) { + } catch (final CrawlingAccessException e) { Throwable target = e; - if (target instanceof RobotMultipleCrawlAccessException) { - final Throwable[] causes = ((RobotMultipleCrawlAccessException) target).getCauses(); + if (target instanceof MultipleCrawlingAccessException) { + final Throwable[] causes = ((MultipleCrawlingAccessException) target).getCauses(); if (causes.length > 0) { target = causes[causes.length - 1]; } diff --git a/src/main/java/org/codelibs/fess/ds/impl/FileListDataStoreImpl.java b/src/main/java/org/codelibs/fess/ds/impl/FileListDataStoreImpl.java index 3ee29749d..ea3a8215c 100644 --- a/src/main/java/org/codelibs/fess/ds/impl/FileListDataStoreImpl.java +++ b/src/main/java/org/codelibs/fess/ds/impl/FileListDataStoreImpl.java @@ -34,17 +34,17 @@ import org.codelibs.fess.helper.CrawlingSessionHelper; import org.codelibs.fess.helper.FieldHelper; import org.codelibs.fess.helper.IndexingHelper; import org.codelibs.fess.util.ComponentUtil; -import org.codelibs.robot.builder.RequestDataBuilder; -import org.codelibs.robot.client.S2RobotClient; -import org.codelibs.robot.client.S2RobotClientFactory; -import org.codelibs.robot.entity.ResponseData; -import org.codelibs.robot.entity.ResultData; -import org.codelibs.robot.exception.RobotSystemException; -import org.codelibs.robot.processor.ResponseProcessor; -import org.codelibs.robot.processor.impl.DefaultResponseProcessor; -import org.codelibs.robot.rule.Rule; -import org.codelibs.robot.rule.RuleManager; -import org.codelibs.robot.transformer.Transformer; +import org.codelibs.fess.crawler.builder.RequestDataBuilder; +import org.codelibs.fess.crawler.client.CrawlerClient; +import org.codelibs.fess.crawler.client.CrawlerClientFactory; +import org.codelibs.fess.crawler.entity.ResponseData; +import org.codelibs.fess.crawler.entity.ResultData; +import org.codelibs.fess.crawler.exception.CrawlerSystemException; +import org.codelibs.fess.crawler.processor.ResponseProcessor; +import org.codelibs.fess.crawler.processor.impl.DefaultResponseProcessor; +import org.codelibs.fess.crawler.rule.Rule; +import org.codelibs.fess.crawler.rule.RuleManager; +import org.codelibs.fess.crawler.transformer.Transformer; import org.lastaflute.di.core.SingletonLaContainer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -71,7 +71,7 @@ public class FileListDataStoreImpl extends CsvDataStoreImpl { public int maxDeleteDocumentCacheSize = 100; - protected S2RobotClientFactory robotClientFactory; + protected CrawlerClientFactory crawlerClientFactory; protected CrawlingSessionHelper crawlingSessionHelper; @@ -92,9 +92,9 @@ public class FileListDataStoreImpl extends CsvDataStoreImpl { @Override public void store(final DataConfig config, final IndexUpdateCallback callback, final Map initParamMap) { - robotClientFactory = SingletonLaContainer.getComponent(S2RobotClientFactory.class); + crawlerClientFactory = SingletonLaContainer.getComponent(CrawlerClientFactory.class); - config.initializeClientFactory(robotClientFactory); + config.initializeClientFactory(crawlerClientFactory); super.store(config, callback, initParamMap); } @@ -170,9 +170,9 @@ public class FileListDataStoreImpl extends CsvDataStoreImpl { final String url = dataMap.get(fieldHelper.urlField).toString(); try { - final S2RobotClient client = robotClientFactory.getClient(url); + final CrawlerClient client = crawlerClientFactory.getClient(url); if (client == null) { - logger.warn("S2RobotClient is null. Data: " + dataMap); + logger.warn("CrawlerClient is null. Data: " + dataMap); return false; } @@ -200,7 +200,7 @@ public class FileListDataStoreImpl extends CsvDataStoreImpl { (Map) SerializeUtil.fromBinaryToObject(data); dataMap.putAll(responseDataMap); } catch (final Exception e) { - throw new RobotSystemException("Could not create an instance from bytes.", e); + throw new CrawlerSystemException("Could not create an instance from bytes.", e); } } diff --git a/src/main/java/org/codelibs/fess/es/exentity/CrawlingConfig.java b/src/main/java/org/codelibs/fess/es/exentity/CrawlingConfig.java index f678a99b4..be4e8beaa 100644 --- a/src/main/java/org/codelibs/fess/es/exentity/CrawlingConfig.java +++ b/src/main/java/org/codelibs/fess/es/exentity/CrawlingConfig.java @@ -2,7 +2,7 @@ package org.codelibs.fess.es.exentity; import java.util.Map; -import org.codelibs.robot.client.S2RobotClientFactory; +import org.codelibs.fess.crawler.client.CrawlerClientFactory; public interface CrawlingConfig { @@ -20,7 +20,7 @@ public interface CrawlingConfig { String getConfigId(); - void initializeClientFactory(S2RobotClientFactory s2RobotClientFactory); + void initializeClientFactory(CrawlerClientFactory crawlerClientFactory); Map getConfigParameterMap(ConfigName name); diff --git a/src/main/java/org/codelibs/fess/es/exentity/DataConfig.java b/src/main/java/org/codelibs/fess/es/exentity/DataConfig.java index 79300d71c..b498e81ae 100644 --- a/src/main/java/org/codelibs/fess/es/exentity/DataConfig.java +++ b/src/main/java/org/codelibs/fess/es/exentity/DataConfig.java @@ -24,13 +24,13 @@ import org.codelibs.fess.es.exbhv.LabelTypeBhv; import org.codelibs.fess.es.exbhv.RoleTypeBhv; import org.codelibs.fess.util.ComponentUtil; import org.codelibs.fess.util.ParameterUtil; -import org.codelibs.robot.client.S2RobotClientFactory; -import org.codelibs.robot.client.http.Authentication; -import org.codelibs.robot.client.http.HcHttpClient; -import org.codelibs.robot.client.http.impl.AuthenticationImpl; -import org.codelibs.robot.client.http.ntlm.JcifsEngine; -import org.codelibs.robot.client.smb.SmbAuthentication; -import org.codelibs.robot.client.smb.SmbClient; +import org.codelibs.fess.crawler.client.CrawlerClientFactory; +import org.codelibs.fess.crawler.client.http.Authentication; +import org.codelibs.fess.crawler.client.http.HcHttpClient; +import org.codelibs.fess.crawler.client.http.impl.AuthenticationImpl; +import org.codelibs.fess.crawler.client.http.ntlm.JcifsEngine; +import org.codelibs.fess.crawler.client.smb.SmbAuthentication; +import org.codelibs.fess.crawler.client.smb.SmbClient; import org.dbflute.cbean.result.ListResultBean; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -44,15 +44,15 @@ public class DataConfig extends BsDataConfig implements CrawlingConfig { private static final Logger logger = LoggerFactory.getLogger(DataConfig.class); - private static final String S2ROBOT_WEB_HEADER_PREFIX = "s2robot.web.header."; + private static final String S2ROBOT_WEB_HEADER_PREFIX = "crawler.web.header."; - private static final String S2ROBOT_WEB_AUTH = "s2robot.web.auth"; + private static final String S2ROBOT_WEB_AUTH = "crawler.web.auth"; - private static final String S2ROBOT_USERAGENT = "s2robot.useragent"; + private static final String S2ROBOT_USERAGENT = "crawler.useragent"; - private static final String S2ROBOT_PARAM_PREFIX = "s2robot.param."; + private static final String S2ROBOT_PARAM_PREFIX = "crawler.param."; - private static final Object S2ROBOT_FILE_AUTH = "s2robot.file.auth"; + private static final Object S2ROBOT_FILE_AUTH = "crawler.file.auth"; private String[] labelTypeIds; @@ -211,11 +211,11 @@ public class DataConfig extends BsDataConfig implements CrawlingConfig { } @Override - public void initializeClientFactory(final S2RobotClientFactory robotClientFactory) { + public void initializeClientFactory(final CrawlerClientFactory crawlerClientFactory) { final Map paramMap = getHandlerParameterMap(); final Map factoryParamMap = new HashMap(); - robotClientFactory.setInitParameterMap(factoryParamMap); + crawlerClientFactory.setInitParameterMap(factoryParamMap); // parameters for (final Map.Entry entry : paramMap.entrySet()) { @@ -301,18 +301,19 @@ public class DataConfig extends BsDataConfig implements CrawlingConfig { } // request header - final List rhList = new ArrayList(); + final List rhList = + new ArrayList(); int count = 1; String headerName = paramMap.get(S2ROBOT_WEB_HEADER_PREFIX + count + ".name"); while (StringUtil.isNotBlank(headerName)) { final String headerValue = paramMap.get(S2ROBOT_WEB_HEADER_PREFIX + count + ".value"); - rhList.add(new org.codelibs.robot.client.http.RequestHeader(headerName, headerValue)); + rhList.add(new org.codelibs.fess.crawler.client.http.RequestHeader(headerName, headerValue)); count++; headerName = paramMap.get(S2ROBOT_WEB_HEADER_PREFIX + count + ".name"); } if (!rhList.isEmpty()) { factoryParamMap.put(HcHttpClient.REQUERT_HEADERS_PROPERTY, - rhList.toArray(new org.codelibs.robot.client.http.RequestHeader[rhList.size()])); + rhList.toArray(new org.codelibs.fess.crawler.client.http.RequestHeader[rhList.size()])); } // file auth diff --git a/src/main/java/org/codelibs/fess/es/exentity/FileConfig.java b/src/main/java/org/codelibs/fess/es/exentity/FileConfig.java index 7ef16f246..be64db551 100644 --- a/src/main/java/org/codelibs/fess/es/exentity/FileConfig.java +++ b/src/main/java/org/codelibs/fess/es/exentity/FileConfig.java @@ -18,9 +18,9 @@ import org.codelibs.fess.es.exbhv.RoleTypeBhv; import org.codelibs.fess.helper.SystemHelper; import org.codelibs.fess.util.ComponentUtil; import org.codelibs.fess.util.ParameterUtil; -import org.codelibs.robot.client.S2RobotClientFactory; -import org.codelibs.robot.client.smb.SmbAuthentication; -import org.codelibs.robot.client.smb.SmbClient; +import org.codelibs.fess.crawler.client.CrawlerClientFactory; +import org.codelibs.fess.crawler.client.smb.SmbAuthentication; +import org.codelibs.fess.crawler.client.smb.SmbClient; import org.dbflute.cbean.result.ListResultBean; import org.lastaflute.di.core.SingletonLaContainer; @@ -228,7 +228,7 @@ public class FileConfig extends BsFileConfig implements CrawlingConfig { } @Override - public void initializeClientFactory(final S2RobotClientFactory clientFactory) { + public void initializeClientFactory(final CrawlerClientFactory clientFactory) { final FileAuthenticationService fileAuthenticationService = SingletonLaContainer.getComponent(FileAuthenticationService.class); // Parameters diff --git a/src/main/java/org/codelibs/fess/es/exentity/RequestHeader.java b/src/main/java/org/codelibs/fess/es/exentity/RequestHeader.java index 263640e25..e67153778 100644 --- a/src/main/java/org/codelibs/fess/es/exentity/RequestHeader.java +++ b/src/main/java/org/codelibs/fess/es/exentity/RequestHeader.java @@ -30,8 +30,8 @@ public class RequestHeader extends BsRequestHeader { asDocMeta().version(version); } - public org.codelibs.robot.client.http.RequestHeader getS2RobotRequestHeader() { - return new org.codelibs.robot.client.http.RequestHeader(getName(), getValue()); + public org.codelibs.fess.crawler.client.http.RequestHeader getCrawlerRequestHeader() { + return new org.codelibs.fess.crawler.client.http.RequestHeader(getName(), getValue()); } public WebConfig getWebConfig() { diff --git a/src/main/java/org/codelibs/fess/es/exentity/WebAuthentication.java b/src/main/java/org/codelibs/fess/es/exentity/WebAuthentication.java index 481db5aed..fd220bacb 100644 --- a/src/main/java/org/codelibs/fess/es/exentity/WebAuthentication.java +++ b/src/main/java/org/codelibs/fess/es/exentity/WebAuthentication.java @@ -16,10 +16,10 @@ import org.codelibs.fess.app.service.WebConfigService; import org.codelibs.fess.es.bsentity.BsWebAuthentication; import org.codelibs.fess.util.ComponentUtil; import org.codelibs.fess.util.ParameterUtil; -import org.codelibs.robot.client.http.Authentication; -import org.codelibs.robot.client.http.impl.AuthenticationImpl; -import org.codelibs.robot.client.http.ntlm.JcifsEngine; -import org.codelibs.robot.exception.RobotSystemException; +import org.codelibs.fess.crawler.client.http.Authentication; +import org.codelibs.fess.crawler.client.http.impl.AuthenticationImpl; +import org.codelibs.fess.crawler.client.http.ntlm.JcifsEngine; +import org.codelibs.fess.crawler.exception.CrawlerSystemException; /** * @author FreeGen @@ -71,7 +71,7 @@ public class WebAuthentication extends BsWebAuthentication { private Credentials getCredentials() { if (StringUtil.isEmpty(getUsername())) { - throw new RobotSystemException("username is empty."); + throw new CrawlerSystemException("username is empty."); } if (Constants.NTLM.equals(getProtocolScheme())) { diff --git a/src/main/java/org/codelibs/fess/es/exentity/WebConfig.java b/src/main/java/org/codelibs/fess/es/exentity/WebConfig.java index feee2c79b..6bd97f37f 100644 --- a/src/main/java/org/codelibs/fess/es/exentity/WebConfig.java +++ b/src/main/java/org/codelibs/fess/es/exentity/WebConfig.java @@ -18,9 +18,9 @@ import org.codelibs.fess.es.exbhv.WebConfigToLabelBhv; import org.codelibs.fess.es.exbhv.WebConfigToRoleBhv; import org.codelibs.fess.util.ComponentUtil; import org.codelibs.fess.util.ParameterUtil; -import org.codelibs.robot.client.S2RobotClientFactory; -import org.codelibs.robot.client.http.Authentication; -import org.codelibs.robot.client.http.HcHttpClient; +import org.codelibs.fess.crawler.client.CrawlerClientFactory; +import org.codelibs.fess.crawler.client.http.Authentication; +import org.codelibs.fess.crawler.client.http.HcHttpClient; import org.dbflute.cbean.result.ListResultBean; import org.lastaflute.di.core.SingletonLaContainer; @@ -232,7 +232,7 @@ public class WebConfig extends BsWebConfig implements CrawlingConfig { } @Override - public void initializeClientFactory(final S2RobotClientFactory clientFactory) { + public void initializeClientFactory(final CrawlerClientFactory clientFactory) { final WebAuthenticationService webAuthenticationService = SingletonLaContainer.getComponent(WebAuthenticationService.class); final RequestHeaderService requestHeaderService = SingletonLaContainer.getComponent(RequestHeaderService.class); @@ -259,11 +259,13 @@ public class WebConfig extends BsWebConfig implements CrawlingConfig { // request header final List requestHeaderList = requestHeaderService.getRequestHeaderList(getId()); - final List rhList = new ArrayList(); + final List rhList = + new ArrayList(); for (final RequestHeader requestHeader : requestHeaderList) { - rhList.add(requestHeader.getS2RobotRequestHeader()); + rhList.add(requestHeader.getCrawlerRequestHeader()); } - paramMap.put(HcHttpClient.REQUERT_HEADERS_PROPERTY, rhList.toArray(new org.codelibs.robot.client.http.RequestHeader[rhList.size()])); + paramMap.put(HcHttpClient.REQUERT_HEADERS_PROPERTY, + rhList.toArray(new org.codelibs.fess.crawler.client.http.RequestHeader[rhList.size()])); } diff --git a/src/main/java/org/codelibs/fess/exec/Crawler.java b/src/main/java/org/codelibs/fess/exec/Crawler.java index 049a52fd8..5573a2ba7 100644 --- a/src/main/java/org/codelibs/fess/exec/Crawler.java +++ b/src/main/java/org/codelibs/fess/exec/Crawler.java @@ -46,7 +46,7 @@ import org.codelibs.fess.helper.PathMappingHelper; import org.codelibs.fess.helper.WebFsIndexHelper; import org.codelibs.fess.util.ComponentUtil; import org.codelibs.fess.util.ResourceUtil; -import org.codelibs.robot.client.EsClient; +import org.codelibs.fess.crawler.client.EsClient; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryBuilders; import org.kohsuke.args4j.CmdLineException; diff --git a/src/main/java/org/codelibs/fess/helper/RobotLogHelper.java b/src/main/java/org/codelibs/fess/helper/CrawlerLogHelper.java similarity index 71% rename from src/main/java/org/codelibs/fess/helper/RobotLogHelper.java rename to src/main/java/org/codelibs/fess/helper/CrawlerLogHelper.java index 664a98415..8f6570a3d 100644 --- a/src/main/java/org/codelibs/fess/helper/RobotLogHelper.java +++ b/src/main/java/org/codelibs/fess/helper/CrawlerLogHelper.java @@ -19,29 +19,29 @@ package org.codelibs.fess.helper; import org.codelibs.fess.app.service.FailureUrlService; import org.codelibs.fess.es.exentity.CrawlingConfig; import org.codelibs.fess.util.ComponentUtil; -import org.codelibs.robot.S2RobotContext; -import org.codelibs.robot.entity.UrlQueue; -import org.codelibs.robot.exception.RobotMultipleCrawlAccessException; -import org.codelibs.robot.helper.impl.LogHelperImpl; -import org.codelibs.robot.log.LogType; +import org.codelibs.fess.crawler.CrawlerContext; +import org.codelibs.fess.crawler.entity.UrlQueue; +import org.codelibs.fess.crawler.exception.MultipleCrawlingAccessException; +import org.codelibs.fess.crawler.helper.impl.LogHelperImpl; +import org.codelibs.fess.crawler.log.LogType; import org.lastaflute.di.core.SingletonLaContainer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -public class RobotLogHelper extends LogHelperImpl { +public class CrawlerLogHelper extends LogHelperImpl { private static final Logger logger = LoggerFactory // NOPMD - .getLogger(RobotLogHelper.class); + .getLogger(CrawlerLogHelper.class); @Override public void log(final LogType key, final Object... objs) { try { switch (key) { case CRAWLING_ACCESS_EXCEPTION: { - final S2RobotContext robotContext = (S2RobotContext) objs[0]; + final CrawlerContext crawlerContext = (CrawlerContext) objs[0]; final UrlQueue urlQueue = (UrlQueue) objs[1]; Throwable e = (Throwable) objs[2]; - if (e instanceof RobotMultipleCrawlAccessException) { - final Throwable[] causes = ((RobotMultipleCrawlAccessException) e).getCauses(); + if (e instanceof MultipleCrawlingAccessException) { + final Throwable[] causes = ((MultipleCrawlingAccessException) e).getCauses(); if (causes.length > 0) { e = causes[causes.length - 1]; } @@ -54,15 +54,15 @@ public class RobotLogHelper extends LogHelperImpl { } else { errorName = e.getClass().getCanonicalName(); } - storeFailureUrl(robotContext, urlQueue, errorName, e); + storeFailureUrl(crawlerContext, urlQueue, errorName, e); break; } case CRAWLING_EXCETPION: { - final S2RobotContext robotContext = (S2RobotContext) objs[0]; + final CrawlerContext crawlerContext = (CrawlerContext) objs[0]; final UrlQueue urlQueue = (UrlQueue) objs[1]; final Throwable e = (Throwable) objs[2]; - storeFailureUrl(robotContext, urlQueue, e.getClass().getCanonicalName(), e); + storeFailureUrl(crawlerContext, urlQueue, e.getClass().getCanonicalName(), e); break; } default: @@ -75,9 +75,9 @@ public class RobotLogHelper extends LogHelperImpl { super.log(key, objs); } - private void storeFailureUrl(final S2RobotContext robotContext, final UrlQueue urlQueue, final String errorName, final Throwable e) { + private void storeFailureUrl(final CrawlerContext crawlerContext, final UrlQueue urlQueue, final String errorName, final Throwable e) { - final CrawlingConfig crawlingConfig = getCrawlingConfig(robotContext.getSessionId()); + final CrawlingConfig crawlingConfig = getCrawlingConfig(crawlerContext.getSessionId()); final String url = urlQueue.getUrl(); final FailureUrlService failureUrlService = SingletonLaContainer.getComponent(FailureUrlService.class); diff --git a/src/main/java/org/codelibs/fess/helper/DataIndexHelper.java b/src/main/java/org/codelibs/fess/helper/DataIndexHelper.java index 0f36f73d4..7c3505c81 100644 --- a/src/main/java/org/codelibs/fess/helper/DataIndexHelper.java +++ b/src/main/java/org/codelibs/fess/helper/DataIndexHelper.java @@ -132,8 +132,8 @@ public class DataIndexHelper implements Serializable { while (startedCrawlerNum < dataCrawlingThreadList.size()) { // Force to stop crawl if (systemHelper.isForceStop()) { - for (final DataCrawlingThread s2Robot : dataCrawlingThreadList) { - s2Robot.stopCrawling(); + for (final DataCrawlingThread crawlerThread : dataCrawlingThreadList) { + crawlerThread.stopCrawling(); } break; } diff --git a/src/main/java/org/codelibs/fess/helper/SystemHelper.java b/src/main/java/org/codelibs/fess/helper/SystemHelper.java index 4bfd6e217..485c87672 100644 --- a/src/main/java/org/codelibs/fess/helper/SystemHelper.java +++ b/src/main/java/org/codelibs/fess/helper/SystemHelper.java @@ -42,7 +42,7 @@ import org.codelibs.fess.Constants; import org.codelibs.fess.app.service.RoleTypeService; import org.codelibs.fess.es.exentity.RoleType; import org.codelibs.fess.util.ComponentUtil; -import org.codelibs.robot.util.CharUtil; +import org.codelibs.fess.crawler.util.CharUtil; import org.lastaflute.di.core.SingletonLaContainer; import org.lastaflute.web.util.LaRequestUtil; diff --git a/src/main/java/org/codelibs/fess/helper/ViewHelper.java b/src/main/java/org/codelibs/fess/helper/ViewHelper.java index f04bc5d20..95e95f9af 100644 --- a/src/main/java/org/codelibs/fess/helper/ViewHelper.java +++ b/src/main/java/org/codelibs/fess/helper/ViewHelper.java @@ -56,11 +56,11 @@ import org.codelibs.fess.helper.UserAgentHelper.UserAgentType; import org.codelibs.fess.util.ComponentUtil; import org.codelibs.fess.util.DocumentUtil; import org.codelibs.fess.util.ResourceUtil; -import org.codelibs.robot.builder.RequestDataBuilder; -import org.codelibs.robot.client.S2RobotClient; -import org.codelibs.robot.client.S2RobotClientFactory; -import org.codelibs.robot.entity.ResponseData; -import org.codelibs.robot.util.CharUtil; +import org.codelibs.fess.crawler.builder.RequestDataBuilder; +import org.codelibs.fess.crawler.client.CrawlerClient; +import org.codelibs.fess.crawler.client.CrawlerClientFactory; +import org.codelibs.fess.crawler.entity.ResponseData; +import org.codelibs.fess.crawler.util.CharUtil; import org.lastaflute.di.core.SingletonLaContainer; import org.lastaflute.taglib.function.LaFunctions; import org.lastaflute.web.response.StreamResponse; @@ -508,11 +508,11 @@ public class ViewHelper implements Serializable { throw new FessSystemException("No crawlingConfig: " + configIdObj); } final String url = (String) doc.get(fieldHelper.urlField); - final S2RobotClientFactory robotClientFactory = SingletonLaContainer.getComponent(S2RobotClientFactory.class); - config.initializeClientFactory(robotClientFactory); - final S2RobotClient client = robotClientFactory.getClient(url); + final CrawlerClientFactory crawlerClientFactory = SingletonLaContainer.getComponent(CrawlerClientFactory.class); + config.initializeClientFactory(crawlerClientFactory); + final CrawlerClient client = crawlerClientFactory.getClient(url); if (client == null) { - throw new FessSystemException("No S2RobotClient: " + configIdObj + ", url: " + url); + throw new FessSystemException("No CrawlerClient: " + configIdObj + ", url: " + url); } final ResponseData responseData = client.execute(RequestDataBuilder.newRequestData().get().url(url).build()); final StreamResponse response = new StreamResponse(StringUtil.EMPTY); diff --git a/src/main/java/org/codelibs/fess/helper/WebFsIndexHelper.java b/src/main/java/org/codelibs/fess/helper/WebFsIndexHelper.java index d5cd755d1..bf2f7d008 100644 --- a/src/main/java/org/codelibs/fess/helper/WebFsIndexHelper.java +++ b/src/main/java/org/codelibs/fess/helper/WebFsIndexHelper.java @@ -37,11 +37,11 @@ import org.codelibs.fess.es.exentity.FileConfig; import org.codelibs.fess.es.exentity.WebConfig; import org.codelibs.fess.indexer.IndexUpdater; import org.codelibs.fess.util.ComponentUtil; -import org.codelibs.robot.S2Robot; -import org.codelibs.robot.S2RobotContext; -import org.codelibs.robot.service.DataService; -import org.codelibs.robot.service.UrlFilterService; -import org.codelibs.robot.service.UrlQueueService; +import org.codelibs.fess.crawler.Crawler; +import org.codelibs.fess.crawler.CrawlerContext; +import org.codelibs.fess.crawler.service.DataService; +import org.codelibs.fess.crawler.service.UrlFilterService; +import org.codelibs.fess.crawler.service.UrlQueueService; import org.lastaflute.di.core.SingletonLaContainer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -81,7 +81,7 @@ public class WebFsIndexHelper implements Serializable { public int crawlerPriority = Thread.NORM_PRIORITY; - private final List s2RobotList = Collections.synchronizedList(new ArrayList()); + private final List crawlerList = Collections.synchronizedList(new ArrayList()); // needed? @Deprecated @@ -140,15 +140,15 @@ public class WebFsIndexHelper implements Serializable { final long startTime = System.currentTimeMillis(); final List sessionIdList = new ArrayList(); - s2RobotList.clear(); - final List s2RobotStatusList = new ArrayList(); + crawlerList.clear(); + final List crawlerStatusList = new ArrayList(); // Web for (final WebConfig webConfig : webConfigList) { final String sid = crawlingConfigHelper.store(sessionId, webConfig); - // create s2robot - final S2Robot s2Robot = SingletonLaContainer.getComponent(S2Robot.class); - s2Robot.setSessionId(sid); + // create crawler + final Crawler crawler = SingletonLaContainer.getComponent(Crawler.class); + crawler.setSessionId(sid); sessionIdList.add(sid); final String urlsStr = webConfig.getUrls(); @@ -160,26 +160,26 @@ public class WebFsIndexHelper implements Serializable { // interval time final int intervalTime = webConfig.getIntervalTime() != null ? webConfig.getIntervalTime() : Constants.DEFAULT_INTERVAL_TIME_FOR_WEB; - ((FessIntervalController) s2Robot.getIntervalController()).setDelayMillisForWaitingNewUrl(intervalTime); + ((FessIntervalController) crawler.getIntervalController()).setDelayMillisForWaitingNewUrl(intervalTime); final String includedUrlsStr = webConfig.getIncludedUrls() != null ? webConfig.getIncludedUrls() : StringUtil.EMPTY; final String excludedUrlsStr = webConfig.getExcludedUrls() != null ? webConfig.getExcludedUrls() : StringUtil.EMPTY; // num of threads - final S2RobotContext robotContext = s2Robot.getRobotContext(); + final CrawlerContext crawlerContext = crawler.getCrawlerContext(); final int numOfThread = webConfig.getNumOfThread() != null ? webConfig.getNumOfThread() : Constants.DEFAULT_NUM_OF_THREAD_FOR_WEB; - robotContext.setNumOfThread(numOfThread); + crawlerContext.setNumOfThread(numOfThread); // depth final int depth = webConfig.getDepth() != null ? webConfig.getDepth() : -1; - robotContext.setMaxDepth(depth); + crawlerContext.setMaxDepth(depth); // max count final long maxCount = webConfig.getMaxAccessCount() != null ? webConfig.getMaxAccessCount() : maxAccessCount; - robotContext.setMaxAccessCount(maxCount); + crawlerContext.setMaxAccessCount(maxCount); - webConfig.initializeClientFactory(s2Robot.getClientFactory()); + webConfig.initializeClientFactory(crawler.getClientFactory()); // set urls final String[] urls = urlsStr.split("[\r\n]"); @@ -187,7 +187,7 @@ public class WebFsIndexHelper implements Serializable { if (StringUtil.isNotBlank(u)) { final String urlValue = u.trim(); if (!urlValue.startsWith("#")) { - s2Robot.addUrl(urlValue); + crawler.addUrl(urlValue); if (logger.isInfoEnabled()) { logger.info("Target URL: " + urlValue); } @@ -201,7 +201,7 @@ public class WebFsIndexHelper implements Serializable { if (StringUtil.isNotBlank(u)) { final String urlValue = u.trim(); if (!urlValue.startsWith("#")) { - s2Robot.addIncludeFilter(urlValue); + crawler.addIncludeFilter(urlValue); if (logger.isInfoEnabled()) { logger.info("Included URL: " + urlValue); } @@ -215,7 +215,7 @@ public class WebFsIndexHelper implements Serializable { if (StringUtil.isNotBlank(u)) { final String urlValue = u.trim(); if (!urlValue.startsWith("#")) { - s2Robot.addExcludeFilter(urlValue); + crawler.addExcludeFilter(urlValue); if (logger.isInfoEnabled()) { logger.info("Excluded URL: " + urlValue); } @@ -229,7 +229,7 @@ public class WebFsIndexHelper implements Serializable { for (final String u : excludedUrlList) { if (StringUtil.isNotBlank(u)) { final String urlValue = u.trim(); - s2Robot.addExcludeFilter(urlValue); + crawler.addExcludeFilter(urlValue); if (logger.isInfoEnabled()) { logger.info("Excluded URL from failures: " + urlValue); } @@ -241,20 +241,20 @@ public class WebFsIndexHelper implements Serializable { logger.debug("Crawling " + urlsStr); } - s2Robot.setBackground(true); - s2Robot.setThreadPriority(crawlerPriority); + crawler.setBackground(true); + crawler.setThreadPriority(crawlerPriority); - s2RobotList.add(s2Robot); - s2RobotStatusList.add(Constants.READY); + crawlerList.add(crawler); + crawlerStatusList.add(Constants.READY); } // File for (final FileConfig fileConfig : fileConfigList) { final String sid = crawlingConfigHelper.store(sessionId, fileConfig); - // create s2robot - final S2Robot s2Robot = SingletonLaContainer.getComponent(S2Robot.class); - s2Robot.setSessionId(sid); + // create crawler + final Crawler crawler = SingletonLaContainer.getComponent(Crawler.class); + crawler.setSessionId(sid); sessionIdList.add(sid); final String pathsStr = fileConfig.getPaths(); @@ -265,26 +265,26 @@ public class WebFsIndexHelper implements Serializable { final int intervalTime = fileConfig.getIntervalTime() != null ? fileConfig.getIntervalTime() : Constants.DEFAULT_INTERVAL_TIME_FOR_FS; - ((FessIntervalController) s2Robot.getIntervalController()).setDelayMillisForWaitingNewUrl(intervalTime); + ((FessIntervalController) crawler.getIntervalController()).setDelayMillisForWaitingNewUrl(intervalTime); final String includedPathsStr = fileConfig.getIncludedPaths() != null ? fileConfig.getIncludedPaths() : StringUtil.EMPTY; final String excludedPathsStr = fileConfig.getExcludedPaths() != null ? fileConfig.getExcludedPaths() : StringUtil.EMPTY; // num of threads - final S2RobotContext robotContext = s2Robot.getRobotContext(); + final CrawlerContext crawlerContext = crawler.getCrawlerContext(); final int numOfThread = fileConfig.getNumOfThread() != null ? fileConfig.getNumOfThread() : Constants.DEFAULT_NUM_OF_THREAD_FOR_FS; - robotContext.setNumOfThread(numOfThread); + crawlerContext.setNumOfThread(numOfThread); // depth final int depth = fileConfig.getDepth() != null ? fileConfig.getDepth() : -1; - robotContext.setMaxDepth(depth); + crawlerContext.setMaxDepth(depth); // max count final long maxCount = fileConfig.getMaxAccessCount() != null ? fileConfig.getMaxAccessCount() : maxAccessCount; - robotContext.setMaxAccessCount(maxCount); + crawlerContext.setMaxAccessCount(maxCount); - fileConfig.initializeClientFactory(s2Robot.getClientFactory()); + fileConfig.initializeClientFactory(crawler.getClientFactory()); // set paths final String[] paths = pathsStr.split("[\r\n]"); @@ -299,7 +299,7 @@ public class WebFsIndexHelper implements Serializable { u = "file:/" + u; } } - s2Robot.addUrl(u); + crawler.addUrl(u); if (logger.isInfoEnabled()) { logger.info("Target Path: " + u); } @@ -321,7 +321,7 @@ public class WebFsIndexHelper implements Serializable { } else { urlValue = systemHelper.encodeUrlFilter(line); } - s2Robot.addIncludeFilter(urlValue); + crawler.addIncludeFilter(urlValue); if (logger.isInfoEnabled()) { logger.info("Included Path: " + urlValue); } @@ -345,7 +345,7 @@ public class WebFsIndexHelper implements Serializable { } else { urlValue = systemHelper.encodeUrlFilter(line); } - s2Robot.addExcludeFilter(urlValue); + crawler.addExcludeFilter(urlValue); if (logger.isInfoEnabled()) { logger.info("Excluded Path: " + urlValue); } @@ -361,7 +361,7 @@ public class WebFsIndexHelper implements Serializable { for (final String u : excludedUrlList) { if (StringUtil.isNotBlank(u)) { final String urlValue = u.trim(); - s2Robot.addExcludeFilter(urlValue); + crawler.addExcludeFilter(urlValue); if (logger.isInfoEnabled()) { logger.info("Excluded Path from failures: " + urlValue); } @@ -373,11 +373,11 @@ public class WebFsIndexHelper implements Serializable { logger.debug("Crawling " + pathsStr); } - s2Robot.setBackground(true); - s2Robot.setThreadPriority(crawlerPriority); + crawler.setBackground(true); + crawler.setThreadPriority(crawlerPriority); - s2RobotList.add(s2Robot); - s2RobotStatusList.add(Constants.READY); + crawlerList.add(crawler); + crawlerStatusList.add(Constants.READY); } // run index update @@ -386,7 +386,7 @@ public class WebFsIndexHelper implements Serializable { indexUpdater.setPriority(indexUpdaterPriority); indexUpdater.setSessionIdList(sessionIdList); indexUpdater.setDaemon(true); - indexUpdater.setS2RobotList(s2RobotList); + indexUpdater.setCrawlerList(crawlerList); for (final BoostDocumentRule rule : boostDocumentRuleService.getAvailableBoostDocumentRuleList()) { indexUpdater.addBoostDocumentRule(new org.codelibs.fess.indexer.BoostDocumentRule(rule)); } @@ -394,19 +394,19 @@ public class WebFsIndexHelper implements Serializable { int startedCrawlerNum = 0; int activeCrawlerNum = 0; - while (startedCrawlerNum < s2RobotList.size()) { + while (startedCrawlerNum < crawlerList.size()) { // Force to stop crawl if (systemHelper.isForceStop()) { - for (final S2Robot s2Robot : s2RobotList) { - s2Robot.stop(); + for (final Crawler crawler : crawlerList) { + crawler.stop(); } break; } if (activeCrawlerNum < multiprocessCrawlingCount) { // start crawling - s2RobotList.get(startedCrawlerNum).execute(); - s2RobotStatusList.set(startedCrawlerNum, Constants.RUNNING); + crawlerList.get(startedCrawlerNum).execute(); + crawlerStatusList.set(startedCrawlerNum, Constants.RUNNING); startedCrawlerNum++; activeCrawlerNum++; try { @@ -419,10 +419,10 @@ public class WebFsIndexHelper implements Serializable { // check status for (int i = 0; i < startedCrawlerNum; i++) { - if (!s2RobotList.get(i).getRobotContext().isRunning() && s2RobotStatusList.get(i).equals(Constants.RUNNING)) { - s2RobotList.get(i).awaitTermination(); - s2RobotStatusList.set(i, Constants.DONE); - final String sid = s2RobotList.get(i).getRobotContext().getSessionId(); + if (!crawlerList.get(i).getCrawlerContext().isRunning() && crawlerStatusList.get(i).equals(Constants.RUNNING)) { + crawlerList.get(i).awaitTermination(); + crawlerStatusList.set(i, Constants.DONE); + final String sid = crawlerList.get(i).getCrawlerContext().getSessionId(); indexUpdater.addFinishedSessionId(sid); activeCrawlerNum--; } @@ -437,20 +437,20 @@ public class WebFsIndexHelper implements Serializable { boolean finishedAll = false; while (!finishedAll) { finishedAll = true; - for (int i = 0; i < s2RobotList.size(); i++) { - s2RobotList.get(i).awaitTermination(crawlingExecutionInterval); - if (!s2RobotList.get(i).getRobotContext().isRunning() && !s2RobotStatusList.get(i).equals(Constants.DONE)) { - s2RobotStatusList.set(i, Constants.DONE); - final String sid = s2RobotList.get(i).getRobotContext().getSessionId(); + for (int i = 0; i < crawlerList.size(); i++) { + crawlerList.get(i).awaitTermination(crawlingExecutionInterval); + if (!crawlerList.get(i).getCrawlerContext().isRunning() && !crawlerStatusList.get(i).equals(Constants.DONE)) { + crawlerStatusList.set(i, Constants.DONE); + final String sid = crawlerList.get(i).getCrawlerContext().getSessionId(); indexUpdater.addFinishedSessionId(sid); } - if (!s2RobotStatusList.get(i).equals(Constants.DONE)) { + if (!crawlerStatusList.get(i).equals(Constants.DONE)) { finishedAll = false; } } } - s2RobotList.clear(); - s2RobotStatusList.clear(); + crawlerList.clear(); + crawlerStatusList.clear(); // put cralwing info final CrawlingSessionHelper crawlingSessionHelper = ComponentUtil.getCrawlingSessionHelper(); diff --git a/src/main/java/org/codelibs/fess/indexer/BoostDocumentRule.java b/src/main/java/org/codelibs/fess/indexer/BoostDocumentRule.java index 576e4bdb8..0e9335429 100644 --- a/src/main/java/org/codelibs/fess/indexer/BoostDocumentRule.java +++ b/src/main/java/org/codelibs/fess/indexer/BoostDocumentRule.java @@ -53,7 +53,11 @@ public class BoostDocumentRule { return ((Boolean) value).booleanValue(); } } catch (final Exception e) { - logger.warn("Failed to parse a doc for boost: " + map, e); + if (logger.isDebugEnabled()) { + logger.debug("Failed to evaluate \"" + matchExpression + "\" for " + map, e); + } else { + logger.warn("Failed to evaluate \"" + matchExpression + "\"."); + } } return false; diff --git a/src/main/java/org/codelibs/fess/indexer/IndexUpdater.java b/src/main/java/org/codelibs/fess/indexer/IndexUpdater.java index c512415b6..2010e3e61 100644 --- a/src/main/java/org/codelibs/fess/indexer/IndexUpdater.java +++ b/src/main/java/org/codelibs/fess/indexer/IndexUpdater.java @@ -36,17 +36,17 @@ import org.codelibs.fess.helper.IntervalControlHelper; import org.codelibs.fess.helper.SearchLogHelper; import org.codelibs.fess.helper.SystemHelper; import org.codelibs.fess.util.ComponentUtil; -import org.codelibs.robot.S2Robot; -import org.codelibs.robot.entity.AccessResult; -import org.codelibs.robot.entity.AccessResultData; -import org.codelibs.robot.entity.EsAccessResult; -import org.codelibs.robot.entity.EsUrlQueue; -import org.codelibs.robot.service.DataService; -import org.codelibs.robot.service.UrlFilterService; -import org.codelibs.robot.service.UrlQueueService; -import org.codelibs.robot.service.impl.EsDataService; -import org.codelibs.robot.transformer.Transformer; -import org.codelibs.robot.util.EsResultList; +import org.codelibs.fess.crawler.Crawler; +import org.codelibs.fess.crawler.entity.AccessResult; +import org.codelibs.fess.crawler.entity.AccessResultData; +import org.codelibs.fess.crawler.entity.EsAccessResult; +import org.codelibs.fess.crawler.entity.EsUrlQueue; +import org.codelibs.fess.crawler.service.DataService; +import org.codelibs.fess.crawler.service.UrlFilterService; +import org.codelibs.fess.crawler.service.UrlQueueService; +import org.codelibs.fess.crawler.service.impl.EsDataService; +import org.codelibs.fess.crawler.transformer.Transformer; +import org.codelibs.fess.crawler.util.EsResultList; import org.elasticsearch.action.search.SearchRequestBuilder; import org.elasticsearch.index.query.FilterBuilders; import org.elasticsearch.index.query.QueryBuilder; @@ -122,7 +122,7 @@ public class IndexUpdater extends Thread { private final Map docValueMap = new HashMap(); - private List s2RobotList; + private List crawlerList; public IndexUpdater() { // nothing @@ -176,7 +176,7 @@ public class IndexUpdater extends Thread { .boolFilter() .must(FilterBuilders.termsFilter(EsAccessResult.SESSION_ID, sessionIdList)) .must(FilterBuilders.termFilter(EsAccessResult.STATUS, - org.codelibs.robot.Constants.OK_STATUS))); + org.codelibs.fess.crawler.Constants.OK_STATUS))); builder.setQuery(queryBuilder); builder.setFrom(0); if (maxDocumentCacheSize <= 0) { @@ -507,8 +507,8 @@ public class IndexUpdater extends Thread { private void forceStop() { systemHelper.setForceStop(true); - for (final S2Robot s2Robot : s2RobotList) { - s2Robot.stop(); + for (final Crawler crawler : crawlerList) { + crawler.stop(); } } @@ -557,7 +557,7 @@ public class IndexUpdater extends Thread { docValueMap.put(fieldName, value); } - public void setS2RobotList(final List s2RobotList) { - this.s2RobotList = s2RobotList; + public void setCrawlerList(final List crawlerList) { + this.crawlerList = crawlerList; } } diff --git a/src/main/java/org/codelibs/fess/util/ComponentUtil.java b/src/main/java/org/codelibs/fess/util/ComponentUtil.java index 2f8901bba..13c4b015b 100644 --- a/src/main/java/org/codelibs/fess/util/ComponentUtil.java +++ b/src/main/java/org/codelibs/fess/util/ComponentUtil.java @@ -45,9 +45,9 @@ import org.codelibs.fess.helper.UserAgentHelper; import org.codelibs.fess.helper.ViewHelper; import org.codelibs.fess.indexer.IndexUpdater; import org.codelibs.fess.job.JobExecutor; -import org.codelibs.robot.entity.EsAccessResult; -import org.codelibs.robot.extractor.ExtractorFactory; -import org.codelibs.robot.service.DataService; +import org.codelibs.fess.crawler.entity.EsAccessResult; +import org.codelibs.fess.crawler.extractor.ExtractorFactory; +import org.codelibs.fess.crawler.service.DataService; import org.lastaflute.core.message.MessageManager; import org.lastaflute.di.core.SingletonLaContainer; import org.lastaflute.di.core.factory.SingletonLaContainerFactory; diff --git a/src/main/java/org/codelibs/fess/util/ResourceUtil.java b/src/main/java/org/codelibs/fess/util/ResourceUtil.java index 586fff005..959655914 100644 --- a/src/main/java/org/codelibs/fess/util/ResourceUtil.java +++ b/src/main/java/org/codelibs/fess/util/ResourceUtil.java @@ -73,19 +73,19 @@ public class ResourceUtil { } catch (final Throwable e) { // NOSONAR // ignore } - Path path = Paths.get(".", names); - if (Files.exists(path)) { - return path; + final Path defaultPath = Paths.get("WEB-INF/" + base, names); + if (Files.exists(defaultPath)) { + return defaultPath; } - path = Paths.get("src/main/webapps/WEB-INF/" + base, names); - if (Files.exists(path)) { - return path; + final Path srcBasePath = Paths.get("src/main/webapps/WEB-INF/" + base, names); + if (Files.exists(srcBasePath)) { + return srcBasePath; } - path = Paths.get("target/fess/WEB-INF/" + base, names); - if (Files.exists(path)) { - return path; + final Path targetBasePath = Paths.get("target/fess/WEB-INF/" + base, names); + if (Files.exists(targetBasePath)) { + return targetBasePath; } - return path; + return defaultPath; } public static File[] getJarFiles(final String namePrefix) { diff --git a/src/main/resources/app.xml b/src/main/resources/app.xml index 393e06e7e..4abe69586 100644 --- a/src/main/resources/app.xml +++ b/src/main/resources/app.xml @@ -10,8 +10,8 @@ - - + + diff --git a/src/main/resources/crawler.xml b/src/main/resources/crawler.xml new file mode 100644 index 000000000..007040aae --- /dev/null +++ b/src/main/resources/crawler.xml @@ -0,0 +1,52 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ".crawler" + "queue" + + + ".crawler" + "data" + + + ".crawler" + "filter" + + + diff --git a/src/main/resources/s2robot/contentlength.xml b/src/main/resources/crawler/contentlength.xml similarity index 70% rename from src/main/resources/s2robot/contentlength.xml rename to src/main/resources/crawler/contentlength.xml index 7c314d1d6..d155cbb86 100644 --- a/src/main/resources/s2robot/contentlength.xml +++ b/src/main/resources/crawler/contentlength.xml @@ -1,11 +1,11 @@ - - + + + class="org.codelibs.fess.crawler.helper.ContentLengthHelper" instance="singleton"> 10485760 "text/html" diff --git a/src/main/resources/s2robot/es.xml b/src/main/resources/crawler/es.xml similarity index 80% rename from src/main/resources/s2robot/es.xml rename to src/main/resources/crawler/es.xml index 4d9d756b9..1bc8475aa 100644 --- a/src/main/resources/s2robot/es.xml +++ b/src/main/resources/crawler/es.xml @@ -3,6 +3,6 @@ "http://dbflute.org/meta/lastadi10.dtd"> + class="org.codelibs.fess.crawler.client.EsClient"> diff --git a/src/main/resources/s2robot/interval.xml b/src/main/resources/crawler/interval.xml similarity index 79% rename from src/main/resources/s2robot/interval.xml rename to src/main/resources/crawler/interval.xml index 33eceda48..fb4df5452 100644 --- a/src/main/resources/s2robot/interval.xml +++ b/src/main/resources/crawler/interval.xml @@ -1,8 +1,8 @@ - - + + - - + + + class="org.codelibs.fess.helper.CrawlerLogHelper"> diff --git a/src/main/resources/s2robot/rule.xml b/src/main/resources/crawler/rule.xml similarity index 75% rename from src/main/resources/s2robot/rule.xml rename to src/main/resources/crawler/rule.xml index 4c905265c..22315b477 100644 --- a/src/main/resources/s2robot/rule.xml +++ b/src/main/resources/crawler/rule.xml @@ -1,11 +1,11 @@ - - - + + + - + sitemapsRule @@ -23,10 +23,10 @@ - + "sitemapsRule" - + @@ -35,10 +35,10 @@ - + "webHtmlRule" - + fessXpathTransformer (int[])[200] (int[])[304] @@ -56,10 +56,10 @@ - + "webFileRule" - + fessFileTransformer (int[])[200] (int[])[304] @@ -85,10 +85,10 @@ - + "fsFileRule" - + fessFileTransformer (int[])[200] (int[])[304] @@ -116,10 +116,10 @@ - + "defaultRule" - + fessTikaTransformer (int[])[200] (int[])[304] diff --git a/src/main/resources/s2robot/transformer.xml b/src/main/resources/crawler/transformer.xml similarity index 97% rename from src/main/resources/s2robot/transformer.xml rename to src/main/resources/crawler/transformer.xml index a2e68741f..67d274c26 100644 --- a/src/main/resources/s2robot/transformer.xml +++ b/src/main/resources/crawler/transformer.xml @@ -1,8 +1,8 @@ - - + + diff --git a/src/main/resources/s2robot_es.xml b/src/main/resources/s2robot_es.xml deleted file mode 100644 index 3dfdb1708..000000000 --- a/src/main/resources/s2robot_es.xml +++ /dev/null @@ -1,52 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ".robot" - "queue" - - - ".robot" - "data" - - - ".robot" - "filter" - - - diff --git a/src/main/webapp/WEB-INF/cmd/resources/app.xml b/src/main/webapp/WEB-INF/cmd/resources/app.xml index 86e3d2d6b..6e7d0a54f 100644 --- a/src/main/webapp/WEB-INF/cmd/resources/app.xml +++ b/src/main/webapp/WEB-INF/cmd/resources/app.xml @@ -5,7 +5,7 @@ - +