diff --git a/src/main/java/org/codelibs/fess/ds/impl/CsvDataStoreImpl.java b/src/main/java/org/codelibs/fess/ds/impl/CsvDataStoreImpl.java index 3d78adb1a..cd80e923d 100644 --- a/src/main/java/org/codelibs/fess/ds/impl/CsvDataStoreImpl.java +++ b/src/main/java/org/codelibs/fess/ds/impl/CsvDataStoreImpl.java @@ -194,6 +194,7 @@ public class CsvDataStoreImpl extends AbstractDataStoreImpl { resultMap.putAll(paramMap); resultMap.put("csvfile", csvFile.getAbsolutePath()); resultMap.put("csvfilename", csvFile.getName()); + resultMap.put("crawlingConfig", dataConfig); boolean foundValues = false; for (int i = 0; i < list.size(); i++) { String key = null; diff --git a/src/main/java/org/codelibs/fess/ds/impl/DatabaseDataStoreImpl.java b/src/main/java/org/codelibs/fess/ds/impl/DatabaseDataStoreImpl.java index cae69a40a..1ba9173c2 100644 --- a/src/main/java/org/codelibs/fess/ds/impl/DatabaseDataStoreImpl.java +++ b/src/main/java/org/codelibs/fess/ds/impl/DatabaseDataStoreImpl.java @@ -109,7 +109,7 @@ public class DatabaseDataStoreImpl extends AbstractDataStoreImpl { final Map dataMap = new HashMap<>(); dataMap.putAll(defaultDataMap); for (final Map.Entry entry : scriptMap.entrySet()) { - final Object convertValue = convertValue(entry.getValue(), rs, paramMap); + final Object convertValue = convertValue(config, entry.getValue(), rs, paramMap); if (convertValue != null) { dataMap.put(entry.getKey(), convertValue); } @@ -189,15 +189,16 @@ public class DatabaseDataStoreImpl extends AbstractDataStoreImpl { } } - protected Object convertValue(final String template, final ResultSet rs, final Map paramMap) { - return convertValue(template, new ResultSetParamMap(rs, paramMap)); + protected Object convertValue(final DataConfig config, final String template, final ResultSet rs, final Map paramMap) { + return convertValue(template, new ResultSetParamMap(config, rs, paramMap)); } protected static class ResultSetParamMap implements Map { private final Map paramMap = new HashMap<>(); - 
public ResultSetParamMap(final ResultSet resultSet, final Map paramMap) { + public ResultSetParamMap(final DataConfig config, final ResultSet resultSet, final Map paramMap) { this.paramMap.putAll(paramMap); + this.paramMap.put("crawlingConfig", config); try { final ResultSetMetaData metaData = resultSet.getMetaData(); diff --git a/src/main/java/org/codelibs/fess/ds/impl/EsDataStoreImpl.java b/src/main/java/org/codelibs/fess/ds/impl/EsDataStoreImpl.java index 8adb30db5..3f51fb331 100644 --- a/src/main/java/org/codelibs/fess/ds/impl/EsDataStoreImpl.java +++ b/src/main/java/org/codelibs/fess/ds/impl/EsDataStoreImpl.java @@ -162,6 +162,7 @@ public class EsDataStoreImpl extends AbstractDataStoreImpl { resultMap.put("version", Long.valueOf(hit.getVersion())); resultMap.put("hit", hit); resultMap.put("source", hit.getSource()); + resultMap.put("crawlingConfig", dataConfig); if (logger.isDebugEnabled()) { for (final Map.Entry entry : resultMap.entrySet()) { diff --git a/src/main/java/org/codelibs/fess/helper/DocumentHelper.java b/src/main/java/org/codelibs/fess/helper/DocumentHelper.java index d9d4d4cf1..049000996 100644 --- a/src/main/java/org/codelibs/fess/helper/DocumentHelper.java +++ b/src/main/java/org/codelibs/fess/helper/DocumentHelper.java @@ -18,14 +18,32 @@ package org.codelibs.fess.helper; import java.io.IOException; import java.io.Reader; import java.io.StringReader; +import java.util.HashSet; import java.util.Map; +import java.util.Set; import org.apache.commons.lang3.StringUtils; +import org.codelibs.core.io.SerializeUtil; import org.codelibs.core.lang.StringUtil; +import org.codelibs.fess.crawler.builder.RequestDataBuilder; +import org.codelibs.fess.crawler.client.CrawlerClient; +import org.codelibs.fess.crawler.client.CrawlerClientFactory; +import org.codelibs.fess.crawler.entity.RequestData; import org.codelibs.fess.crawler.entity.ResponseData; +import org.codelibs.fess.crawler.entity.ResultData; +import 
org.codelibs.fess.crawler.exception.ChildUrlsException;
+import org.codelibs.fess.crawler.exception.CrawlerSystemException;
+import org.codelibs.fess.crawler.exception.CrawlingAccessException;
+import org.codelibs.fess.crawler.processor.ResponseProcessor;
+import org.codelibs.fess.crawler.processor.impl.DefaultResponseProcessor;
+import org.codelibs.fess.crawler.rule.Rule;
+import org.codelibs.fess.crawler.rule.RuleManager;
+import org.codelibs.fess.crawler.transformer.Transformer;
 import org.codelibs.fess.crawler.util.TextUtil;
+import org.codelibs.fess.es.config.exentity.CrawlingConfig;
 import org.codelibs.fess.mylasta.direction.FessConfig;
 import org.codelibs.fess.util.ComponentUtil;
+import org.lastaflute.di.core.SingletonLaContainer;
 
 public class DocumentHelper {
     public String getContent(final ResponseData responseData, final String content, final Map dataMap) {
@@ -85,4 +103,76 @@ public class DocumentHelper {
             return StringUtil.EMPTY; // empty
         }
     }
+
+    /**
+     * Fetches the given URL with the crawler client configured by the crawling
+     * config, applies the transformer of the rule matching the response, and
+     * returns the deserialized result data.
+     *
+     * @param crawlingConfig the config used to initialize the crawler client factory
+     * @param sessionId the session ID set on the response data; must not be blank
+     * @param url the URL to fetch
+     * @return the map deserialized from the transformed result data, or
+     *         {@code null} if the transformer produced no data
+     * @throws ChildUrlsException if the response is a redirect; it carries the
+     *         redirect location as its only child URL
+     * @throws CrawlingAccessException if sessionId is blank, no client or rule
+     *         matches the URL, the response processor is not a
+     *         {@link DefaultResponseProcessor}, or fetching/parsing fails
+     */
+    public Map processRequest(final CrawlingConfig crawlingConfig, final String sessionId, final String url) {
+        if (StringUtil.isBlank(sessionId)) {
+            throw new CrawlingAccessException("sessionId is null.");
+        }
+
+        final CrawlerClientFactory crawlerClientFactory = ComponentUtil.getCrawlerClientFactory();
+        crawlingConfig.initializeClientFactory(crawlerClientFactory);
+        final CrawlerClient client = crawlerClientFactory.getClient(url);
+        if (client == null) {
+            throw new CrawlingAccessException("CrawlerClient is null for " + url);
+        }
+
+        final long startTime = System.currentTimeMillis();
+        try (final ResponseData responseData = client.execute(RequestDataBuilder.newRequestData().get().url(url).build())) {
+            if (responseData.getRedirectLocation() != null) {
+                final Set<RequestData> childUrlList = new HashSet<>();
+                childUrlList.add(RequestDataBuilder.newRequestData().get().url(responseData.getRedirectLocation()).build());
+                throw new ChildUrlsException(childUrlList, "Redirected from " + url);
+            }
+            responseData.setExecutionTime(System.currentTimeMillis() - startTime);
+            responseData.setSessionId(sessionId);
+
+            final RuleManager ruleManager = SingletonLaContainer.getComponent(RuleManager.class);
+            final Rule rule = ruleManager.getRule(responseData);
+            if (rule == null) {
+                throw new CrawlingAccessException("No url rule for " + url);
+            }
+            responseData.setRuleId(rule.getRuleId());
+            final ResponseProcessor responseProcessor = rule.getResponseProcessor();
+            if (!(responseProcessor instanceof DefaultResponseProcessor)) {
+                throw new CrawlingAccessException("The response processor is not DefaultResponseProcessor. responseProcessor: "
+                        + responseProcessor + ", url: " + url);
+            }
+            final Transformer transformer = ((DefaultResponseProcessor) responseProcessor).getTransformer();
+            final ResultData resultData = transformer.transform(responseData);
+            final byte[] data = resultData.getData();
+            if (data == null) {
+                return null;
+            }
+            try {
+                @SuppressWarnings("unchecked")
+                final Map result = (Map) SerializeUtil.fromBinaryToObject(data);
+                return result;
+            } catch (final Exception e) {
+                throw new CrawlerSystemException("Could not create an instance from bytes.", e);
+            }
+        } catch (final CrawlingAccessException e) {
+            // Rethrow as-is so that ChildUrlsException (redirect) and the specific
+            // access errors raised above are not masked as "Failed to parse".
+            throw e;
+        } catch (final Exception e) {
+            throw new CrawlingAccessException("Failed to parse " + url, e);
+        }
+    }
+
 }