diff --git a/pom.xml b/pom.xml
index f14ca78b2..ef206b855 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1298,10 +1298,6 @@
 			<groupId>org.apache.httpcomponents</groupId>
 			<artifactId>httpmime</artifactId>
 		</dependency>
-		<dependency>
-			<groupId>org.apache.commons</groupId>
-			<artifactId>commons-exec</artifactId>
-		</dependency>
 		<dependency>
 			<groupId>org.slf4j</groupId>
 			<artifactId>jcl-over-slf4j</artifactId>
diff --git a/src/main/java/org/codelibs/fess/crawler/transformer/AbstractFessFileTransformer.java b/src/main/java/org/codelibs/fess/crawler/transformer/AbstractFessFileTransformer.java
index 2bd44b4bc..54ca95840 100644
--- a/src/main/java/org/codelibs/fess/crawler/transformer/AbstractFessFileTransformer.java
+++ b/src/main/java/org/codelibs/fess/crawler/transformer/AbstractFessFileTransformer.java
@@ -95,17 +95,13 @@ public abstract class AbstractFessFileTransformer extends AbstractTransformer im
         final CrawlingConfigHelper crawlingConfigHelper = ComponentUtil.getCrawlingConfigHelper();
         final CrawlingConfig crawlingConfig = crawlingConfigHelper.get(responseData.getSessionId());
         final Extractor extractor = getExtractor(responseData);
-        final Map<String, String> params = new HashMap<>(crawlingConfig.getConfigParameterMap(ConfigName.CONFIG));
-        params.put(TikaMetadataKeys.RESOURCE_NAME_KEY, getResourceName(responseData));
         final String mimeType = responseData.getMimeType();
-        params.put(HttpHeaders.CONTENT_TYPE, mimeType);
-        params.put(HttpHeaders.CONTENT_ENCODING, responseData.getCharSet());
         final StringBuilder contentMetaBuf = new StringBuilder(1000);
         final Map<String, Object> dataMap = new HashMap<>();
         final Map<String, Object> metaDataMap = new HashMap<>();
         String content;
         try (final InputStream in = responseData.getResponseBody()) {
-            final ExtractData extractData = getExtractData(extractor, in, params);
+            final ExtractData extractData = getExtractData(extractor, in, createExtractParams(responseData, crawlingConfig));
             content = extractData.getContent();
             if (fessConfig.isCrawlerDocumentFileIgnoreEmptyContent() && StringUtil.isBlank(content)) {
                 return null;
@@ -334,7 +330,16 @@ public abstract class AbstractFessFileTransformer extends AbstractTransformer im
         return dataMap;
     }
 
-    private ExtractData getExtractData(final Extractor extractor, final InputStream in, final Map<String, String> params) {
+    protected Map<String, String> createExtractParams(final ResponseData responseData, final CrawlingConfig crawlingConfig) {
+        final Map<String, String> params = new HashMap<>(crawlingConfig.getConfigParameterMap(ConfigName.CONFIG));
+        params.put(TikaMetadataKeys.RESOURCE_NAME_KEY, getResourceName(responseData));
+        params.put(HttpHeaders.CONTENT_TYPE, responseData.getMimeType());
+        params.put(HttpHeaders.CONTENT_ENCODING, responseData.getCharSet());
+        params.put(ExtractData.URL, responseData.getUrl());
+        return params;
+    }
+
+    protected ExtractData getExtractData(final Extractor extractor, final InputStream in, final Map<String, String> params) {
         try {
             return extractor.getText(in, params);
         } catch (final RuntimeException e) {
@@ -455,4 +460,4 @@ public abstract class AbstractFessFileTransformer extends AbstractTransformer im
         metaContentMapping.put(metaname, dynamicField);
     }
-}
\ No newline at end of file
+}
diff --git a/src/main/java/org/codelibs/fess/job/CrawlJob.java b/src/main/java/org/codelibs/fess/job/CrawlJob.java
index 2bfa54217..10f3e5e58 100644
--- a/src/main/java/org/codelibs/fess/job/CrawlJob.java
+++ b/src/main/java/org/codelibs/fess/job/CrawlJob.java
@@ -289,6 +289,9 @@ public class CrawlJob {
             addSystemProperty(cmdList, "fess.log.level", null, null);
         } else {
             cmdList.add("-Dfess.log.level=" + logLevel);
+            if (logLevel.equalsIgnoreCase("debug")) {
+                cmdList.add("-Dorg.apache.tika.service.error.warn=true");
+            }
         }
         stream(fessConfig.getJvmCrawlerOptionsAsArray()).of(
                 stream -> stream.filter(StringUtil::isNotBlank).forEach(value -> cmdList.add(value)));