fix #1254: add commons-exec dependency (remove its exclusion)

This commit is contained in:
Shinsuke Sugaya 2017-08-31 21:59:08 +09:00
parent 3ca467ae99
commit d1f5a514f1
3 changed files with 15 additions and 11 deletions

View file

@ -1298,10 +1298,6 @@
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpmime</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.commons</groupId>
<artifactId>commons-exec</artifactId>
</exclusion>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>jcl-over-slf4j</artifactId>

View file

@ -95,17 +95,13 @@ public abstract class AbstractFessFileTransformer extends AbstractTransformer im
final CrawlingConfigHelper crawlingConfigHelper = ComponentUtil.getCrawlingConfigHelper();
final CrawlingConfig crawlingConfig = crawlingConfigHelper.get(responseData.getSessionId());
final Extractor extractor = getExtractor(responseData);
final Map<String, String> params = new HashMap<>(crawlingConfig.getConfigParameterMap(ConfigName.CONFIG));
params.put(TikaMetadataKeys.RESOURCE_NAME_KEY, getResourceName(responseData));
final String mimeType = responseData.getMimeType();
params.put(HttpHeaders.CONTENT_TYPE, mimeType);
params.put(HttpHeaders.CONTENT_ENCODING, responseData.getCharSet());
final StringBuilder contentMetaBuf = new StringBuilder(1000);
final Map<String, Object> dataMap = new HashMap<>();
final Map<String, Object> metaDataMap = new HashMap<>();
String content;
try (final InputStream in = responseData.getResponseBody()) {
final ExtractData extractData = getExtractData(extractor, in, params);
final ExtractData extractData = getExtractData(extractor, in, createExtractParams(responseData, crawlingConfig));
content = extractData.getContent();
if (fessConfig.isCrawlerDocumentFileIgnoreEmptyContent() && StringUtil.isBlank(content)) {
return null;
@ -334,7 +330,16 @@ public abstract class AbstractFessFileTransformer extends AbstractTransformer im
return dataMap;
}
private ExtractData getExtractData(final Extractor extractor, final InputStream in, final Map<String, String> params) {
/**
 * Builds the parameter map handed to the {@link Extractor} for one crawled response.
 * Starts from the crawling configuration's CONFIG parameters, then layers on the
 * per-response metadata: resource name, content type, content encoding, and URL.
 *
 * @param responseData   the crawled response supplying name, MIME type, charset, and URL
 * @param crawlingConfig the crawling configuration whose CONFIG parameter map seeds the result
 * @return a mutable map of extraction parameters for this response
 */
protected Map<String, String> createExtractParams(final ResponseData responseData, final CrawlingConfig crawlingConfig) {
    // Copy the configured parameters first so response-specific entries win on key collisions.
    final Map<String, String> extractParams = new HashMap<>(crawlingConfig.getConfigParameterMap(ConfigName.CONFIG));
    extractParams.put(TikaMetadataKeys.RESOURCE_NAME_KEY, getResourceName(responseData));
    final String mimeType = responseData.getMimeType();
    extractParams.put(HttpHeaders.CONTENT_TYPE, mimeType);
    final String charSet = responseData.getCharSet();
    extractParams.put(HttpHeaders.CONTENT_ENCODING, charSet);
    extractParams.put(ExtractData.URL, responseData.getUrl());
    return extractParams;
}
protected ExtractData getExtractData(final Extractor extractor, final InputStream in, final Map<String, String> params) {
try {
return extractor.getText(in, params);
} catch (final RuntimeException e) {
@ -455,4 +460,4 @@ public abstract class AbstractFessFileTransformer extends AbstractTransformer im
metaContentMapping.put(metaname, dynamicField);
}
}
}

View file

@ -289,6 +289,9 @@ public class CrawlJob {
addSystemProperty(cmdList, "fess.log.level", null, null);
} else {
cmdList.add("-Dfess.log.level=" + logLevel);
if (logLevel.equalsIgnoreCase("debug")) {
cmdList.add("-Dorg.apache.tika.service.error.warn=true");
}
}
stream(fessConfig.getJvmCrawlerOptionsAsArray()).of(
stream -> stream.filter(StringUtil::isNotBlank).forEach(value -> cmdList.add(value)));