fix #1254 add commons-exec
This commit is contained in:
parent
3ca467ae99
commit
d1f5a514f1
3 changed files with 15 additions and 11 deletions
4
pom.xml
4
pom.xml
|
@ -1298,10 +1298,6 @@
|
|||
<groupId>org.apache.httpcomponents</groupId>
|
||||
<artifactId>httpmime</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>org.apache.commons</groupId>
|
||||
<artifactId>commons-exec</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>org.slf4j</groupId>
|
||||
<artifactId>jcl-over-slf4j</artifactId>
|
||||
|
|
|
@ -95,17 +95,13 @@ public abstract class AbstractFessFileTransformer extends AbstractTransformer im
|
|||
final CrawlingConfigHelper crawlingConfigHelper = ComponentUtil.getCrawlingConfigHelper();
|
||||
final CrawlingConfig crawlingConfig = crawlingConfigHelper.get(responseData.getSessionId());
|
||||
final Extractor extractor = getExtractor(responseData);
|
||||
final Map<String, String> params = new HashMap<>(crawlingConfig.getConfigParameterMap(ConfigName.CONFIG));
|
||||
params.put(TikaMetadataKeys.RESOURCE_NAME_KEY, getResourceName(responseData));
|
||||
final String mimeType = responseData.getMimeType();
|
||||
params.put(HttpHeaders.CONTENT_TYPE, mimeType);
|
||||
params.put(HttpHeaders.CONTENT_ENCODING, responseData.getCharSet());
|
||||
final StringBuilder contentMetaBuf = new StringBuilder(1000);
|
||||
final Map<String, Object> dataMap = new HashMap<>();
|
||||
final Map<String, Object> metaDataMap = new HashMap<>();
|
||||
String content;
|
||||
try (final InputStream in = responseData.getResponseBody()) {
|
||||
final ExtractData extractData = getExtractData(extractor, in, params);
|
||||
final ExtractData extractData = getExtractData(extractor, in, createExtractParams(responseData, crawlingConfig));
|
||||
content = extractData.getContent();
|
||||
if (fessConfig.isCrawlerDocumentFileIgnoreEmptyContent() && StringUtil.isBlank(content)) {
|
||||
return null;
|
||||
|
@ -334,7 +330,16 @@ public abstract class AbstractFessFileTransformer extends AbstractTransformer im
|
|||
return dataMap;
|
||||
}
|
||||
|
||||
private ExtractData getExtractData(final Extractor extractor, final InputStream in, final Map<String, String> params) {
|
||||
/**
 * Builds the parameter map that is passed to the extractor for a crawled response.
 *
 * The map starts as a copy of the crawling config's {@code ConfigName.CONFIG}
 * parameters and is then populated with the resource name, the MIME type
 * (as the content type), the character set (as the content encoding) and the
 * URL of the response. Entries put here replace config entries with the same key.
 *
 * @param responseData the crawled response whose resource name, MIME type, charset and URL are read
 * @param crawlingConfig the crawling configuration supplying the base parameters
 * @return a new map holding the config parameters plus the response-specific entries
 */
protected Map<String, String> createExtractParams(final ResponseData responseData, final CrawlingConfig crawlingConfig) {
|
||||
// Copy into a fresh HashMap so the puts below do not touch the map returned by the config.
final Map<String, String> params = new HashMap<>(crawlingConfig.getConfigParameterMap(ConfigName.CONFIG));
|
||||
params.put(TikaMetadataKeys.RESOURCE_NAME_KEY, getResourceName(responseData));
|
||||
params.put(HttpHeaders.CONTENT_TYPE, responseData.getMimeType());
|
||||
params.put(HttpHeaders.CONTENT_ENCODING, responseData.getCharSet());
|
||||
params.put(ExtractData.URL, responseData.getUrl());
|
||||
return params;
|
||||
}
|
||||
|
||||
protected ExtractData getExtractData(final Extractor extractor, final InputStream in, final Map<String, String> params) {
|
||||
try {
|
||||
return extractor.getText(in, params);
|
||||
} catch (final RuntimeException e) {
|
||||
|
@ -455,4 +460,4 @@ public abstract class AbstractFessFileTransformer extends AbstractTransformer im
|
|||
metaContentMapping.put(metaname, dynamicField);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
@ -289,6 +289,9 @@ public class CrawlJob {
|
|||
addSystemProperty(cmdList, "fess.log.level", null, null);
|
||||
} else {
|
||||
cmdList.add("-Dfess.log.level=" + logLevel);
|
||||
if (logLevel.equalsIgnoreCase("debug")) {
|
||||
cmdList.add("-Dorg.apache.tika.service.error.warn=true");
|
||||
}
|
||||
}
|
||||
stream(fessConfig.getJvmCrawlerOptionsAsArray()).of(
|
||||
stream -> stream.filter(StringUtil::isNotBlank).forEach(value -> cmdList.add(value)));
|
||||
|
|
Loading…
Add table
Reference in a new issue