fix #2821 Exclude X-FESS metadata from indexing and add transformation process for metadata inclusion.
This commit is contained in:
parent
dfb41630e6
commit
24d77de5ae
4 changed files with 21 additions and 9 deletions
|
@ -136,15 +136,15 @@ public abstract class AbstractFessFileTransformer extends AbstractTransformer im
|
|||
dataMap.put(mapping.getValue1(), Double.parseDouble(values[0]));
|
||||
} else if (Constants.MAPPING_TYPE_DATE.equalsIgnoreCase(mapping.getValue2())
|
||||
|| Constants.MAPPING_TYPE_PDF_DATE.equalsIgnoreCase(mapping.getValue2())) {
|
||||
final String dateFormate;
|
||||
final String dateFormat;
|
||||
if (StringUtil.isNotBlank(mapping.getValue3())) {
|
||||
dateFormate = mapping.getValue3();
|
||||
dateFormat = mapping.getValue3();
|
||||
} else if (Constants.MAPPING_TYPE_PDF_DATE.equalsIgnoreCase(mapping.getValue2())) {
|
||||
dateFormate = mapping.getValue2();
|
||||
dateFormat = Constants.MAPPING_TYPE_PDF_DATE;
|
||||
} else {
|
||||
dateFormate = Constants.DATE_OPTIONAL_TIME;
|
||||
dateFormat = Constants.DATE_OPTIONAL_TIME;
|
||||
}
|
||||
final Date dt = FessFunctions.parseDate(values[0], dateFormate);
|
||||
final Date dt = FessFunctions.parseDate(values[0], dateFormat);
|
||||
if (dt != null) {
|
||||
dataMap.put(mapping.getValue1(), FessFunctions.formatDate(dt));
|
||||
} else {
|
||||
|
|
|
@ -391,7 +391,7 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
|
|||
/** The key of the configuration. e.g. cpu */
|
||||
String CRAWLER_HOTTHREAD_TYPE = "crawler.hotthread.type";
|
||||
|
||||
/** The key of the configuration. e.g. resourceName,X-Parsed-By,Content-Encoding.*,Content-Type.*,X-TIKA.* */
|
||||
/** The key of the configuration. e.g. resourceName,X-Parsed-By,Content-Encoding.*,Content-Type.*,X-TIKA.*,X-FESS.* */
|
||||
String CRAWLER_METADATA_CONTENT_EXCLUDES = "crawler.metadata.content.excludes";
|
||||
|
||||
/** The key of the configuration. e.g. title=title:string<br>
|
||||
|
@ -2926,7 +2926,7 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
|
|||
|
||||
/**
|
||||
* Get the value for the key 'crawler.metadata.content.excludes'. <br>
|
||||
* The value is, e.g. resourceName,X-Parsed-By,Content-Encoding.*,Content-Type.*,X-TIKA.* <br>
|
||||
* The value is, e.g. resourceName,X-Parsed-By,Content-Encoding.*,Content-Type.*,X-TIKA.*,X-FESS.* <br>
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
*/
|
||||
String getCrawlerMetadataContentExcludes();
|
||||
|
@ -10899,7 +10899,7 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
|
|||
defaultMap.put(FessConfig.CRAWLER_HOTTHREAD_TIMEOUT, "30s");
|
||||
defaultMap.put(FessConfig.CRAWLER_HOTTHREAD_TYPE, "cpu");
|
||||
defaultMap.put(FessConfig.CRAWLER_METADATA_CONTENT_EXCLUDES,
|
||||
"resourceName,X-Parsed-By,Content-Encoding.*,Content-Type.*,X-TIKA.*");
|
||||
"resourceName,X-Parsed-By,Content-Encoding.*,Content-Type.*,X-TIKA.*,X-FESS.*");
|
||||
defaultMap.put(FessConfig.CRAWLER_METADATA_NAME_MAPPING, "title=title:string\nTitle=title:string\ndc:title=title:string\n");
|
||||
defaultMap.put(FessConfig.CRAWLER_DOCUMENT_HTML_CONTENT_XPATH, "//BODY");
|
||||
defaultMap.put(FessConfig.CRAWLER_DOCUMENT_HTML_LANG_XPATH, "//HTML/@lang");
|
||||
|
|
|
@ -975,6 +975,18 @@ public interface FessProp {
|
|||
return params.get(name);
|
||||
}
|
||||
|
||||
/**
 * Adds a metadata name mapping for {@code name} unless one already exists.
 * <p>
 * If {@code getCrawlerMetadataNameMapping(name)} returns a non-null value, this method
 * returns without modifying anything. Otherwise it stores a new
 * {@code Tuple3(fieldName, mappingType, dateFormat)} under {@code name} in the map held in
 * {@code propMap} under the {@code CRAWLER_METADATA_NAME_MAPPING} key.
 *
 * @param name the metadata name used as the key of the mapping
 * @param fieldName first tuple element; the file transformer uses {@code getValue1()} as the key it writes into the document data map
 * @param mappingType second tuple element; the file transformer compares {@code getValue2()} against the {@code Constants.MAPPING_TYPE_*} values
 * @param dateFormat third tuple element; the file transformer uses {@code getValue3()}, when not blank, as the format for parsing date values
 */
default void addCrawlerMetadataNameMapping(final String name, final String fieldName, final String mappingType,
|
||||
final String dateFormat) {
|
||||
// An existing mapping for this name wins; never overwrite it.
if (getCrawlerMetadataNameMapping(name) != null) {
|
||||
return;
|
||||
}
|
||||
|
||||
// NOTE(review): presumably the getCrawlerMetadataNameMapping call above has already stored the
// parsed mapping in propMap, so the lookup below is non-null -- confirm against the getter body.
@SuppressWarnings("unchecked")
|
||||
final Map<String, Tuple3<String, String, String>> params =
|
||||
(Map<String, Tuple3<String, String, String>>) propMap.get(CRAWLER_METADATA_NAME_MAPPING);
|
||||
params.put(name, new Tuple3<>(fieldName, mappingType, dateFormat));
|
||||
}
|
||||
|
||||
String getSuggestPopularWordFields();
|
||||
|
||||
default String[] getSuggestPopularWordFieldsAsArray() {
|
||||
|
|
|
@ -228,7 +228,7 @@ crawler.hotthread.snapshots=10
|
|||
crawler.hotthread.threads=3
|
||||
crawler.hotthread.timeout=30s
|
||||
crawler.hotthread.type=cpu
|
||||
crawler.metadata.content.excludes=resourceName,X-Parsed-By,Content-Encoding.*,Content-Type.*,X-TIKA.*
|
||||
crawler.metadata.content.excludes=resourceName,X-Parsed-By,Content-Encoding.*,Content-Type.*,X-TIKA.*,X-FESS.*
|
||||
crawler.metadata.name.mapping=\
|
||||
title=title:string\n\
|
||||
Title=title:string\n\
|
||||
|
|
Loading…
Add table
Reference in a new issue