Bläddra i källkod

fix #2821 Exclude X-FESS metadata from indexing and add transformation process for metadata inclusion.

Shinsuke Sugaya 1 år sedan
förälder
incheckning
ec33a25495

+ 5 - 5
src/main/java/org/codelibs/fess/crawler/transformer/AbstractFessFileTransformer.java

@@ -136,15 +136,15 @@ public abstract class AbstractFessFileTransformer extends AbstractTransformer im
                                 dataMap.put(mapping.getValue1(), Double.parseDouble(values[0]));
                                 dataMap.put(mapping.getValue1(), Double.parseDouble(values[0]));
                             } else if (Constants.MAPPING_TYPE_DATE.equalsIgnoreCase(mapping.getValue2())
                             } else if (Constants.MAPPING_TYPE_DATE.equalsIgnoreCase(mapping.getValue2())
                                     || Constants.MAPPING_TYPE_PDF_DATE.equalsIgnoreCase(mapping.getValue2())) {
                                     || Constants.MAPPING_TYPE_PDF_DATE.equalsIgnoreCase(mapping.getValue2())) {
-                                final String dateFormate;
+                                final String dateFormat;
                                 if (StringUtil.isNotBlank(mapping.getValue3())) {
                                 if (StringUtil.isNotBlank(mapping.getValue3())) {
-                                    dateFormate = mapping.getValue3();
+                                    dateFormat = mapping.getValue3();
                                 } else if (Constants.MAPPING_TYPE_PDF_DATE.equalsIgnoreCase(mapping.getValue2())) {
                                 } else if (Constants.MAPPING_TYPE_PDF_DATE.equalsIgnoreCase(mapping.getValue2())) {
-                                    dateFormate = mapping.getValue2();
+                                    dateFormat = Constants.MAPPING_TYPE_PDF_DATE;
                                 } else {
                                 } else {
-                                    dateFormate = Constants.DATE_OPTIONAL_TIME;
+                                    dateFormat = Constants.DATE_OPTIONAL_TIME;
                                 }
                                 }
-                                final Date dt = FessFunctions.parseDate(values[0], dateFormate);
+                                final Date dt = FessFunctions.parseDate(values[0], dateFormat);
                                 if (dt != null) {
                                 if (dt != null) {
                                     dataMap.put(mapping.getValue1(), FessFunctions.formatDate(dt));
                                     dataMap.put(mapping.getValue1(), FessFunctions.formatDate(dt));
                                 } else {
                                 } else {

+ 3 - 3
src/main/java/org/codelibs/fess/mylasta/direction/FessConfig.java

@@ -391,7 +391,7 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
     /** The key of the configuration. e.g. cpu */
     /** The key of the configuration. e.g. cpu */
     String CRAWLER_HOTTHREAD_TYPE = "crawler.hotthread.type";
     String CRAWLER_HOTTHREAD_TYPE = "crawler.hotthread.type";
 
 
-    /** The key of the configuration. e.g. resourceName,X-Parsed-By,Content-Encoding.*,Content-Type.*,X-TIKA.* */
+    /** The key of the configuration. e.g. resourceName,X-Parsed-By,Content-Encoding.*,Content-Type.*,X-TIKA.*,X-FESS.* */
     String CRAWLER_METADATA_CONTENT_EXCLUDES = "crawler.metadata.content.excludes";
     String CRAWLER_METADATA_CONTENT_EXCLUDES = "crawler.metadata.content.excludes";
 
 
     /** The key of the configuration. e.g. title=title:string<br>
     /** The key of the configuration. e.g. title=title:string<br>
@@ -2926,7 +2926,7 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
 
 
     /**
     /**
      * Get the value for the key 'crawler.metadata.content.excludes'. <br>
      * Get the value for the key 'crawler.metadata.content.excludes'. <br>
-     * The value is, e.g. resourceName,X-Parsed-By,Content-Encoding.*,Content-Type.*,X-TIKA.* <br>
+     * The value is, e.g. resourceName,X-Parsed-By,Content-Encoding.*,Content-Type.*,X-TIKA.*,X-FESS.* <br>
      * @return The value of found property. (NotNull: if not found, exception but basically no way)
      * @return The value of found property. (NotNull: if not found, exception but basically no way)
      */
      */
     String getCrawlerMetadataContentExcludes();
     String getCrawlerMetadataContentExcludes();
@@ -10899,7 +10899,7 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
             defaultMap.put(FessConfig.CRAWLER_HOTTHREAD_TIMEOUT, "30s");
             defaultMap.put(FessConfig.CRAWLER_HOTTHREAD_TIMEOUT, "30s");
             defaultMap.put(FessConfig.CRAWLER_HOTTHREAD_TYPE, "cpu");
             defaultMap.put(FessConfig.CRAWLER_HOTTHREAD_TYPE, "cpu");
             defaultMap.put(FessConfig.CRAWLER_METADATA_CONTENT_EXCLUDES,
             defaultMap.put(FessConfig.CRAWLER_METADATA_CONTENT_EXCLUDES,
-                    "resourceName,X-Parsed-By,Content-Encoding.*,Content-Type.*,X-TIKA.*");
+                    "resourceName,X-Parsed-By,Content-Encoding.*,Content-Type.*,X-TIKA.*,X-FESS.*");
             defaultMap.put(FessConfig.CRAWLER_METADATA_NAME_MAPPING, "title=title:string\nTitle=title:string\ndc:title=title:string\n");
             defaultMap.put(FessConfig.CRAWLER_METADATA_NAME_MAPPING, "title=title:string\nTitle=title:string\ndc:title=title:string\n");
             defaultMap.put(FessConfig.CRAWLER_DOCUMENT_HTML_CONTENT_XPATH, "//BODY");
             defaultMap.put(FessConfig.CRAWLER_DOCUMENT_HTML_CONTENT_XPATH, "//BODY");
             defaultMap.put(FessConfig.CRAWLER_DOCUMENT_HTML_LANG_XPATH, "//HTML/@lang");
             defaultMap.put(FessConfig.CRAWLER_DOCUMENT_HTML_LANG_XPATH, "//HTML/@lang");

+ 12 - 0
src/main/java/org/codelibs/fess/mylasta/direction/FessProp.java

@@ -975,6 +975,18 @@ public interface FessProp {
         return params.get(name);
         return params.get(name);
     }
     }
 
 
+    /**
+     * Registers a metadata name mapping for {@code name} unless one is already present.
+     *
+     * If {@code getCrawlerMetadataNameMapping(name)} returns a non-null value, this method
+     * returns without changing anything, so an existing mapping is never overwritten.
+     * Otherwise a new {@code Tuple3} built from (fieldName, mappingType, dateFormat) is put
+     * under {@code name} into the map stored in {@code propMap} with the key
+     * {@code CRAWLER_METADATA_NAME_MAPPING}.
+     *
+     * @param name the key under which the mapping is stored
+     * @param fieldName the first value of the stored tuple
+     * @param mappingType the second value of the stored tuple
+     * @param dateFormat the third value of the stored tuple
+     */
+    default void addCrawlerMetadataNameMapping(final String name, final String fieldName, final String mappingType,
+            final String dateFormat) {
+        // Keep an existing mapping as-is; only add when nothing is registered for this name.
+        if (getCrawlerMetadataNameMapping(name) != null) {
+            return;
+        }
+
+        // NOTE(review): assumes propMap already holds the mapping table under
+        // CRAWLER_METADATA_NAME_MAPPING at this point (presumably populated by the lookup
+        // above) -- confirm; if the entry is absent, params is null and put() would fail.
+        @SuppressWarnings("unchecked")
+      final  Map<String, Tuple3<String, String, String>> params =
+                (Map<String, Tuple3<String, String, String>>) propMap.get(CRAWLER_METADATA_NAME_MAPPING);
+        params.put(name, new Tuple3<>(fieldName, mappingType, dateFormat));
+    }
+
     String getSuggestPopularWordFields();
     String getSuggestPopularWordFields();
 
 
     default String[] getSuggestPopularWordFieldsAsArray() {
     default String[] getSuggestPopularWordFieldsAsArray() {

+ 1 - 1
src/main/resources/fess_config.properties

@@ -228,7 +228,7 @@ crawler.hotthread.snapshots=10
 crawler.hotthread.threads=3
 crawler.hotthread.threads=3
 crawler.hotthread.timeout=30s
 crawler.hotthread.timeout=30s
 crawler.hotthread.type=cpu
 crawler.hotthread.type=cpu
-crawler.metadata.content.excludes=resourceName,X-Parsed-By,Content-Encoding.*,Content-Type.*,X-TIKA.*
+crawler.metadata.content.excludes=resourceName,X-Parsed-By,Content-Encoding.*,Content-Type.*,X-TIKA.*,X-FESS.*
 crawler.metadata.name.mapping=\
 crawler.metadata.name.mapping=\
 title=title:string\n\
 title=title:string\n\
 Title=title:string\n\
 Title=title:string\n\