Переглянути джерело

fix #2019 last_modified metadata support

Shinsuke Sugaya 6 роки тому
батько
коміт
d38ce5982a

+ 2 - 0
src/main/java/org/codelibs/fess/Constants.java

@@ -158,6 +158,8 @@ public class Constants extends CoreLibConstants {
 
     public static final String ISO_DATETIME_FORMAT = "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'";
 
+    public static final String DATE_OPTIONAL_TIME = "date_optional_time";
+
     public static final int DONE_STATUS = 9999;
 
     public static final String DEFAULT_IGNORE_FAILURE_TYPE = StringUtil.EMPTY;

+ 35 - 9
src/main/java/org/codelibs/fess/crawler/transformer/AbstractFessFileTransformer.java

@@ -19,7 +19,6 @@ import static org.codelibs.core.stream.StreamUtil.stream;
 
 import java.io.InputStream;
 import java.net.URLDecoder;
-import java.time.temporal.TemporalAccessor;
 import java.util.ArrayList;
 import java.util.Date;
 import java.util.HashMap;
@@ -55,8 +54,8 @@ import org.codelibs.fess.helper.PathMappingHelper;
 import org.codelibs.fess.helper.PermissionHelper;
 import org.codelibs.fess.helper.SystemHelper;
 import org.codelibs.fess.mylasta.direction.FessConfig;
+import org.codelibs.fess.taglib.FessFunctions;
 import org.codelibs.fess.util.ComponentUtil;
-import org.elasticsearch.common.joda.Joda;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -140,10 +139,15 @@ public abstract class AbstractFessFileTransformer extends AbstractTransformer im
                                         } else if (Constants.MAPPING_TYPE_DOUBLE.equalsIgnoreCase(mapping.getValue2())) {
                                             dataMap.put(mapping.getValue1(), Double.parseDouble(values[0]));
                                         } else if (Constants.MAPPING_TYPE_DATE.equalsIgnoreCase(mapping.getValue2())) {
-                                            final String format =
-                                                    StringUtil.isNotBlank(mapping.getValue3()) ? mapping.getValue3() : "date_optional_time";
-                                            final TemporalAccessor dt = Joda.forPattern(format).parse(mapping.getValue2());
-                                            dataMap.put(mapping.getValue1(), Joda.forPattern("date_optional_time").format(dt));
+                                            final Date dt =
+                                                    FessFunctions.parseDate(values[0],
+                                                            StringUtil.isNotBlank(mapping.getValue3()) ? mapping.getValue3()
+                                                                    : Constants.DATE_OPTIONAL_TIME);
+                                            if (dt != null) {
+                                                dataMap.put(mapping.getValue1(), FessFunctions.formatDate(dt));
+                                            } else {
+                                                logger.warn("Failed to parse " + mapping.toString());
+                                            }
                                         } else {
                                             logger.warn("Unknown mapping type: {}={}", key, mapping);
                                         }
@@ -271,9 +275,9 @@ public abstract class AbstractFessFileTransformer extends AbstractTransformer im
         // content_length
         putResultDataBody(dataMap, fessConfig.getIndexFieldContentLength(), Long.toString(responseData.getContentLength()));
         // last_modified
-        final Date lastModified = responseData.getLastModified();
+        final Date lastModified = getLastModified(dataMap, responseData);
         if (lastModified != null) {
-            putResultDataBody(dataMap, fessConfig.getIndexFieldLastModified(), lastModified);
+            dataMap.put(fessConfig.getIndexFieldLastModified(), lastModified); // overwrite
             // timestamp
             putResultDataBody(dataMap, fessConfig.getIndexFieldTimestamp(), lastModified);
         } else {
@@ -331,6 +335,28 @@ public abstract class AbstractFessFileTransformer extends AbstractTransformer im
         return dataMap;
     }
 
+    protected Date getLastModified(final Map<String, Object> dataMap, final ResponseData responseData) { // picks the last-modified date, preferring dataMap over responseData
+        final Object lastModifiedObj = dataMap.get(fessConfig.getIndexFieldLastModified()); // value already stored under the last_modified index field; may be absent
+        if (lastModifiedObj instanceof Date) { // already a Date: returned unchanged
+            return (Date) lastModifiedObj;
+        } else if (lastModifiedObj instanceof String) { // single string: parsed via FessFunctions.parseDate
+            final Date lastModified = FessFunctions.parseDate(lastModifiedObj.toString());
+            if (lastModified != null) { // a null parse result falls through to the responseData fallback below
+                return lastModified;
+            }
+        } else if (lastModifiedObj instanceof String[]) { // string array: only the first element is examined
+            final String[] lastModifieds = (String[]) lastModifiedObj;
+            if (lastModifieds.length > 0) {
+                final Date lastModified = FessFunctions.parseDate(lastModifieds[0]);
+                if (lastModified != null) {
+                    return lastModified;
+                }
+            }
+        }
+
+        return responseData.getLastModified(); // fallback when dataMap holds no usable value
+    }
+
     protected boolean hasTitle(final Map<String, Object> dataMap) {
         final Object titleObj = dataMap.get(fessConfig.getIndexFieldTitle());
         if (titleObj != null) {
@@ -366,7 +392,7 @@ public abstract class AbstractFessFileTransformer extends AbstractTransformer im
         return new ExtractData();
     }
 
-    private String getResourceName(final ResponseData responseData) {
+    protected String getResourceName(final ResponseData responseData) {
         String name = responseData.getUrl();
         final String enc = responseData.getCharSet();
 

+ 6 - 1
src/main/java/org/codelibs/fess/mylasta/direction/FessConfig.java

@@ -288,6 +288,8 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
 
     /** The key of the configuration. e.g. title=title:string
     Title=title:string
+    last_modified=Last-Save-Date:date
+    last_modified=Last-Modified:date
     */
     String CRAWLER_METADATA_NAME_MAPPING = "crawler.metadata.name.mapping";
 
@@ -2208,6 +2210,8 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
      * Get the value for the key 'crawler.metadata.name.mapping'. <br>
      * The value is, e.g. title=title:string
     Title=title:string
+    last_modified=Last-Save-Date:date
+    last_modified=Last-Modified:date
     <br>
      * @return The value of found property. (NotNull: if not found, exception but basically no way)
      */
@@ -8615,7 +8619,8 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
             defaultMap.put(FessConfig.CRAWLER_FAILURE_URL_STATUS_CODES, "404");
             defaultMap.put(FessConfig.CRAWLER_SYSTEM_MONITOR_INTERVAL, "60");
             defaultMap.put(FessConfig.CRAWLER_METADATA_CONTENT_EXCLUDES, "resourceName,X-Parsed-By,Content-Encoding.*,Content-Type.*");
-            defaultMap.put(FessConfig.CRAWLER_METADATA_NAME_MAPPING, "title=title:string\nTitle=title:string\n");
+            defaultMap.put(FessConfig.CRAWLER_METADATA_NAME_MAPPING,
+                    "title=title:string\nTitle=title:string\nlast_modified=Last-Save-Date:date\nlast_modified=Last-Modified:date\n");
             defaultMap.put(FessConfig.CRAWLER_DOCUMENT_HTML_CONTENT_XPATH, "//BODY");
             defaultMap.put(FessConfig.CRAWLER_DOCUMENT_HTML_LANG_XPATH, "//HTML/@lang");
             defaultMap.put(FessConfig.CRAWLER_DOCUMENT_HTML_DIGEST_XPATH, "//META[@name='description']/@content");

+ 5 - 6
src/main/java/org/codelibs/fess/taglib/FessFunctions.java

@@ -23,7 +23,6 @@ import java.nio.file.Files;
 import java.nio.file.Path;
 import java.nio.file.Paths;
 import java.text.DecimalFormat;
-import java.text.ParseException;
 import java.text.SimpleDateFormat;
 import java.time.LocalDateTime;
 import java.time.format.DateTimeFormatter;
@@ -46,6 +45,7 @@ import org.codelibs.fess.Constants;
 import org.codelibs.fess.entity.FacetQueryView;
 import org.codelibs.fess.helper.ViewHelper;
 import org.codelibs.fess.util.ComponentUtil;
+import org.elasticsearch.common.joda.Joda;
 import org.lastaflute.di.util.LdiURLUtil;
 import org.lastaflute.web.util.LaRequestUtil;
 import org.lastaflute.web.util.LaResponseUtil;
@@ -113,7 +113,7 @@ public class FessFunctions {
     }
 
     public static Date parseDate(final String value) {
-        return parseDate(value, Constants.ISO_DATETIME_FORMAT);
+        return parseDate(value, Constants.DATE_OPTIONAL_TIME);
     }
 
     public static Date parseDate(final String value, final String format) {
@@ -121,10 +121,9 @@ public class FessFunctions {
             return null;
         }
         try {
-            final SimpleDateFormat sdf = new SimpleDateFormat(format);
-            sdf.setTimeZone(Constants.TIMEZONE_UTC);
-            return sdf.parse(value);
-        } catch (final ParseException e) {
+            final long time = Joda.forPattern(format).parseMillis(value);
+            return new Date(time);
+        } catch (final Exception e) {
             return null;
         }
     }

+ 2 - 0
src/main/resources/fess_config.properties

@@ -177,6 +177,8 @@ crawler.metadata.content.excludes=resourceName,X-Parsed-By,Content-Encoding.*,Co
 crawler.metadata.name.mapping=\
 title=title:string\n\
 Title=title:string\n\
+last_modified=Last-Save-Date:date\n\
+last_modified=Last-Modified:date\n\
 
 # html
 crawler.document.html.content.xpath=//BODY

+ 45 - 0
src/test/java/org/codelibs/fess/taglib/FessFunctionsTest.java

@@ -0,0 +1,45 @@
+/*
+ * Copyright 2012-2019 CodeLibs Project and the Others.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied. See the License for the specific language
+ * governing permissions and limitations under the License.
+ */
+package org.codelibs.fess.taglib;
+
+import java.util.Date;
+
+import org.codelibs.fess.unit.UnitFessTestCase;
+
+public class FessFunctionsTest extends UnitFessTestCase { // exercises the single-argument FessFunctions.parseDate
+    public void test_parseDate() {
+        Date date;
+
+        date = FessFunctions.parseDate(""); // empty input is expected to yield null
+        assertNull(date);
+
+        date = FessFunctions.parseDate("2004-04-01T12:34:56.123Z"); // full timestamp with milliseconds round-trips through formatDate
+        assertEquals("2004-04-01T12:34:56.123Z", FessFunctions.formatDate(date));
+
+        date = FessFunctions.parseDate("2004-04-01T12:34:56Z"); // no milliseconds: expected to format with .000
+        assertEquals("2004-04-01T12:34:56.000Z", FessFunctions.formatDate(date));
+
+        date = FessFunctions.parseDate("2004-04-01T12:34Z"); // no seconds: expected to format with :00.000
+        assertEquals("2004-04-01T12:34:00.000Z", FessFunctions.formatDate(date));
+
+        date = FessFunctions.parseDate("2004-04-01"); // date only: expected to format as midnight with a Z suffix
+        assertEquals("2004-04-01T00:00:00.000Z", FessFunctions.formatDate(date));
+
+        date = FessFunctions.parseDate("2004-04-01T12:34:56.123+09:00"); // +09:00 offset: expected output is shifted nine hours earlier with a Z suffix
+        assertEquals("2004-04-01T03:34:56.123Z", FessFunctions.formatDate(date));
+
+    }
+}