Resolve last_modified from document metadata and parse dates with Joda.

- Constants: add DATE_OPTIONAL_TIME ("date_optional_time").
- AbstractFessFileTransformer: a "date" metadata mapping now parses the
  metadata value (values[0]); the previous code parsed the mapping type
  (mapping.getValue2()). A value that cannot be parsed is logged and skipped.
  last_modified is taken from the data map when present (new
  getLastModified), falling back to the response, and is written with
  dataMap.put so that it overwrites a previously mapped value.
  getResourceName becomes protected.
- FessConfig / fess_config.properties: the default
  crawler.metadata.name.mapping also maps Last-Save-Date and Last-Modified
  to last_modified as dates.
- FessFunctions.parseDate: parse with Joda.forPattern(format).parseMillis;
  the one-argument overload now defaults to DATE_OPTIONAL_TIME. Any parse
  failure still yields null.
- FessFunctionsTest: new test covering parseDate.

diff --git a/src/main/java/org/codelibs/fess/Constants.java b/src/main/java/org/codelibs/fess/Constants.java
index 3b95cdaa1..18ec21f5f 100644
--- a/src/main/java/org/codelibs/fess/Constants.java
+++ b/src/main/java/org/codelibs/fess/Constants.java
@@ -158,6 +158,8 @@ public class Constants extends CoreLibConstants {
 
     public static final String ISO_DATETIME_FORMAT = "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'";
 
+    public static final String DATE_OPTIONAL_TIME = "date_optional_time";
+
     public static final int DONE_STATUS = 9999;
 
     public static final String DEFAULT_IGNORE_FAILURE_TYPE = StringUtil.EMPTY;
diff --git a/src/main/java/org/codelibs/fess/crawler/transformer/AbstractFessFileTransformer.java b/src/main/java/org/codelibs/fess/crawler/transformer/AbstractFessFileTransformer.java
index 8210017bf..3367ff2c8 100644
--- a/src/main/java/org/codelibs/fess/crawler/transformer/AbstractFessFileTransformer.java
+++ b/src/main/java/org/codelibs/fess/crawler/transformer/AbstractFessFileTransformer.java
@@ -19,7 +19,6 @@ import static org.codelibs.core.stream.StreamUtil.stream;
 
 import java.io.InputStream;
 import java.net.URLDecoder;
-import java.time.temporal.TemporalAccessor;
 import java.util.ArrayList;
 import java.util.Date;
 import java.util.HashMap;
@@ -55,8 +54,8 @@ import org.codelibs.fess.helper.PathMappingHelper;
 import org.codelibs.fess.helper.PermissionHelper;
 import org.codelibs.fess.helper.SystemHelper;
 import org.codelibs.fess.mylasta.direction.FessConfig;
+import org.codelibs.fess.taglib.FessFunctions;
 import org.codelibs.fess.util.ComponentUtil;
-import org.elasticsearch.common.joda.Joda;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -140,10 +139,15 @@ public abstract class AbstractFessFileTransformer extends AbstractTransformer im
                         } else if (Constants.MAPPING_TYPE_DOUBLE.equalsIgnoreCase(mapping.getValue2())) {
                             dataMap.put(mapping.getValue1(), Double.parseDouble(values[0]));
                         } else if (Constants.MAPPING_TYPE_DATE.equalsIgnoreCase(mapping.getValue2())) {
-                            final String format =
-                                    StringUtil.isNotBlank(mapping.getValue3()) ? mapping.getValue3() : "date_optional_time";
-                            final TemporalAccessor dt = Joda.forPattern(format).parse(mapping.getValue2());
-                            dataMap.put(mapping.getValue1(), Joda.forPattern("date_optional_time").format(dt));
+                            final Date dt =
+                                    FessFunctions.parseDate(values[0],
+                                            StringUtil.isNotBlank(mapping.getValue3()) ? mapping.getValue3()
+                                                    : Constants.DATE_OPTIONAL_TIME);
+                            if (dt != null) {
+                                dataMap.put(mapping.getValue1(), FessFunctions.formatDate(dt));
+                            } else {
+                                logger.warn("Failed to parse {}", mapping);
+                            }
                         } else {
                             logger.warn("Unknown mapping type: {}={}", key, mapping);
                         }
@@ -271,9 +275,9 @@ public abstract class AbstractFessFileTransformer extends AbstractTransformer im
         // content_length
         putResultDataBody(dataMap, fessConfig.getIndexFieldContentLength(), Long.toString(responseData.getContentLength()));
         // last_modified
-        final Date lastModified = responseData.getLastModified();
+        final Date lastModified = getLastModified(dataMap, responseData);
         if (lastModified != null) {
-            putResultDataBody(dataMap, fessConfig.getIndexFieldLastModified(), lastModified);
+            dataMap.put(fessConfig.getIndexFieldLastModified(), lastModified); // overwrite
             // timestamp
             putResultDataBody(dataMap, fessConfig.getIndexFieldTimestamp(), lastModified);
         } else {
@@ -331,6 +335,38 @@ public abstract class AbstractFessFileTransformer extends AbstractTransformer im
         return dataMap;
     }
 
+    /**
+     * Resolves the last-modified date of the document being transformed.
+     * A value already stored in the data map under the last-modified index field wins:
+     * a Date is returned as is, a String (or the first element of a String[]) is parsed
+     * with FessFunctions.parseDate. When no such value exists or it cannot be parsed,
+     * the value reported by the response is returned instead.
+     *
+     * @param dataMap the document fields collected so far
+     * @param responseData the crawled response, used as the fallback source
+     * @return the resolved date; the fallback is whatever responseData.getLastModified() returns
+     */
+    protected Date getLastModified(final Map dataMap, final ResponseData responseData) {
+        final Object lastModifiedObj = dataMap.get(fessConfig.getIndexFieldLastModified());
+        if (lastModifiedObj instanceof Date) {
+            return (Date) lastModifiedObj;
+        } else if (lastModifiedObj instanceof String) {
+            final Date lastModified = FessFunctions.parseDate(lastModifiedObj.toString());
+            if (lastModified != null) {
+                return lastModified;
+            }
+        } else if (lastModifiedObj instanceof String[]) {
+            final String[] lastModifieds = (String[]) lastModifiedObj;
+            if (lastModifieds.length > 0) {
+                final Date lastModified = FessFunctions.parseDate(lastModifieds[0]);
+                if (lastModified != null) {
+                    return lastModified;
+                }
+            }
+        }
+
+        return responseData.getLastModified();
+    }
+
     protected boolean hasTitle(final Map dataMap) {
         final Object titleObj = dataMap.get(fessConfig.getIndexFieldTitle());
         if (titleObj != null) {
@@ -366,7 +402,7 @@ public abstract class AbstractFessFileTransformer extends AbstractTransformer im
         return new ExtractData();
     }
 
-    private String getResourceName(final ResponseData responseData) {
+    protected String getResourceName(final ResponseData responseData) {
         String name = responseData.getUrl();
         final String enc = responseData.getCharSet();
 
diff --git a/src/main/java/org/codelibs/fess/mylasta/direction/FessConfig.java b/src/main/java/org/codelibs/fess/mylasta/direction/FessConfig.java
index 744799d6b..73db3b877 100644
--- a/src/main/java/org/codelibs/fess/mylasta/direction/FessConfig.java
+++ b/src/main/java/org/codelibs/fess/mylasta/direction/FessConfig.java
@@ -288,6 +288,8 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
 
     /** The key of the configuration. e.g. title=title:string
 Title=title:string
+last_modified=Last-Save-Date:date
+last_modified=Last-Modified:date
  */
     String CRAWLER_METADATA_NAME_MAPPING = "crawler.metadata.name.mapping";
 
@@ -2208,6 +2210,8 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
      * Get the value for the key 'crawler.metadata.name.mapping'.
      * The value is, e.g. title=title:string
 Title=title:string
+last_modified=Last-Save-Date:date
+last_modified=Last-Modified:date
  
      * @return The value of found property. (NotNull: if not found, exception but basically no way)
      */
@@ -8615,7 +8619,8 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
             defaultMap.put(FessConfig.CRAWLER_FAILURE_URL_STATUS_CODES, "404");
             defaultMap.put(FessConfig.CRAWLER_SYSTEM_MONITOR_INTERVAL, "60");
             defaultMap.put(FessConfig.CRAWLER_METADATA_CONTENT_EXCLUDES, "resourceName,X-Parsed-By,Content-Encoding.*,Content-Type.*");
-            defaultMap.put(FessConfig.CRAWLER_METADATA_NAME_MAPPING, "title=title:string\nTitle=title:string\n");
+            defaultMap.put(FessConfig.CRAWLER_METADATA_NAME_MAPPING,
+                    "title=title:string\nTitle=title:string\nlast_modified=Last-Save-Date:date\nlast_modified=Last-Modified:date\n");
             defaultMap.put(FessConfig.CRAWLER_DOCUMENT_HTML_CONTENT_XPATH, "//BODY");
             defaultMap.put(FessConfig.CRAWLER_DOCUMENT_HTML_LANG_XPATH, "//HTML/@lang");
             defaultMap.put(FessConfig.CRAWLER_DOCUMENT_HTML_DIGEST_XPATH, "//META[@name='description']/@content");
diff --git a/src/main/java/org/codelibs/fess/taglib/FessFunctions.java b/src/main/java/org/codelibs/fess/taglib/FessFunctions.java
index 86e400557..47be2ebac 100644
--- a/src/main/java/org/codelibs/fess/taglib/FessFunctions.java
+++ b/src/main/java/org/codelibs/fess/taglib/FessFunctions.java
@@ -23,7 +23,6 @@ import java.nio.file.Files;
 import java.nio.file.Path;
 import java.nio.file.Paths;
 import java.text.DecimalFormat;
-import java.text.ParseException;
 import java.text.SimpleDateFormat;
 import java.time.LocalDateTime;
 import java.time.format.DateTimeFormatter;
@@ -46,6 +45,7 @@ import org.codelibs.fess.Constants;
 import org.codelibs.fess.entity.FacetQueryView;
 import org.codelibs.fess.helper.ViewHelper;
 import org.codelibs.fess.util.ComponentUtil;
+import org.elasticsearch.common.joda.Joda;
 import org.lastaflute.di.util.LdiURLUtil;
 import org.lastaflute.web.util.LaRequestUtil;
 import org.lastaflute.web.util.LaResponseUtil;
@@ -113,7 +113,7 @@ public class FessFunctions {
     }
 
     public static Date parseDate(final String value) {
-        return parseDate(value, Constants.ISO_DATETIME_FORMAT);
+        return parseDate(value, Constants.DATE_OPTIONAL_TIME);
     }
 
     public static Date parseDate(final String value, final String format) {
@@ -121,10 +121,9 @@ public class FessFunctions {
             return null;
         }
         try {
-            final SimpleDateFormat sdf = new SimpleDateFormat(format);
-            sdf.setTimeZone(Constants.TIMEZONE_UTC);
-            return sdf.parse(value);
-        } catch (final ParseException e) {
+            final long time = Joda.forPattern(format).parseMillis(value);
+            return new Date(time);
+        } catch (final Exception e) {
             return null;
         }
     }
diff --git a/src/main/resources/fess_config.properties b/src/main/resources/fess_config.properties
index 950281c6d..75c80cfe9 100644
--- a/src/main/resources/fess_config.properties
+++ b/src/main/resources/fess_config.properties
@@ -177,6 +177,8 @@ crawler.metadata.content.excludes=resourceName,X-Parsed-By,Content-Encoding.*,Co
 crawler.metadata.name.mapping=\
 title=title:string\n\
 Title=title:string\n\
+last_modified=Last-Save-Date:date\n\
+last_modified=Last-Modified:date\n\
 
 # html
 crawler.document.html.content.xpath=//BODY
diff --git a/src/test/java/org/codelibs/fess/taglib/FessFunctionsTest.java b/src/test/java/org/codelibs/fess/taglib/FessFunctionsTest.java
new file mode 100644
index 000000000..6eb31c1bd
--- /dev/null
+++ b/src/test/java/org/codelibs/fess/taglib/FessFunctionsTest.java
@@ -0,0 +1,45 @@
+/*
+ * Copyright 2012-2019 CodeLibs Project and the Others.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied. See the License for the specific language
+ * governing permissions and limitations under the License.
+ */
+package org.codelibs.fess.taglib;
+
+import java.util.Date;
+
+import org.codelibs.fess.unit.UnitFessTestCase;
+
+public class FessFunctionsTest extends UnitFessTestCase {
+    public void test_parseDate() {
+        Date date;
+
+        date = FessFunctions.parseDate("");
+        assertNull(date);
+
+        date = FessFunctions.parseDate("2004-04-01T12:34:56.123Z");
+        assertEquals("2004-04-01T12:34:56.123Z", FessFunctions.formatDate(date));
+
+        date = FessFunctions.parseDate("2004-04-01T12:34:56Z");
+        assertEquals("2004-04-01T12:34:56.000Z", FessFunctions.formatDate(date));
+
+        date = FessFunctions.parseDate("2004-04-01T12:34Z");
+        assertEquals("2004-04-01T12:34:00.000Z", FessFunctions.formatDate(date));
+
+        date = FessFunctions.parseDate("2004-04-01");
+        assertEquals("2004-04-01T00:00:00.000Z", FessFunctions.formatDate(date));
+
+        date = FessFunctions.parseDate("2004-04-01T12:34:56.123+09:00");
+        assertEquals("2004-04-01T03:34:56.123Z", FessFunctions.formatDate(date));
+
+    }
+}