fix #2019 last_modified metadata support
This commit is contained in:
parent
8eadcbb48b
commit
d38ce5982a
6 changed files with 95 additions and 16 deletions
|
@ -158,6 +158,8 @@ public class Constants extends CoreLibConstants {
|
|||
|
||||
public static final String ISO_DATETIME_FORMAT = "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'";
|
||||
|
||||
public static final String DATE_OPTIONAL_TIME = "date_optional_time";
|
||||
|
||||
public static final int DONE_STATUS = 9999;
|
||||
|
||||
public static final String DEFAULT_IGNORE_FAILURE_TYPE = StringUtil.EMPTY;
|
||||
|
|
|
@ -19,7 +19,6 @@ import static org.codelibs.core.stream.StreamUtil.stream;
|
|||
|
||||
import java.io.InputStream;
|
||||
import java.net.URLDecoder;
|
||||
import java.time.temporal.TemporalAccessor;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Date;
|
||||
import java.util.HashMap;
|
||||
|
@ -55,8 +54,8 @@ import org.codelibs.fess.helper.PathMappingHelper;
|
|||
import org.codelibs.fess.helper.PermissionHelper;
|
||||
import org.codelibs.fess.helper.SystemHelper;
|
||||
import org.codelibs.fess.mylasta.direction.FessConfig;
|
||||
import org.codelibs.fess.taglib.FessFunctions;
|
||||
import org.codelibs.fess.util.ComponentUtil;
|
||||
import org.elasticsearch.common.joda.Joda;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
|
@ -140,10 +139,15 @@ public abstract class AbstractFessFileTransformer extends AbstractTransformer im
|
|||
} else if (Constants.MAPPING_TYPE_DOUBLE.equalsIgnoreCase(mapping.getValue2())) {
|
||||
dataMap.put(mapping.getValue1(), Double.parseDouble(values[0]));
|
||||
} else if (Constants.MAPPING_TYPE_DATE.equalsIgnoreCase(mapping.getValue2())) {
|
||||
final String format =
|
||||
StringUtil.isNotBlank(mapping.getValue3()) ? mapping.getValue3() : "date_optional_time";
|
||||
final TemporalAccessor dt = Joda.forPattern(format).parse(mapping.getValue2());
|
||||
dataMap.put(mapping.getValue1(), Joda.forPattern("date_optional_time").format(dt));
|
||||
final Date dt =
|
||||
FessFunctions.parseDate(values[0],
|
||||
StringUtil.isNotBlank(mapping.getValue3()) ? mapping.getValue3()
|
||||
: Constants.DATE_OPTIONAL_TIME);
|
||||
if (dt != null) {
|
||||
dataMap.put(mapping.getValue1(), FessFunctions.formatDate(dt));
|
||||
} else {
|
||||
logger.warn("Failed to parse " + mapping.toString());
|
||||
}
|
||||
} else {
|
||||
logger.warn("Unknown mapping type: {}={}", key, mapping);
|
||||
}
|
||||
|
@ -271,9 +275,9 @@ public abstract class AbstractFessFileTransformer extends AbstractTransformer im
|
|||
// content_length
|
||||
putResultDataBody(dataMap, fessConfig.getIndexFieldContentLength(), Long.toString(responseData.getContentLength()));
|
||||
// last_modified
|
||||
final Date lastModified = responseData.getLastModified();
|
||||
final Date lastModified = getLastModified(dataMap, responseData);
|
||||
if (lastModified != null) {
|
||||
putResultDataBody(dataMap, fessConfig.getIndexFieldLastModified(), lastModified);
|
||||
dataMap.put(fessConfig.getIndexFieldLastModified(), lastModified); // overwrite
|
||||
// timestamp
|
||||
putResultDataBody(dataMap, fessConfig.getIndexFieldTimestamp(), lastModified);
|
||||
} else {
|
||||
|
@ -331,6 +335,28 @@ public abstract class AbstractFessFileTransformer extends AbstractTransformer im
|
|||
return dataMap;
|
||||
}
|
||||
|
||||
protected Date getLastModified(final Map<String, Object> dataMap, final ResponseData responseData) {
|
||||
final Object lastModifiedObj = dataMap.get(fessConfig.getIndexFieldLastModified());
|
||||
if (lastModifiedObj instanceof Date) {
|
||||
return (Date) lastModifiedObj;
|
||||
} else if (lastModifiedObj instanceof String) {
|
||||
final Date lastModified = FessFunctions.parseDate(lastModifiedObj.toString());
|
||||
if (lastModified != null) {
|
||||
return lastModified;
|
||||
}
|
||||
} else if (lastModifiedObj instanceof String[]) {
|
||||
final String[] lastModifieds = (String[]) lastModifiedObj;
|
||||
if (lastModifieds.length > 0) {
|
||||
final Date lastModified = FessFunctions.parseDate(lastModifieds[0]);
|
||||
if (lastModified != null) {
|
||||
return lastModified;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return responseData.getLastModified();
|
||||
}
|
||||
|
||||
protected boolean hasTitle(final Map<String, Object> dataMap) {
|
||||
final Object titleObj = dataMap.get(fessConfig.getIndexFieldTitle());
|
||||
if (titleObj != null) {
|
||||
|
@ -366,7 +392,7 @@ public abstract class AbstractFessFileTransformer extends AbstractTransformer im
|
|||
return new ExtractData();
|
||||
}
|
||||
|
||||
private String getResourceName(final ResponseData responseData) {
|
||||
protected String getResourceName(final ResponseData responseData) {
|
||||
String name = responseData.getUrl();
|
||||
final String enc = responseData.getCharSet();
|
||||
|
||||
|
|
|
@ -288,6 +288,8 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
|
|||
|
||||
/** The key of the configuration. e.g. title=title:string
|
||||
Title=title:string
|
||||
last_modified=Last-Save-Date:date
|
||||
last_modified=Last-Modified:date
|
||||
*/
|
||||
String CRAWLER_METADATA_NAME_MAPPING = "crawler.metadata.name.mapping";
|
||||
|
||||
|
@ -2208,6 +2210,8 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
|
|||
* Get the value for the key 'crawler.metadata.name.mapping'. <br>
|
||||
* The value is, e.g. title=title:string
|
||||
Title=title:string
|
||||
last_modified=Last-Save-Date:date
|
||||
last_modified=Last-Modified:date
|
||||
<br>
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
*/
|
||||
|
@ -8615,7 +8619,8 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
|
|||
defaultMap.put(FessConfig.CRAWLER_FAILURE_URL_STATUS_CODES, "404");
|
||||
defaultMap.put(FessConfig.CRAWLER_SYSTEM_MONITOR_INTERVAL, "60");
|
||||
defaultMap.put(FessConfig.CRAWLER_METADATA_CONTENT_EXCLUDES, "resourceName,X-Parsed-By,Content-Encoding.*,Content-Type.*");
|
||||
defaultMap.put(FessConfig.CRAWLER_METADATA_NAME_MAPPING, "title=title:string\nTitle=title:string\n");
|
||||
defaultMap.put(FessConfig.CRAWLER_METADATA_NAME_MAPPING,
|
||||
"title=title:string\nTitle=title:string\nlast_modified=Last-Save-Date:date\nlast_modified=Last-Modified:date\n");
|
||||
defaultMap.put(FessConfig.CRAWLER_DOCUMENT_HTML_CONTENT_XPATH, "//BODY");
|
||||
defaultMap.put(FessConfig.CRAWLER_DOCUMENT_HTML_LANG_XPATH, "//HTML/@lang");
|
||||
defaultMap.put(FessConfig.CRAWLER_DOCUMENT_HTML_DIGEST_XPATH, "//META[@name='description']/@content");
|
||||
|
|
|
@ -23,7 +23,6 @@ import java.nio.file.Files;
|
|||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.text.DecimalFormat;
|
||||
import java.text.ParseException;
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.time.LocalDateTime;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
|
@ -46,6 +45,7 @@ import org.codelibs.fess.Constants;
|
|||
import org.codelibs.fess.entity.FacetQueryView;
|
||||
import org.codelibs.fess.helper.ViewHelper;
|
||||
import org.codelibs.fess.util.ComponentUtil;
|
||||
import org.elasticsearch.common.joda.Joda;
|
||||
import org.lastaflute.di.util.LdiURLUtil;
|
||||
import org.lastaflute.web.util.LaRequestUtil;
|
||||
import org.lastaflute.web.util.LaResponseUtil;
|
||||
|
@ -113,7 +113,7 @@ public class FessFunctions {
|
|||
}
|
||||
|
||||
public static Date parseDate(final String value) {
|
||||
return parseDate(value, Constants.ISO_DATETIME_FORMAT);
|
||||
return parseDate(value, Constants.DATE_OPTIONAL_TIME);
|
||||
}
|
||||
|
||||
public static Date parseDate(final String value, final String format) {
|
||||
|
@ -121,10 +121,9 @@ public class FessFunctions {
|
|||
return null;
|
||||
}
|
||||
try {
|
||||
final SimpleDateFormat sdf = new SimpleDateFormat(format);
|
||||
sdf.setTimeZone(Constants.TIMEZONE_UTC);
|
||||
return sdf.parse(value);
|
||||
} catch (final ParseException e) {
|
||||
final long time = Joda.forPattern(format).parseMillis(value);
|
||||
return new Date(time);
|
||||
} catch (final Exception e) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -177,6 +177,8 @@ crawler.metadata.content.excludes=resourceName,X-Parsed-By,Content-Encoding.*,Co
|
|||
crawler.metadata.name.mapping=\
|
||||
title=title:string\n\
|
||||
Title=title:string\n\
|
||||
last_modified=Last-Save-Date:date\n\
|
||||
last_modified=Last-Modified:date\n\
|
||||
|
||||
# html
|
||||
crawler.document.html.content.xpath=//BODY
|
||||
|
|
|
@ -0,0 +1,45 @@
|
|||
/*
|
||||
* Copyright 2012-2019 CodeLibs Project and the Others.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
* either express or implied. See the License for the specific language
|
||||
* governing permissions and limitations under the License.
|
||||
*/
|
||||
package org.codelibs.fess.taglib;
|
||||
|
||||
import java.util.Date;
|
||||
|
||||
import org.codelibs.fess.unit.UnitFessTestCase;
|
||||
|
||||
public class FessFunctionsTest extends UnitFessTestCase {
|
||||
public void test_parseDate() {
|
||||
Date date;
|
||||
|
||||
date = FessFunctions.parseDate("");
|
||||
assertNull(date);
|
||||
|
||||
date = FessFunctions.parseDate("2004-04-01T12:34:56.123Z");
|
||||
assertEquals("2004-04-01T12:34:56.123Z", FessFunctions.formatDate(date));
|
||||
|
||||
date = FessFunctions.parseDate("2004-04-01T12:34:56Z");
|
||||
assertEquals("2004-04-01T12:34:56.000Z", FessFunctions.formatDate(date));
|
||||
|
||||
date = FessFunctions.parseDate("2004-04-01T12:34Z");
|
||||
assertEquals("2004-04-01T12:34:00.000Z", FessFunctions.formatDate(date));
|
||||
|
||||
date = FessFunctions.parseDate("2004-04-01");
|
||||
assertEquals("2004-04-01T00:00:00.000Z", FessFunctions.formatDate(date));
|
||||
|
||||
date = FessFunctions.parseDate("2004-04-01T12:34:56.123+09:00");
|
||||
assertEquals("2004-04-01T03:34:56.123Z", FessFunctions.formatDate(date));
|
||||
|
||||
}
|
||||
}
|
Loading…
Add table
Reference in a new issue