Fix #2020: parse the PDF ModDate metadata field

This commit is contained in:
Shinsuke Sugaya 2019-02-15 06:45:09 +09:00
parent 0d774a8160
commit ae365b14c6
4 changed files with 18 additions and 2 deletions

View file

@ -290,6 +290,7 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
Title=title:string
Last-Save-Date=last_modified:date
Last-Modified=last_modified:date
ModDate=last_modified:pdf_date
*/
String CRAWLER_METADATA_NAME_MAPPING = "crawler.metadata.name.mapping";
@ -2212,6 +2213,7 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
Title=title:string
Last-Save-Date=last_modified:date
Last-Modified=last_modified:date
ModDate=last_modified:pdf_date
<br>
* @return The value of found property. (NotNull: if not found, exception but basically no way)
*/
@ -8619,8 +8621,9 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
defaultMap.put(FessConfig.CRAWLER_FAILURE_URL_STATUS_CODES, "404");
defaultMap.put(FessConfig.CRAWLER_SYSTEM_MONITOR_INTERVAL, "60");
defaultMap.put(FessConfig.CRAWLER_METADATA_CONTENT_EXCLUDES, "resourceName,X-Parsed-By,Content-Encoding.*,Content-Type.*");
defaultMap.put(FessConfig.CRAWLER_METADATA_NAME_MAPPING,
"title=title:string\nTitle=title:string\nLast-Save-Date=last_modified:date\nLast-Modified=last_modified:date\n");
defaultMap
.put(FessConfig.CRAWLER_METADATA_NAME_MAPPING,
"title=title:string\nTitle=title:string\nLast-Save-Date=last_modified:date\nLast-Modified=last_modified:date\nModDate=last_modified:pdf_date\n");
defaultMap.put(FessConfig.CRAWLER_DOCUMENT_HTML_CONTENT_XPATH, "//BODY");
defaultMap.put(FessConfig.CRAWLER_DOCUMENT_HTML_LANG_XPATH, "//HTML/@lang");
defaultMap.put(FessConfig.CRAWLER_DOCUMENT_HTML_DIGEST_XPATH, "//META[@name='description']/@content");

View file

@ -27,6 +27,7 @@ import java.text.SimpleDateFormat;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.Base64;
import java.util.Calendar;
import java.util.Date;
import java.util.Enumeration;
import java.util.List;
@ -40,6 +41,7 @@ import java.util.stream.Collectors;
import javax.servlet.http.HttpServletRequest;
import org.apache.commons.text.StringEscapeUtils;
import org.apache.pdfbox.util.DateConverter;
import org.codelibs.core.lang.StringUtil;
import org.codelibs.fess.Constants;
import org.codelibs.fess.entity.FacetQueryView;
@ -64,6 +66,8 @@ public class FessFunctions {
// Prefix string "facet."; its usage lies outside this excerpt — presumably prepended to facet-related keys (TODO confirm).
private static final String FACET_PREFIX = "facet.";
// Format name that switches date parsing to the PDF date syntax: when the parsing code
// receives this value as its format argument, it delegates to PDFBox's DateConverter
// instead of building a Joda pattern from the format string.
private static final String PDF_DATE = "pdf_date";
private static LoadingCache<String, Long> resourceHashCache = CacheBuilder.newBuilder().maximumSize(1000)
.expireAfterWrite(10, TimeUnit.MINUTES).build(new CacheLoader<String, Long>() {
@Override
@ -120,7 +124,13 @@ public class FessFunctions {
if (value == null) {
return null;
}
try {
if (PDF_DATE.equals(format)) {
final Calendar cal = DateConverter.toCalendar(value);
return cal != null ? cal.getTime() : null;
}
final long time = Joda.forPattern(format).parseMillis(value);
return new Date(time);
} catch (final Exception e) {

View file

@ -179,6 +179,7 @@ title=title:string\n\
Title=title:string\n\
Last-Save-Date=last_modified:date\n\
Last-Modified=last_modified:date\n\
ModDate=last_modified:pdf_date\n\
# html
crawler.document.html.content.xpath=//BODY

View file

@ -41,5 +41,7 @@ public class FessFunctionsTest extends UnitFessTestCase {
date = FessFunctions.parseDate("2004-04-01T12:34:56.123+09:00");
assertEquals("2004-04-01T03:34:56.123Z", FessFunctions.formatDate(date));
date = FessFunctions.parseDate("D:20040401033456-05'00'", "pdf_date");
assertEquals("2004-04-01T08:34:56.000Z", FessFunctions.formatDate(date));
}
}