fix #2020 parse ModDate
This commit is contained in:
parent
0d774a8160
commit
ae365b14c6
4 changed files with 18 additions and 2 deletions
|
@ -290,6 +290,7 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
|
|||
Title=title:string
|
||||
Last-Save-Date=last_modified:date
|
||||
Last-Modified=last_modified:date
|
||||
ModDate=last_modified:pdf_date
|
||||
*/
|
||||
String CRAWLER_METADATA_NAME_MAPPING = "crawler.metadata.name.mapping";
|
||||
|
||||
|
@ -2212,6 +2213,7 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
|
|||
Title=title:string
|
||||
Last-Save-Date=last_modified:date
|
||||
Last-Modified=last_modified:date
|
||||
ModDate=last_modified:pdf_date
|
||||
<br>
|
||||
* @return The value of the found property. (NotNull: if not found, an exception is thrown, but that basically never happens)
|
||||
*/
|
||||
|
@ -8619,8 +8621,9 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
|
|||
defaultMap.put(FessConfig.CRAWLER_FAILURE_URL_STATUS_CODES, "404");
|
||||
defaultMap.put(FessConfig.CRAWLER_SYSTEM_MONITOR_INTERVAL, "60");
|
||||
defaultMap.put(FessConfig.CRAWLER_METADATA_CONTENT_EXCLUDES, "resourceName,X-Parsed-By,Content-Encoding.*,Content-Type.*");
|
||||
defaultMap.put(FessConfig.CRAWLER_METADATA_NAME_MAPPING,
|
||||
"title=title:string\nTitle=title:string\nLast-Save-Date=last_modified:date\nLast-Modified=last_modified:date\n");
|
||||
defaultMap
|
||||
.put(FessConfig.CRAWLER_METADATA_NAME_MAPPING,
|
||||
"title=title:string\nTitle=title:string\nLast-Save-Date=last_modified:date\nLast-Modified=last_modified:date\nModDate=last_modified:pdf_date\n");
|
||||
defaultMap.put(FessConfig.CRAWLER_DOCUMENT_HTML_CONTENT_XPATH, "//BODY");
|
||||
defaultMap.put(FessConfig.CRAWLER_DOCUMENT_HTML_LANG_XPATH, "//HTML/@lang");
|
||||
defaultMap.put(FessConfig.CRAWLER_DOCUMENT_HTML_DIGEST_XPATH, "//META[@name='description']/@content");
|
||||
|
|
|
@ -27,6 +27,7 @@ import java.text.SimpleDateFormat;
|
|||
import java.time.LocalDateTime;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.util.Base64;
|
||||
import java.util.Calendar;
|
||||
import java.util.Date;
|
||||
import java.util.Enumeration;
|
||||
import java.util.List;
|
||||
|
@ -40,6 +41,7 @@ import java.util.stream.Collectors;
|
|||
import javax.servlet.http.HttpServletRequest;
|
||||
|
||||
import org.apache.commons.text.StringEscapeUtils;
|
||||
import org.apache.pdfbox.util.DateConverter;
|
||||
import org.codelibs.core.lang.StringUtil;
|
||||
import org.codelibs.fess.Constants;
|
||||
import org.codelibs.fess.entity.FacetQueryView;
|
||||
|
@ -64,6 +66,8 @@ public class FessFunctions {
|
|||
|
||||
private static final String FACET_PREFIX = "facet.";
|
||||
|
||||
private static final String PDF_DATE = "pdf_date";
|
||||
|
||||
private static LoadingCache<String, Long> resourceHashCache = CacheBuilder.newBuilder().maximumSize(1000)
|
||||
.expireAfterWrite(10, TimeUnit.MINUTES).build(new CacheLoader<String, Long>() {
|
||||
@Override
|
||||
|
@ -120,7 +124,13 @@ public class FessFunctions {
|
|||
if (value == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
try {
|
||||
if (PDF_DATE.equals(format)) {
|
||||
final Calendar cal = DateConverter.toCalendar(value);
|
||||
return cal != null ? cal.getTime() : null;
|
||||
}
|
||||
|
||||
final long time = Joda.forPattern(format).parseMillis(value);
|
||||
return new Date(time);
|
||||
} catch (final Exception e) {
|
||||
|
|
|
@ -179,6 +179,7 @@ title=title:string\n\
|
|||
Title=title:string\n\
|
||||
Last-Save-Date=last_modified:date\n\
|
||||
Last-Modified=last_modified:date\n\
|
||||
ModDate=last_modified:pdf_date\n\
|
||||
|
||||
# html
|
||||
crawler.document.html.content.xpath=//BODY
|
||||
|
|
|
@ -41,5 +41,7 @@ public class FessFunctionsTest extends UnitFessTestCase {
|
|||
date = FessFunctions.parseDate("2004-04-01T12:34:56.123+09:00");
|
||||
assertEquals("2004-04-01T03:34:56.123Z", FessFunctions.formatDate(date));
|
||||
|
||||
date = FessFunctions.parseDate("D:20040401033456-05'00'", "pdf_date");
|
||||
assertEquals("2004-04-01T08:34:56.000Z", FessFunctions.formatDate(date));
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue