improve cache handling, remove unused jsps, update lastaflute
This commit is contained in:
parent
8c1369fd8f
commit
3c1bb626fd
23 changed files with 763 additions and 328 deletions
|
@ -41,6 +41,7 @@ map:{
|
|||
; fess = map:{
|
||||
; path = ..
|
||||
; freeGenList = list:{ env ; config ; label ; message ; mail ; template ; jsp ; doc }
|
||||
; configPluginInterface = org.codelibs.fess.mylasta.direction.FessProp
|
||||
; propertiesHtmlList = list:{ env ; config ; label ; message }
|
||||
}
|
||||
}
|
||||
|
|
|
@ -45,6 +45,7 @@ import org.codelibs.fess.crawler.entity.UrlQueue;
|
|||
import org.codelibs.fess.crawler.exception.CrawlerSystemException;
|
||||
import org.codelibs.fess.crawler.exception.CrawlingAccessException;
|
||||
import org.codelibs.fess.crawler.extractor.Extractor;
|
||||
import org.codelibs.fess.crawler.transformer.impl.AbstractTransformer;
|
||||
import org.codelibs.fess.crawler.util.CrawlingParameterUtil;
|
||||
import org.codelibs.fess.es.config.exentity.CrawlingConfig;
|
||||
import org.codelibs.fess.es.config.exentity.CrawlingConfig.ConfigName;
|
||||
|
@ -57,36 +58,20 @@ import org.codelibs.fess.helper.SambaHelper;
|
|||
import org.codelibs.fess.helper.SystemHelper;
|
||||
import org.codelibs.fess.mylasta.direction.FessConfig;
|
||||
import org.codelibs.fess.util.ComponentUtil;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import jcifs.smb.ACE;
|
||||
import jcifs.smb.SID;
|
||||
|
||||
public abstract class AbstractFessFileTransformer extends AbstractFessXpathTransformer {
|
||||
private static final Logger logger = LoggerFactory // NOPMD
|
||||
.getLogger(AbstractFessFileTransformer.class);
|
||||
public abstract class AbstractFessFileTransformer extends AbstractTransformer implements FessTransformer {
|
||||
|
||||
public String encoding = null;
|
||||
|
||||
public String noTitleLabel = "No title.";
|
||||
|
||||
public int abbreviationMarginLength = 10;
|
||||
|
||||
public boolean ignoreEmptyContent = false;
|
||||
|
||||
public int maxTitleLength = 100;
|
||||
|
||||
public int maxDigestLength = 200;
|
||||
|
||||
public boolean appendMetaContentToContent = true;
|
||||
|
||||
public boolean appendBodyContentToContent = true;
|
||||
protected String charsetName = Constants.UTF_8;
|
||||
|
||||
public Map<String, String> parentEncodingMap = Collections.synchronizedMap(new LruHashMap<String, String>(1000));
|
||||
|
||||
protected Map<String, String> metaContentMapping;
|
||||
|
||||
protected FessConfig fessConfig;
|
||||
|
||||
protected abstract Extractor getExtractor(ResponseData responseData);
|
||||
|
||||
@Override
|
||||
|
@ -109,11 +94,11 @@ public abstract class AbstractFessFileTransformer extends AbstractFessXpathTrans
|
|||
try {
|
||||
final ExtractData extractData = extractor.getText(in, params);
|
||||
content = extractData.getContent();
|
||||
if (ignoreEmptyContent && StringUtil.isBlank(content)) {
|
||||
if (fessConfig.isCrawlerDocumentFileIgnoreEmptyContent() && StringUtil.isBlank(content)) {
|
||||
return null;
|
||||
}
|
||||
if (logger.isDebugEnabled()) {
|
||||
logger.debug("ExtractData: " + extractData);
|
||||
if (getLogger().isDebugEnabled()) {
|
||||
getLogger().debug("ExtractData: " + extractData);
|
||||
}
|
||||
// meta
|
||||
for (final String key : extractData.getKeySet()) {
|
||||
|
@ -191,10 +176,10 @@ public abstract class AbstractFessFileTransformer extends AbstractFessXpathTrans
|
|||
putResultDataBody(dataMap, fessConfig.getIndexFieldSegment(), sessionId);
|
||||
// content
|
||||
final StringBuilder buf = new StringBuilder(content.length() + 1000);
|
||||
if (appendBodyContentToContent) {
|
||||
if (fessConfig.isCrawlerDocumentFileAppendBodyContent()) {
|
||||
buf.append(content);
|
||||
}
|
||||
if (appendMetaContentToContent) {
|
||||
if (fessConfig.isCrawlerDocumentFileAppendMetaContent()) {
|
||||
if (buf.length() > 0) {
|
||||
buf.append(' ');
|
||||
}
|
||||
|
@ -206,23 +191,29 @@ public abstract class AbstractFessFileTransformer extends AbstractFessXpathTrans
|
|||
} else {
|
||||
putResultDataBody(dataMap, fessConfig.getIndexFieldContent(), StringUtil.EMPTY);
|
||||
}
|
||||
if (Constants.TRUE.equalsIgnoreCase(fieldConfigMap.get(fessConfig.getIndexFieldCache()))
|
||||
|| fessConfig.isCrawlerDocumentCacheEnable()) {
|
||||
final String cache = content.trim().replaceAll("[ \\t\\x0B\\f]+", " ");
|
||||
// text cache
|
||||
putResultDataBody(dataMap, fessConfig.getIndexFieldCache(), cache);
|
||||
putResultDataBody(dataMap, fessConfig.getIndexFieldHasCache(), Constants.TRUE);
|
||||
if ((Constants.TRUE.equalsIgnoreCase(fieldConfigMap.get(fessConfig.getIndexFieldCache())) || fessConfig
|
||||
.isCrawlerDocumentCacheEnable()) && fessConfig.isSupportedDocumentCacheMimetypes(mimeType)) {
|
||||
if (responseData.getContentLength() > 0
|
||||
&& responseData.getContentLength() <= fessConfig.getCrawlerDocumentCacheMaxSizeAsInteger().longValue()) {
|
||||
|
||||
final String cache = content.trim().replaceAll("[ \\t\\x0B\\f]+", " ");
|
||||
// text cache
|
||||
putResultDataBody(dataMap, fessConfig.getIndexFieldCache(), cache);
|
||||
putResultDataBody(dataMap, fessConfig.getIndexFieldHasCache(), Constants.TRUE);
|
||||
}
|
||||
}
|
||||
// digest
|
||||
putResultDataBody(dataMap, fessConfig.getIndexFieldDigest(),
|
||||
Constants.DIGEST_PREFIX + abbreviate(normalizeContent(content), maxDigestLength));
|
||||
Constants.DIGEST_PREFIX
|
||||
+ abbreviate(normalizeContent(content), fessConfig.getCrawlerDocumentFileMaxDigestLengthAsInteger()));
|
||||
// title
|
||||
if (!dataMap.containsKey(fessConfig.getIndexFieldTitle())) {
|
||||
if (url.endsWith("/")) {
|
||||
if (StringUtil.isNotBlank(content)) {
|
||||
putResultDataBody(dataMap, fessConfig.getIndexFieldTitle(), abbreviate(body, maxTitleLength));
|
||||
putResultDataBody(dataMap, fessConfig.getIndexFieldTitle(),
|
||||
abbreviate(body, fessConfig.getCrawlerDocumentFileMaxTitleLengthAsInteger()));
|
||||
} else {
|
||||
putResultDataBody(dataMap, fessConfig.getIndexFieldTitle(), noTitleLabel);
|
||||
putResultDataBody(dataMap, fessConfig.getIndexFieldTitle(), fessConfig.getCrawlerDocumentFileNoTitleLabel());
|
||||
}
|
||||
} else {
|
||||
final String u = decodeUrlAsName(url, url.startsWith("file:"));
|
||||
|
@ -235,9 +226,9 @@ public abstract class AbstractFessFileTransformer extends AbstractFessXpathTrans
|
|||
}
|
||||
}
|
||||
// host
|
||||
putResultDataBody(dataMap, fessConfig.getIndexFieldHost(), getHost(url));
|
||||
putResultDataBody(dataMap, fessConfig.getIndexFieldHost(), getHostOnFile(url));
|
||||
// site
|
||||
putResultDataBody(dataMap, fessConfig.getIndexFieldSite(), getSite(url, urlEncoding));
|
||||
putResultDataBody(dataMap, fessConfig.getIndexFieldSite(), getSiteOnFile(url, urlEncoding));
|
||||
// url
|
||||
putResultDataBody(dataMap, fessConfig.getIndexFieldUrl(), url);
|
||||
// created
|
||||
|
@ -287,8 +278,8 @@ public abstract class AbstractFessFileTransformer extends AbstractFessXpathTrans
|
|||
final SID sid = item.getSID();
|
||||
roleTypeList.add(sambaHelper.getAccountId(sid));
|
||||
}
|
||||
if (logger.isDebugEnabled()) {
|
||||
logger.debug("smbUrl:" + responseData.getUrl() + " roleType:" + roleTypeList.toString());
|
||||
if (getLogger().isDebugEnabled()) {
|
||||
getLogger().debug("smbUrl:" + responseData.getUrl() + " roleType:" + roleTypeList.toString());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -335,7 +326,7 @@ public abstract class AbstractFessFileTransformer extends AbstractFessXpathTrans
|
|||
protected String abbreviate(final String str, final int maxWidth) {
|
||||
String newStr = StringUtils.abbreviate(str, maxWidth);
|
||||
try {
|
||||
if (newStr.getBytes(Constants.UTF_8).length > maxWidth + abbreviationMarginLength) {
|
||||
if (newStr.getBytes(Constants.UTF_8).length > maxWidth + fessConfig.getCrawlerDocumentFileAbbreviationMarginLengthAsInteger()) {
|
||||
newStr = StringUtils.abbreviate(str, maxWidth / 2);
|
||||
}
|
||||
} catch (final UnsupportedEncodingException e) {
|
||||
|
@ -370,7 +361,7 @@ public abstract class AbstractFessFileTransformer extends AbstractFessXpathTrans
|
|||
}
|
||||
|
||||
String enc = Constants.UTF_8;
|
||||
if (encoding == null) {
|
||||
if (StringUtil.isBlank(fessConfig.getCrawlerDocumentFileNameEncoding())) {
|
||||
final UrlQueue<?> urlQueue = CrawlingParameterUtil.getUrlQueue();
|
||||
if (urlQueue != null) {
|
||||
final String parentUrl = urlQueue.getParentUrl();
|
||||
|
@ -385,7 +376,7 @@ public abstract class AbstractFessFileTransformer extends AbstractFessXpathTrans
|
|||
}
|
||||
}
|
||||
} else {
|
||||
enc = encoding;
|
||||
enc = fessConfig.getCrawlerDocumentFileNameEncoding();
|
||||
}
|
||||
|
||||
final String escapedUrl = escapePlus ? url.replace("+", "%2B") : url;
|
||||
|
@ -415,8 +406,7 @@ public abstract class AbstractFessFileTransformer extends AbstractFessXpathTrans
|
|||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getHost(final String url) {
|
||||
protected String getHostOnFile(final String url) {
|
||||
if (StringUtil.isBlank(url)) {
|
||||
return StringUtil.EMPTY; // empty
|
||||
}
|
||||
|
@ -435,30 +425,29 @@ public abstract class AbstractFessFileTransformer extends AbstractFessXpathTrans
|
|||
return "localhost";
|
||||
}
|
||||
|
||||
return super.getHost(url);
|
||||
return getHost(url);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getSite(final String url, final String encoding) {
|
||||
protected String getSiteOnFile(final String url, final String encoding) {
|
||||
if (StringUtil.isBlank(url)) {
|
||||
return StringUtil.EMPTY; // empty
|
||||
}
|
||||
|
||||
if (url.startsWith("file:////")) {
|
||||
final String value = decodeUrlAsName(url.substring(9), true);
|
||||
return StringUtils.abbreviate("\\\\" + value.replace('/', '\\'), maxSiteLength);
|
||||
return StringUtils.abbreviate("\\\\" + value.replace('/', '\\'), getMaxSiteLength());
|
||||
} else if (url.startsWith("file:")) {
|
||||
final String value = decodeUrlAsName(url.substring(5), true);
|
||||
if (value.length() > 2 && value.charAt(2) == ':') {
|
||||
// Windows
|
||||
return StringUtils.abbreviate(value.substring(1).replace('/', '\\'), maxSiteLength);
|
||||
return StringUtils.abbreviate(value.substring(1).replace('/', '\\'), getMaxSiteLength());
|
||||
} else {
|
||||
// Unix
|
||||
return StringUtils.abbreviate(value, maxSiteLength);
|
||||
return StringUtils.abbreviate(value, getMaxSiteLength());
|
||||
}
|
||||
}
|
||||
|
||||
return super.getSite(url, encoding);
|
||||
return getSite(url, encoding);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -480,4 +469,5 @@ public abstract class AbstractFessFileTransformer extends AbstractFessXpathTrans
|
|||
}
|
||||
metaContentMapping.put(metaname, dynamicField);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -15,10 +15,13 @@
|
|||
*/
|
||||
package org.codelibs.fess.crawler.transformer;
|
||||
|
||||
import javax.annotation.PostConstruct;
|
||||
|
||||
import org.codelibs.fess.crawler.entity.ResponseData;
|
||||
import org.codelibs.fess.crawler.extractor.Extractor;
|
||||
import org.codelibs.fess.crawler.extractor.ExtractorFactory;
|
||||
import org.codelibs.fess.exception.FessSystemException;
|
||||
import org.codelibs.fess.mylasta.direction.FessConfig;
|
||||
import org.codelibs.fess.util.ComponentUtil;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
@ -26,6 +29,21 @@ import org.slf4j.LoggerFactory;
|
|||
public class FessFileTransformer extends AbstractFessFileTransformer {
|
||||
private static final Logger logger = LoggerFactory.getLogger(FessFileTransformer.class);
|
||||
|
||||
/**
 * Lifecycle callback (annotated {@code @PostConstruct}) that stores the
 * {@link FessConfig} returned by {@code ComponentUtil.getFessConfig()} in the
 * {@code fessConfig} field declared on {@code AbstractFessFileTransformer}.
 */
@PostConstruct
|
||||
public void init() {
|
||||
fessConfig = ComponentUtil.getFessConfig();
|
||||
}
|
||||
|
||||
/**
 * Returns the configuration held in the {@code fessConfig} field.
 *
 * @return the value assigned by {@link #init()}; NOTE(review): the field has
 *         no initializer in the visible code, so this presumably returns
 *         {@code null} if called before init() runs — confirm
 */
@Override
|
||||
public FessConfig getFessConfig() {
|
||||
return fessConfig;
|
||||
}
|
||||
|
||||
/**
 * Supplies this class's logger to code that logs through {@code getLogger()}
 * instead of a logger field of its own.
 *
 * @return the static SLF4J logger created for {@code FessFileTransformer}
 */
@Override
|
||||
public Logger getLogger() {
|
||||
return logger;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Extractor getExtractor(final ResponseData responseData) {
|
||||
final ExtractorFactory extractorFactory = ComponentUtil.getExtractorFactory();
|
||||
|
|
|
@ -15,12 +15,35 @@
|
|||
*/
|
||||
package org.codelibs.fess.crawler.transformer;
|
||||
|
||||
import javax.annotation.PostConstruct;
|
||||
|
||||
import org.codelibs.fess.crawler.entity.ResponseData;
|
||||
import org.codelibs.fess.crawler.extractor.Extractor;
|
||||
import org.codelibs.fess.exception.FessSystemException;
|
||||
import org.codelibs.fess.mylasta.direction.FessConfig;
|
||||
import org.codelibs.fess.util.ComponentUtil;
|
||||
import org.lastaflute.di.core.SingletonLaContainer;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
public class FessTikaTransformer extends AbstractFessFileTransformer {
|
||||
private static final Logger logger = LoggerFactory.getLogger(FessTikaTransformer.class);
|
||||
|
||||
/**
 * Lifecycle callback (annotated {@code @PostConstruct}) that stores the
 * {@link FessConfig} returned by {@code ComponentUtil.getFessConfig()} in the
 * {@code fessConfig} field declared on {@code AbstractFessFileTransformer}.
 */
@PostConstruct
|
||||
public void init() {
|
||||
fessConfig = ComponentUtil.getFessConfig();
|
||||
}
|
||||
|
||||
/**
 * Returns the configuration held in the {@code fessConfig} field.
 *
 * @return the value assigned by {@link #init()}; NOTE(review): the field has
 *         no initializer in the visible code, so this presumably returns
 *         {@code null} if called before init() runs — confirm
 */
@Override
|
||||
public FessConfig getFessConfig() {
|
||||
return fessConfig;
|
||||
}
|
||||
|
||||
/**
 * Supplies this class's logger to code that logs through {@code getLogger()}
 * instead of a logger field of its own.
 *
 * @return the static SLF4J logger created for {@code FessTikaTransformer}
 */
@Override
|
||||
public Logger getLogger() {
|
||||
return logger;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Extractor getExtractor(final ResponseData responseData) {
|
||||
final Extractor extractor = SingletonLaContainer.getComponent("tikaExtractor");
|
||||
|
|
|
@ -22,29 +22,20 @@ import java.util.Map;
|
|||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.codelibs.core.lang.StringUtil;
|
||||
import org.codelibs.fess.crawler.transformer.impl.XpathTransformer;
|
||||
import org.codelibs.fess.mylasta.direction.FessConfig;
|
||||
import org.codelibs.fess.util.ComponentUtil;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import groovy.lang.Binding;
|
||||
import groovy.lang.GroovyShell;
|
||||
|
||||
public abstract class AbstractFessXpathTransformer extends XpathTransformer {
|
||||
private static final Logger logger = LoggerFactory.getLogger(AbstractFessXpathTransformer.class);
|
||||
public interface FessTransformer {
|
||||
|
||||
public int maxSiteLength = 50;
|
||||
FessConfig getFessConfig();
|
||||
|
||||
public String unknownHostname = "unknown";
|
||||
Logger getLogger();
|
||||
|
||||
public String siteEncoding;
|
||||
|
||||
public boolean replaceSiteEncodingWhenEnglish = false;
|
||||
|
||||
public boolean appendResultData = true;
|
||||
|
||||
protected String getHost(final String u) {
|
||||
public default String getHost(final String u) {
|
||||
if (StringUtil.isBlank(u)) {
|
||||
return StringUtil.EMPTY; // empty
|
||||
}
|
||||
|
@ -63,13 +54,13 @@ public abstract class AbstractFessXpathTransformer extends XpathTransformer {
|
|||
}
|
||||
|
||||
if (url.equals(originalUrl)) {
|
||||
return unknownHostname;
|
||||
return getFessConfig().getCrawlerDocumentUnknownHostname();
|
||||
}
|
||||
|
||||
return url;
|
||||
}
|
||||
|
||||
protected String getSite(final String u, final String encoding) {
|
||||
public default String getSite(final String u, final String encoding) {
|
||||
if (StringUtil.isBlank(u)) {
|
||||
return StringUtil.EMPTY; // empty
|
||||
}
|
||||
|
@ -87,15 +78,15 @@ public abstract class AbstractFessXpathTransformer extends XpathTransformer {
|
|||
|
||||
if (encoding != null) {
|
||||
String enc;
|
||||
if (siteEncoding != null) {
|
||||
if (replaceSiteEncodingWhenEnglish) {
|
||||
if (StringUtil.isNotBlank(getFessConfig().getCrawlerDocumentSiteEncoding())) {
|
||||
if (getFessConfig().isCrawlerDocumentUseSiteEncodingOnEnglish()) {
|
||||
if ("ISO-8859-1".equalsIgnoreCase(encoding) || "US-ASCII".equalsIgnoreCase(encoding)) {
|
||||
enc = siteEncoding;
|
||||
enc = getFessConfig().getCrawlerDocumentSiteEncoding();
|
||||
} else {
|
||||
enc = encoding;
|
||||
}
|
||||
} else {
|
||||
enc = siteEncoding;
|
||||
enc = getFessConfig().getCrawlerDocumentSiteEncoding();
|
||||
}
|
||||
} else {
|
||||
enc = encoding;
|
||||
|
@ -106,39 +97,35 @@ public abstract class AbstractFessXpathTransformer extends XpathTransformer {
|
|||
} catch (final Exception e) {}
|
||||
}
|
||||
|
||||
return StringUtils.abbreviate(url, maxSiteLength);
|
||||
return StringUtils.abbreviate(url, getMaxSiteLength());
|
||||
}
|
||||
|
||||
protected String normalizeContent(final String content) {
|
||||
public default String normalizeContent(final String content) {
|
||||
if (content == null) {
|
||||
return StringUtil.EMPTY; // empty
|
||||
}
|
||||
return content.replaceAll("\\s+", " ");
|
||||
}
|
||||
|
||||
protected void putResultDataBody(final Map<String, Object> dataMap, final String key, final Object value) {
|
||||
public default void putResultDataBody(final Map<String, Object> dataMap, final String key, final Object value) {
|
||||
final FessConfig fessConfig = ComponentUtil.getFessConfig();
|
||||
if (fessConfig.getIndexFieldUrl().equals(key)) {
|
||||
dataMap.put(key, value);
|
||||
} else if (dataMap.containsKey(key)) {
|
||||
if (appendResultData) {
|
||||
if (getFessConfig().isCrawlerDocumentAppendData()) {
|
||||
final Object oldValue = dataMap.get(key);
|
||||
if (key.endsWith("_m")) {
|
||||
final Object[] oldValues = (Object[]) oldValue;
|
||||
if (value.getClass().isArray()) {
|
||||
final Object[] newValues = (Object[]) value;
|
||||
final Object[] values = Arrays.copyOf(oldValues, oldValues.length + newValues.length);
|
||||
for (int i = 0; i < newValues.length; i++) {
|
||||
values[values.length - 1 + i] = newValues[i];
|
||||
}
|
||||
dataMap.put(key, values);
|
||||
} else {
|
||||
final Object[] values = Arrays.copyOf(oldValues, oldValues.length + 1);
|
||||
values[values.length - 1] = value;
|
||||
dataMap.put(key, values);
|
||||
final Object[] oldValues = (Object[]) oldValue;
|
||||
if (value.getClass().isArray()) {
|
||||
final Object[] newValues = (Object[]) value;
|
||||
final Object[] values = Arrays.copyOf(oldValues, oldValues.length + newValues.length);
|
||||
for (int i = 0; i < newValues.length; i++) {
|
||||
values[values.length - 1 + i] = newValues[i];
|
||||
}
|
||||
dataMap.put(key, values);
|
||||
} else {
|
||||
dataMap.put(key, oldValue + " " + value);
|
||||
final Object[] values = Arrays.copyOf(oldValues, oldValues.length + 1);
|
||||
values[values.length - 1] = value;
|
||||
dataMap.put(key, values);
|
||||
}
|
||||
} else {
|
||||
dataMap.put(key, value);
|
||||
|
@ -148,7 +135,8 @@ public abstract class AbstractFessXpathTransformer extends XpathTransformer {
|
|||
}
|
||||
}
|
||||
|
||||
protected void putResultDataWithTemplate(final Map<String, Object> dataMap, final String key, final Object value, final String template) {
|
||||
public default void putResultDataWithTemplate(final Map<String, Object> dataMap, final String key, final Object value,
|
||||
final String template) {
|
||||
Object target = value;
|
||||
if (template != null) {
|
||||
final Map<String, Object> paramMap = new HashMap<>(dataMap.size() + 1);
|
||||
|
@ -173,7 +161,7 @@ public abstract class AbstractFessXpathTransformer extends XpathTransformer {
|
|||
}
|
||||
}
|
||||
|
||||
protected String evaluateValue(final String template, final Map<String, Object> paramMap) {
|
||||
public default String evaluateValue(final String template, final Map<String, Object> paramMap) {
|
||||
if (StringUtil.isEmpty(template)) {
|
||||
return StringUtil.EMPTY;
|
||||
}
|
||||
|
@ -185,8 +173,13 @@ public abstract class AbstractFessXpathTransformer extends XpathTransformer {
|
|||
}
|
||||
return value.toString();
|
||||
} catch (final Exception e) {
|
||||
logger.warn("Invalid value format: " + template, e);
|
||||
getLogger().warn("Invalid value format: " + template, e);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Returns the maximum length passed to {@code StringUtils.abbreviate} when
 * building the "site" value, read from the configuration supplied by
 * {@link #getFessConfig()}.
 *
 * @return the result of {@code getCrawlerDocumentMaxSiteLengthAsInteger()};
 *         NOTE(review): presumably backed by the
 *         {@code crawler.document.max.site.length} property and unboxed from an
 *         {@code Integer} — confirm it can never be {@code null}
 */
public default int getMaxSiteLength() {
|
||||
return getFessConfig().getCrawlerDocumentMaxSiteLengthAsInteger();
|
||||
}
|
||||
|
||||
}
|
|
@ -29,6 +29,7 @@ import java.util.List;
|
|||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import javax.annotation.PostConstruct;
|
||||
import javax.xml.transform.TransformerException;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
|
@ -47,6 +48,7 @@ import org.codelibs.fess.crawler.entity.UrlQueue;
|
|||
import org.codelibs.fess.crawler.exception.ChildUrlsException;
|
||||
import org.codelibs.fess.crawler.exception.CrawlerSystemException;
|
||||
import org.codelibs.fess.crawler.exception.CrawlingAccessException;
|
||||
import org.codelibs.fess.crawler.transformer.impl.XpathTransformer;
|
||||
import org.codelibs.fess.crawler.util.CrawlingParameterUtil;
|
||||
import org.codelibs.fess.crawler.util.ResponseDataUtil;
|
||||
import org.codelibs.fess.es.config.exentity.CrawlingConfig;
|
||||
|
@ -68,30 +70,31 @@ import org.w3c.dom.Node;
|
|||
import org.w3c.dom.NodeList;
|
||||
import org.xml.sax.InputSource;
|
||||
|
||||
public class FessXpathTransformer extends AbstractFessXpathTransformer {
|
||||
public class FessXpathTransformer extends XpathTransformer implements FessTransformer {
|
||||
private static final Logger logger = LoggerFactory.getLogger(FessXpathTransformer.class);
|
||||
|
||||
private static final int UTF8_BOM_SIZE = 3;
|
||||
|
||||
public String cacheXpath = "//BODY";
|
||||
|
||||
public String contentXpath = "//BODY";
|
||||
|
||||
public String langXpath = "//HTML/@lang";
|
||||
|
||||
public String digestXpath = "//META[@name='description']/@content";
|
||||
|
||||
public String canonicalXpath = "//LINK[@rel='canonical']/@href";
|
||||
|
||||
public List<String> prunedTagList = new ArrayList<String>();
|
||||
|
||||
public boolean prunedCacheContent = true;
|
||||
|
||||
public int maxDigestLength = 200;
|
||||
public Map<String, String> convertUrlMap = new HashMap<>();
|
||||
|
||||
public int maxCacheLength = 2621440; // 2.5Mbytes
|
||||
protected FessConfig fessConfig;
|
||||
|
||||
public Map<String, String> convertUrlMap = new HashMap<String, String>();
|
||||
/**
 * Lifecycle callback (annotated {@code @PostConstruct}) that stores the
 * {@link FessConfig} returned by {@code ComponentUtil.getFessConfig()} in this
 * transformer's {@code fessConfig} field.
 */
@PostConstruct
|
||||
public void init() {
|
||||
fessConfig = ComponentUtil.getFessConfig();
|
||||
}
|
||||
|
||||
/**
 * Returns the configuration held in the {@code fessConfig} field.
 *
 * @return the value assigned by {@link #init()}; NOTE(review): the field has
 *         no initializer in the visible code, so this presumably returns
 *         {@code null} if called before init() runs — confirm
 */
@Override
|
||||
public FessConfig getFessConfig() {
|
||||
return fessConfig;
|
||||
}
|
||||
|
||||
/**
 * Supplies this class's logger to code that logs through {@code getLogger()}
 * instead of a logger field of its own.
 *
 * @return the static SLF4J logger created for {@code FessXpathTransformer}
 */
@Override
|
||||
public Logger getLogger() {
|
||||
return logger;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void storeData(final ResponseData responseData, final ResultData resultData) {
|
||||
|
@ -181,7 +184,7 @@ public class FessXpathTransformer extends AbstractFessXpathTransformer {
|
|||
|
||||
protected void putAdditionalData(final Map<String, Object> dataMap, final ResponseData responseData, final Document document) {
|
||||
// canonical
|
||||
if (StringUtil.isNotBlank(canonicalXpath)) {
|
||||
if (StringUtil.isNotBlank(fessConfig.getCrawlerDocumentHtmlCannonicalXpath())) {
|
||||
final String canonicalUrl = getCanonicalUrl(responseData, document);
|
||||
if (canonicalUrl != null && !canonicalUrl.equals(responseData.getUrl())) {
|
||||
final Set<RequestData> childUrlSet = new HashSet<>();
|
||||
|
@ -202,6 +205,7 @@ public class FessXpathTransformer extends AbstractFessXpathTransformer {
|
|||
String url = responseData.getUrl();
|
||||
final String indexingTarget = crawlingConfig.getIndexingTarget(url);
|
||||
url = pathMappingHelper.replaceUrl(sessionId, url);
|
||||
final String mimeType = responseData.getMimeType();
|
||||
|
||||
final Map<String, String> fieldConfigMap = crawlingConfig.getConfigParameterMap(ConfigName.FIELD);
|
||||
|
||||
|
@ -223,26 +227,32 @@ public class FessXpathTransformer extends AbstractFessXpathTransformer {
|
|||
putResultDataBody(dataMap, fessConfig.getIndexFieldExpires(), documentExpires);
|
||||
}
|
||||
// lang
|
||||
final String lang = systemHelper.normalizeLang(getSingleNodeValue(document, langXpath, true));
|
||||
final String lang = systemHelper.normalizeLang(getSingleNodeValue(document, fessConfig.getCrawlerDocumentHtmlLangXpath(), true));
|
||||
if (lang != null) {
|
||||
putResultDataBody(dataMap, fessConfig.getIndexFieldLang(), lang);
|
||||
}
|
||||
// title
|
||||
// content
|
||||
putResultDataBody(dataMap, fessConfig.getIndexFieldContent(), getDocumentContent(responseData, document));
|
||||
if (Constants.TRUE.equalsIgnoreCase(fieldConfigMap.get(fessConfig.getIndexFieldCache()))
|
||||
|| fessConfig.isCrawlerDocumentCacheEnable()) {
|
||||
String charSet = responseData.getCharSet();
|
||||
if (charSet == null) {
|
||||
charSet = Constants.UTF_8;
|
||||
}
|
||||
try {
|
||||
// cache
|
||||
putResultDataBody(dataMap, fessConfig.getIndexFieldCache(),
|
||||
new String(InputStreamUtil.getBytes(responseData.getResponseBody()), charSet));
|
||||
putResultDataBody(dataMap, fessConfig.getIndexFieldHasCache(), Constants.TRUE);
|
||||
} catch (final Exception e) {
|
||||
logger.warn("Failed to write a cache: " + sessionId + ":" + responseData, e);
|
||||
if ((Constants.TRUE.equalsIgnoreCase(fieldConfigMap.get(fessConfig.getIndexFieldCache())) || fessConfig
|
||||
.isCrawlerDocumentCacheEnable()) && fessConfig.isSupportedDocumentCacheMimetypes(mimeType)) {
|
||||
if (responseData.getContentLength() > 0
|
||||
&& responseData.getContentLength() <= fessConfig.getCrawlerDocumentCacheMaxSizeAsInteger().longValue()) {
|
||||
String charSet = responseData.getCharSet();
|
||||
if (charSet == null) {
|
||||
charSet = Constants.UTF_8;
|
||||
}
|
||||
try {
|
||||
// cache
|
||||
putResultDataBody(dataMap, fessConfig.getIndexFieldCache(),
|
||||
new String(InputStreamUtil.getBytes(responseData.getResponseBody()), charSet));
|
||||
putResultDataBody(dataMap, fessConfig.getIndexFieldHasCache(), Constants.TRUE);
|
||||
} catch (final Exception e) {
|
||||
logger.warn("Failed to write a cache: " + sessionId + ":" + responseData, e);
|
||||
}
|
||||
} else {
|
||||
logger.debug("Content size is too large({} > {}): {}", responseData.getContentLength(),
|
||||
fessConfig.getCrawlerDocumentCacheMaxSizeAsInteger(), responseData.getUrl());
|
||||
}
|
||||
}
|
||||
// digest
|
||||
|
@ -261,7 +271,6 @@ public class FessXpathTransformer extends AbstractFessXpathTransformer {
|
|||
// anchor
|
||||
putResultDataBody(dataMap, fessConfig.getIndexFieldAnchor(), getAnchorList(document, responseData));
|
||||
// mimetype
|
||||
final String mimeType = responseData.getMimeType();
|
||||
putResultDataBody(dataMap, fessConfig.getIndexFieldMimetype(), mimeType);
|
||||
if (fileTypeHelper != null) {
|
||||
// filetype
|
||||
|
@ -324,7 +333,7 @@ public class FessXpathTransformer extends AbstractFessXpathTransformer {
|
|||
}
|
||||
|
||||
protected String getCanonicalUrl(final ResponseData responseData, final Document document) {
|
||||
final String canonicalUrl = getSingleNodeValue(document, canonicalXpath, false);
|
||||
final String canonicalUrl = getSingleNodeValue(document, fessConfig.getCrawlerDocumentHtmlCannonicalXpath(), false);
|
||||
if (StringUtil.isNotBlank(canonicalUrl)) {
|
||||
return canonicalUrl;
|
||||
}
|
||||
|
@ -332,13 +341,15 @@ public class FessXpathTransformer extends AbstractFessXpathTransformer {
|
|||
}
|
||||
|
||||
protected String getDocumentDigest(final ResponseData responseData, final Document document) {
|
||||
final String digest = getSingleNodeValue(document, digestXpath, false);
|
||||
final String digest = getSingleNodeValue(document, fessConfig.getCrawlerDocumentHtmlDigestXpath(), false);
|
||||
if (StringUtil.isNotBlank(digest)) {
|
||||
return digest;
|
||||
}
|
||||
|
||||
final String body = normalizeContent(removeCommentTag(getSingleNodeValue(document, contentXpath, prunedCacheContent)));
|
||||
return StringUtils.abbreviate(body, maxDigestLength);
|
||||
final String body =
|
||||
normalizeContent(removeCommentTag(getSingleNodeValue(document, fessConfig.getCrawlerDocumentHtmlContentXpath(),
|
||||
prunedCacheContent)));
|
||||
return StringUtils.abbreviate(body, fessConfig.getCrawlerDocumentHtmlMaxDigestLengthAsInteger());
|
||||
}
|
||||
|
||||
String removeCommentTag(final String content) {
|
||||
|
@ -364,7 +375,7 @@ public class FessXpathTransformer extends AbstractFessXpathTransformer {
|
|||
}
|
||||
|
||||
private String getDocumentContent(final ResponseData responseData, final Document document) {
|
||||
return normalizeContent(getSingleNodeValue(document, contentXpath, true));
|
||||
return normalizeContent(getSingleNodeValue(document, fessConfig.getCrawlerDocumentHtmlContentXpath(), true));
|
||||
}
|
||||
|
||||
protected String getSingleNodeValue(final Document document, final String xpath, final boolean pruned) {
|
||||
|
@ -420,7 +431,7 @@ public class FessXpathTransformer extends AbstractFessXpathTransformer {
|
|||
}
|
||||
|
||||
protected boolean isPrunedTag(final String tagName) {
|
||||
for (final String name : prunedTagList) {
|
||||
for (final String name : getCrawlerDocumentHtmlPrunedTags()) {
|
||||
if (name.equalsIgnoreCase(tagName)) {
|
||||
return true;
|
||||
}
|
||||
|
@ -492,12 +503,6 @@ public class FessXpathTransformer extends AbstractFessXpathTransformer {
|
|||
return urlList;
|
||||
}
|
||||
|
||||
public void addPrunedTag(final String tagName) {
|
||||
if (StringUtil.isNotBlank(tagName)) {
|
||||
prunedTagList.add(tagName);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object getData(final AccessResultData<?> accessResultData) {
|
||||
final byte[] data = accessResultData.getData();
|
||||
|
@ -554,4 +559,9 @@ public class FessXpathTransformer extends AbstractFessXpathTransformer {
|
|||
private boolean isUtf8BomBytes(final byte[] b) {
|
||||
return b[0] == (byte) 0xEF && b[1] == (byte) 0xBB && b[2] == (byte) 0xBF;
|
||||
}
|
||||
|
||||
/**
 * Returns the tag names that {@code isPrunedTag} compares against
 * (case-insensitively), read from the configuration.
 *
 * @return the array produced by
 *         {@code fessConfig.getCrawlerDocumentHtmlPrunedTagsAsArray()};
 *         NOTE(review): presumably parsed from the
 *         {@code crawler.document.html.pruned.tags} property (e.g.
 *         "noscript,script") — confirm against the FessProp implementation
 */
protected String[] getCrawlerDocumentHtmlPrunedTags() {
|
||||
return fessConfig.getCrawlerDocumentHtmlPrunedTagsAsArray();
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -38,6 +38,7 @@ import java.util.regex.Pattern;
|
|||
import javax.annotation.PostConstruct;
|
||||
import javax.annotation.Resource;
|
||||
|
||||
import org.apache.commons.lang3.StringEscapeUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.codelibs.core.CoreLibConstants;
|
||||
import org.codelibs.core.lang.StringUtil;
|
||||
|
@ -398,7 +399,7 @@ public class ViewHelper implements Serializable {
|
|||
if (locale == null) {
|
||||
locale = Locale.ENGLISH;
|
||||
}
|
||||
String url = DocumentUtil.getValue(doc, "url", String.class);
|
||||
String url = DocumentUtil.getValue(doc, fessConfig.getIndexFieldUrl(), String.class);
|
||||
if (url == null) {
|
||||
url = ComponentUtil.getMessageManager().getMessage(locale, "labels.search_unknown");
|
||||
}
|
||||
|
@ -417,6 +418,10 @@ public class ViewHelper implements Serializable {
|
|||
|
||||
String cache = DocumentUtil.getValue(doc, fessConfig.getIndexFieldCache(), String.class);
|
||||
if (cache != null) {
|
||||
String mimetype = DocumentUtil.getValue(doc, fessConfig.getIndexFieldMimetype(), String.class);
|
||||
if (!ComponentUtil.getFessConfig().isHtmlMimetypeForCache(mimetype)) {
|
||||
cache = StringEscapeUtils.escapeHtml4(cache);
|
||||
}
|
||||
cache = pathMappingHelper.replaceUrls(cache);
|
||||
if (queries != null && queries.length > 0) {
|
||||
doc.put("hlCache", replaceHighlightQueries(cache, queries));
|
||||
|
|
|
@ -20,7 +20,7 @@ import org.lastaflute.core.direction.exception.ConfigPropertyNotFoundException;
|
|||
/**
|
||||
* @author FreeGen
|
||||
*/
|
||||
public interface FessConfig extends FessEnv, FessProp {
|
||||
public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction.FessProp {
|
||||
|
||||
/** The key of the configuration. e.g. Fess */
|
||||
String DOMAIN_TITLE = "domain.title";
|
||||
|
@ -66,9 +66,75 @@ public interface FessConfig extends FessEnv, FessProp {
|
|||
-XX:+DisableExplicitGC */
|
||||
String JVM_SUGGEST_OPTIONS = "jvm.suggest.options";
|
||||
|
||||
/** The key of the configuration. e.g. 50 */
|
||||
String CRAWLER_DOCUMENT_MAX_SITE_LENGTH = "crawler.document.max.site.length";
|
||||
|
||||
/** The key of the configuration. e.g. UTF-8 */
|
||||
String CRAWLER_DOCUMENT_SITE_ENCODING = "crawler.document.site.encoding";
|
||||
|
||||
/** The key of the configuration. e.g. unknown */
|
||||
String CRAWLER_DOCUMENT_UNKNOWN_HOSTNAME = "crawler.document.unknown.hostname";
|
||||
|
||||
/** The key of the configuration. e.g. false */
|
||||
String CRAWLER_DOCUMENT_USE_SITE_ENCODING_ON_ENGLISH = "crawler.document.use.site.encoding.on.english";
|
||||
|
||||
/** The key of the configuration. e.g. true */
|
||||
String CRAWLER_DOCUMENT_APPEND_DATA = "crawler.document.append.data";
|
||||
|
||||
/** The key of the configuration. e.g. //BODY */
|
||||
String CRAWLER_DOCUMENT_HTML_CONTENT_XPATH = "crawler.document.html.content.xpath";
|
||||
|
||||
/** The key of the configuration. e.g. //HTML/@lang */
|
||||
String CRAWLER_DOCUMENT_HTML_LANG_XPATH = "crawler.document.html.lang.xpath";
|
||||
|
||||
/** The key of the configuration. e.g. //META[@name='description']/@content */
|
||||
String CRAWLER_DOCUMENT_HTML_DIGEST_XPATH = "crawler.document.html.digest.xpath";
|
||||
|
||||
/** The key of the configuration. e.g. //LINK[@rel='canonical']/@href */
|
||||
String CRAWLER_DOCUMENT_HTML_CANNONICAL_XPATH = "crawler.document.html.cannonical.xpath";
|
||||
|
||||
/** The key of the configuration. e.g. noscript,script */
|
||||
String CRAWLER_DOCUMENT_HTML_PRUNED_TAGS = "crawler.document.html.pruned.tags";
|
||||
|
||||
/** The key of the configuration. e.g. 200 */
|
||||
String CRAWLER_DOCUMENT_HTML_MAX_DIGEST_LENGTH = "crawler.document.html.max.digest.length";
|
||||
|
||||
/** The key of the configuration. e.g. */
|
||||
String CRAWLER_DOCUMENT_FILE_NAME_ENCODING = "crawler.document.file.name.encoding";
|
||||
|
||||
/** The key of the configuration. e.g. No title. */
|
||||
String CRAWLER_DOCUMENT_FILE_NO_TITLE_LABEL = "crawler.document.file.no.title.label";
|
||||
|
||||
/** The key of the configuration. e.g. 10 */
|
||||
String CRAWLER_DOCUMENT_FILE_ABBREVIATION_MARGIN_LENGTH = "crawler.document.file.abbreviation.margin.length";
|
||||
|
||||
/** The key of the configuration. e.g. false */
|
||||
String CRAWLER_DOCUMENT_FILE_IGNORE_EMPTY_CONTENT = "crawler.document.file.ignore.empty.content";
|
||||
|
||||
/** The key of the configuration. e.g. 100 */
|
||||
String CRAWLER_DOCUMENT_FILE_MAX_TITLE_LENGTH = "crawler.document.file.max.title.length";
|
||||
|
||||
/** The key of the configuration. e.g. 200 */
|
||||
String CRAWLER_DOCUMENT_FILE_MAX_DIGEST_LENGTH = "crawler.document.file.max.digest.length";
|
||||
|
||||
/** The key of the configuration. e.g. true */
|
||||
String CRAWLER_DOCUMENT_FILE_APPEND_META_CONTENT = "crawler.document.file.append.meta.content";
|
||||
|
||||
/** The key of the configuration. e.g. true */
|
||||
String CRAWLER_DOCUMENT_FILE_APPEND_BODY_CONTENT = "crawler.document.file.append.body.content";
|
||||
|
||||
/** The key of the configuration. e.g. true */
|
||||
String CRAWLER_DOCUMENT_CACHE_ENABLE = "crawler.document.cache.enable";
|
||||
|
||||
/** The key of the configuration. e.g. 2621440 */
|
||||
String CRAWLER_DOCUMENT_CACHE_MAX_SIZE = "crawler.document.cache.max.size";
|
||||
|
||||
/** The key of the configuration. e.g. text/html */
|
||||
String CRAWLER_DOCUMENT_CACHE_SUPPORTED_MIMETYPES = "crawler.document.cache.supported.mimetypes";
|
||||
|
||||
/** The key of the configuration. e.g. text/html */
|
||||
String CRAWLER_DOCUMENT_CACHE_HTML_MIMETYPES = "crawler.document.cache.html.mimetypes";
|
||||
|
||||
/** The key of the configuration. e.g. favorite_count */
|
||||
String INDEX_FIELD_favorite_count = "index.field.favorite_count";
|
||||
|
||||
|
@ -475,19 +541,272 @@ public interface FessConfig extends FessEnv, FessProp {
|
|||
String getJvmSuggestOptions();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'crawler.document.cache.enable'. <br>
|
||||
* Get the value for the key 'crawler.document.max.site.length'. <br>
|
||||
* The value is, e.g. 50 <br>
|
||||
* comment: common
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
*/
|
||||
String getCrawlerDocumentMaxSiteLength();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'crawler.document.max.site.length' as {@link Integer}. <br>
|
||||
* The value is, e.g. 50 <br>
|
||||
* comment: common
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
* @throws NumberFormatException When the property is not integer.
|
||||
*/
|
||||
Integer getCrawlerDocumentMaxSiteLengthAsInteger();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'crawler.document.site.encoding'. <br>
|
||||
* The value is, e.g. UTF-8 <br>
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
*/
|
||||
String getCrawlerDocumentSiteEncoding();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'crawler.document.unknown.hostname'. <br>
|
||||
* The value is, e.g. unknown <br>
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
*/
|
||||
String getCrawlerDocumentUnknownHostname();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'crawler.document.use.site.encoding.on.english'. <br>
|
||||
* The value is, e.g. false <br>
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
*/
|
||||
String getCrawlerDocumentUseSiteEncodingOnEnglish();
|
||||
|
||||
/**
|
||||
* Is the property for the key 'crawler.document.use.site.encoding.on.english' true? <br>
|
||||
* The value is, e.g. false <br>
|
||||
* @return The determination, true or false. (if not found, exception but basically no way)
|
||||
*/
|
||||
boolean isCrawlerDocumentUseSiteEncodingOnEnglish();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'crawler.document.append.data'. <br>
|
||||
* The value is, e.g. true <br>
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
*/
|
||||
String getCrawlerDocumentAppendData();
|
||||
|
||||
/**
|
||||
* Is the property for the key 'crawler.document.append.data' true? <br>
|
||||
* The value is, e.g. true <br>
|
||||
* @return The determination, true or false. (if not found, exception but basically no way)
|
||||
*/
|
||||
boolean isCrawlerDocumentAppendData();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'crawler.document.html.content.xpath'. <br>
|
||||
* The value is, e.g. //BODY <br>
|
||||
* comment: html
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
*/
|
||||
String getCrawlerDocumentHtmlContentXpath();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'crawler.document.html.lang.xpath'. <br>
|
||||
* The value is, e.g. //HTML/@lang <br>
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
*/
|
||||
String getCrawlerDocumentHtmlLangXpath();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'crawler.document.html.digest.xpath'. <br>
|
||||
* The value is, e.g. //META[@name='description']/@content <br>
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
*/
|
||||
String getCrawlerDocumentHtmlDigestXpath();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'crawler.document.html.cannonical.xpath'. <br>
|
||||
* The value is, e.g. //LINK[@rel='canonical']/@href <br>
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
*/
|
||||
String getCrawlerDocumentHtmlCannonicalXpath();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'crawler.document.html.pruned.tags'. <br>
|
||||
* The value is, e.g. noscript,script <br>
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
*/
|
||||
String getCrawlerDocumentHtmlPrunedTags();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'crawler.document.html.max.digest.length'. <br>
|
||||
* The value is, e.g. 200 <br>
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
*/
|
||||
String getCrawlerDocumentHtmlMaxDigestLength();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'crawler.document.html.max.digest.length' as {@link Integer}. <br>
|
||||
* The value is, e.g. 200 <br>
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
* @throws NumberFormatException When the property is not integer.
|
||||
*/
|
||||
Integer getCrawlerDocumentHtmlMaxDigestLengthAsInteger();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'crawler.document.file.name.encoding'. <br>
|
||||
* The value is, e.g. <br>
|
||||
* comment: file
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
*/
|
||||
String getCrawlerDocumentFileNameEncoding();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'crawler.document.file.name.encoding' as {@link Integer}. <br>
|
||||
* The value is, e.g. <br>
|
||||
* comment: file
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
* @throws NumberFormatException When the property is not integer.
|
||||
*/
|
||||
Integer getCrawlerDocumentFileNameEncodingAsInteger();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'crawler.document.file.no.title.label'. <br>
|
||||
* The value is, e.g. No title. <br>
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
*/
|
||||
String getCrawlerDocumentFileNoTitleLabel();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'crawler.document.file.abbreviation.margin.length'. <br>
|
||||
* The value is, e.g. 10 <br>
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
*/
|
||||
String getCrawlerDocumentFileAbbreviationMarginLength();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'crawler.document.file.abbreviation.margin.length' as {@link Integer}. <br>
|
||||
* The value is, e.g. 10 <br>
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
* @throws NumberFormatException When the property is not integer.
|
||||
*/
|
||||
Integer getCrawlerDocumentFileAbbreviationMarginLengthAsInteger();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'crawler.document.file.ignore.empty.content'. <br>
|
||||
* The value is, e.g. false <br>
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
*/
|
||||
String getCrawlerDocumentFileIgnoreEmptyContent();
|
||||
|
||||
/**
|
||||
* Is the property for the key 'crawler.document.file.ignore.empty.content' true? <br>
|
||||
* The value is, e.g. false <br>
|
||||
* @return The determination, true or false. (if not found, exception but basically no way)
|
||||
*/
|
||||
boolean isCrawlerDocumentFileIgnoreEmptyContent();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'crawler.document.file.max.title.length'. <br>
|
||||
* The value is, e.g. 100 <br>
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
*/
|
||||
String getCrawlerDocumentFileMaxTitleLength();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'crawler.document.file.max.title.length' as {@link Integer}. <br>
|
||||
* The value is, e.g. 100 <br>
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
* @throws NumberFormatException When the property is not integer.
|
||||
*/
|
||||
Integer getCrawlerDocumentFileMaxTitleLengthAsInteger();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'crawler.document.file.max.digest.length'. <br>
|
||||
* The value is, e.g. 200 <br>
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
*/
|
||||
String getCrawlerDocumentFileMaxDigestLength();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'crawler.document.file.max.digest.length' as {@link Integer}. <br>
|
||||
* The value is, e.g. 200 <br>
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
* @throws NumberFormatException When the property is not integer.
|
||||
*/
|
||||
Integer getCrawlerDocumentFileMaxDigestLengthAsInteger();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'crawler.document.file.append.meta.content'. <br>
|
||||
* The value is, e.g. true <br>
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
*/
|
||||
String getCrawlerDocumentFileAppendMetaContent();
|
||||
|
||||
/**
|
||||
* Is the property for the key 'crawler.document.file.append.meta.content' true? <br>
|
||||
* The value is, e.g. true <br>
|
||||
* @return The determination, true or false. (if not found, exception but basically no way)
|
||||
*/
|
||||
boolean isCrawlerDocumentFileAppendMetaContent();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'crawler.document.file.append.body.content'. <br>
|
||||
* The value is, e.g. true <br>
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
*/
|
||||
String getCrawlerDocumentFileAppendBodyContent();
|
||||
|
||||
/**
|
||||
* Is the property for the key 'crawler.document.file.append.body.content' true? <br>
|
||||
* The value is, e.g. true <br>
|
||||
* @return The determination, true or false. (if not found, exception but basically no way)
|
||||
*/
|
||||
boolean isCrawlerDocumentFileAppendBodyContent();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'crawler.document.cache.enable'. <br>
|
||||
* The value is, e.g. true <br>
|
||||
* comment: cache
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
*/
|
||||
String getCrawlerDocumentCacheEnable();
|
||||
|
||||
/**
|
||||
* Is the property for the key 'crawler.document.cache.enable' true? <br>
|
||||
* The value is, e.g. false <br>
|
||||
* The value is, e.g. true <br>
|
||||
* comment: cache
|
||||
* @return The determination, true or false. (if not found, exception but basically no way)
|
||||
*/
|
||||
boolean isCrawlerDocumentCacheEnable();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'crawler.document.cache.max.size'. <br>
|
||||
* The value is, e.g. 2621440 <br>
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
*/
|
||||
String getCrawlerDocumentCacheMaxSize();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'crawler.document.cache.max.size' as {@link Integer}. <br>
|
||||
* The value is, e.g. 2621440 <br>
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
* @throws NumberFormatException When the property is not integer.
|
||||
*/
|
||||
Integer getCrawlerDocumentCacheMaxSizeAsInteger();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'crawler.document.cache.supported.mimetypes'. <br>
|
||||
* The value is, e.g. text/html <br>
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
*/
|
||||
String getCrawlerDocumentCacheSupportedMimetypes();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'crawler.document.cache.html.mimetypes'. <br>
|
||||
* The value is, e.g. text/html <br>
|
||||
* comment: ,text/plain,application/xml,application/pdf,application/msword,application/vnd.openxmlformats-officedocument.wordprocessingml.document,application/vnd.ms-excel,application/vnd.openxmlformats-officedocument.spreadsheetml.sheet,application/vnd.ms-powerpoint,application/vnd.openxmlformats-officedocument.presentationml.presentation
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
*/
|
||||
String getCrawlerDocumentCacheHtmlMimetypes();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'index.field.favorite_count'. <br>
|
||||
* The value is, e.g. favorite_count <br>
|
||||
|
@ -1515,6 +1834,126 @@ public interface FessConfig extends FessEnv, FessProp {
|
|||
return get(FessConfig.JVM_SUGGEST_OPTIONS);
|
||||
}
|
||||
|
||||
public String getCrawlerDocumentMaxSiteLength() {
|
||||
return get(FessConfig.CRAWLER_DOCUMENT_MAX_SITE_LENGTH);
|
||||
}
|
||||
|
||||
public Integer getCrawlerDocumentMaxSiteLengthAsInteger() {
|
||||
return getAsInteger(FessConfig.CRAWLER_DOCUMENT_MAX_SITE_LENGTH);
|
||||
}
|
||||
|
||||
public String getCrawlerDocumentSiteEncoding() {
|
||||
return get(FessConfig.CRAWLER_DOCUMENT_SITE_ENCODING);
|
||||
}
|
||||
|
||||
public String getCrawlerDocumentUnknownHostname() {
|
||||
return get(FessConfig.CRAWLER_DOCUMENT_UNKNOWN_HOSTNAME);
|
||||
}
|
||||
|
||||
public String getCrawlerDocumentUseSiteEncodingOnEnglish() {
|
||||
return get(FessConfig.CRAWLER_DOCUMENT_USE_SITE_ENCODING_ON_ENGLISH);
|
||||
}
|
||||
|
||||
public boolean isCrawlerDocumentUseSiteEncodingOnEnglish() {
|
||||
return is(FessConfig.CRAWLER_DOCUMENT_USE_SITE_ENCODING_ON_ENGLISH);
|
||||
}
|
||||
|
||||
public String getCrawlerDocumentAppendData() {
|
||||
return get(FessConfig.CRAWLER_DOCUMENT_APPEND_DATA);
|
||||
}
|
||||
|
||||
public boolean isCrawlerDocumentAppendData() {
|
||||
return is(FessConfig.CRAWLER_DOCUMENT_APPEND_DATA);
|
||||
}
|
||||
|
||||
public String getCrawlerDocumentHtmlContentXpath() {
|
||||
return get(FessConfig.CRAWLER_DOCUMENT_HTML_CONTENT_XPATH);
|
||||
}
|
||||
|
||||
public String getCrawlerDocumentHtmlLangXpath() {
|
||||
return get(FessConfig.CRAWLER_DOCUMENT_HTML_LANG_XPATH);
|
||||
}
|
||||
|
||||
public String getCrawlerDocumentHtmlDigestXpath() {
|
||||
return get(FessConfig.CRAWLER_DOCUMENT_HTML_DIGEST_XPATH);
|
||||
}
|
||||
|
||||
public String getCrawlerDocumentHtmlCannonicalXpath() {
|
||||
return get(FessConfig.CRAWLER_DOCUMENT_HTML_CANNONICAL_XPATH);
|
||||
}
|
||||
|
||||
public String getCrawlerDocumentHtmlPrunedTags() {
|
||||
return get(FessConfig.CRAWLER_DOCUMENT_HTML_PRUNED_TAGS);
|
||||
}
|
||||
|
||||
public String getCrawlerDocumentHtmlMaxDigestLength() {
|
||||
return get(FessConfig.CRAWLER_DOCUMENT_HTML_MAX_DIGEST_LENGTH);
|
||||
}
|
||||
|
||||
public Integer getCrawlerDocumentHtmlMaxDigestLengthAsInteger() {
|
||||
return getAsInteger(FessConfig.CRAWLER_DOCUMENT_HTML_MAX_DIGEST_LENGTH);
|
||||
}
|
||||
|
||||
public String getCrawlerDocumentFileNameEncoding() {
|
||||
return get(FessConfig.CRAWLER_DOCUMENT_FILE_NAME_ENCODING);
|
||||
}
|
||||
|
||||
public Integer getCrawlerDocumentFileNameEncodingAsInteger() {
|
||||
return getAsInteger(FessConfig.CRAWLER_DOCUMENT_FILE_NAME_ENCODING);
|
||||
}
|
||||
|
||||
public String getCrawlerDocumentFileNoTitleLabel() {
|
||||
return get(FessConfig.CRAWLER_DOCUMENT_FILE_NO_TITLE_LABEL);
|
||||
}
|
||||
|
||||
public String getCrawlerDocumentFileAbbreviationMarginLength() {
|
||||
return get(FessConfig.CRAWLER_DOCUMENT_FILE_ABBREVIATION_MARGIN_LENGTH);
|
||||
}
|
||||
|
||||
public Integer getCrawlerDocumentFileAbbreviationMarginLengthAsInteger() {
|
||||
return getAsInteger(FessConfig.CRAWLER_DOCUMENT_FILE_ABBREVIATION_MARGIN_LENGTH);
|
||||
}
|
||||
|
||||
public String getCrawlerDocumentFileIgnoreEmptyContent() {
|
||||
return get(FessConfig.CRAWLER_DOCUMENT_FILE_IGNORE_EMPTY_CONTENT);
|
||||
}
|
||||
|
||||
public boolean isCrawlerDocumentFileIgnoreEmptyContent() {
|
||||
return is(FessConfig.CRAWLER_DOCUMENT_FILE_IGNORE_EMPTY_CONTENT);
|
||||
}
|
||||
|
||||
public String getCrawlerDocumentFileMaxTitleLength() {
|
||||
return get(FessConfig.CRAWLER_DOCUMENT_FILE_MAX_TITLE_LENGTH);
|
||||
}
|
||||
|
||||
public Integer getCrawlerDocumentFileMaxTitleLengthAsInteger() {
|
||||
return getAsInteger(FessConfig.CRAWLER_DOCUMENT_FILE_MAX_TITLE_LENGTH);
|
||||
}
|
||||
|
||||
public String getCrawlerDocumentFileMaxDigestLength() {
|
||||
return get(FessConfig.CRAWLER_DOCUMENT_FILE_MAX_DIGEST_LENGTH);
|
||||
}
|
||||
|
||||
public Integer getCrawlerDocumentFileMaxDigestLengthAsInteger() {
|
||||
return getAsInteger(FessConfig.CRAWLER_DOCUMENT_FILE_MAX_DIGEST_LENGTH);
|
||||
}
|
||||
|
||||
public String getCrawlerDocumentFileAppendMetaContent() {
|
||||
return get(FessConfig.CRAWLER_DOCUMENT_FILE_APPEND_META_CONTENT);
|
||||
}
|
||||
|
||||
public boolean isCrawlerDocumentFileAppendMetaContent() {
|
||||
return is(FessConfig.CRAWLER_DOCUMENT_FILE_APPEND_META_CONTENT);
|
||||
}
|
||||
|
||||
public String getCrawlerDocumentFileAppendBodyContent() {
|
||||
return get(FessConfig.CRAWLER_DOCUMENT_FILE_APPEND_BODY_CONTENT);
|
||||
}
|
||||
|
||||
public boolean isCrawlerDocumentFileAppendBodyContent() {
|
||||
return is(FessConfig.CRAWLER_DOCUMENT_FILE_APPEND_BODY_CONTENT);
|
||||
}
|
||||
|
||||
public String getCrawlerDocumentCacheEnable() {
|
||||
return get(FessConfig.CRAWLER_DOCUMENT_CACHE_ENABLE);
|
||||
}
|
||||
|
@ -1523,6 +1962,22 @@ public interface FessConfig extends FessEnv, FessProp {
|
|||
return is(FessConfig.CRAWLER_DOCUMENT_CACHE_ENABLE);
|
||||
}
|
||||
|
||||
public String getCrawlerDocumentCacheMaxSize() {
|
||||
return get(FessConfig.CRAWLER_DOCUMENT_CACHE_MAX_SIZE);
|
||||
}
|
||||
|
||||
public Integer getCrawlerDocumentCacheMaxSizeAsInteger() {
|
||||
return getAsInteger(FessConfig.CRAWLER_DOCUMENT_CACHE_MAX_SIZE);
|
||||
}
|
||||
|
||||
public String getCrawlerDocumentCacheSupportedMimetypes() {
|
||||
return get(FessConfig.CRAWLER_DOCUMENT_CACHE_SUPPORTED_MIMETYPES);
|
||||
}
|
||||
|
||||
public String getCrawlerDocumentCacheHtmlMimetypes() {
|
||||
return get(FessConfig.CRAWLER_DOCUMENT_CACHE_HTML_MIMETYPES);
|
||||
}
|
||||
|
||||
public String getIndexFieldFavoriteCount() {
|
||||
return get(FessConfig.INDEX_FIELD_favorite_count);
|
||||
}
|
||||
|
|
|
@ -18,6 +18,7 @@ package org.codelibs.fess.mylasta.direction;
|
|||
import org.codelibs.core.lang.StringUtil;
|
||||
import org.codelibs.fess.Constants;
|
||||
import org.codelibs.fess.util.ComponentUtil;
|
||||
import org.codelibs.fess.util.StreamUtil;
|
||||
|
||||
public interface FessProp {
|
||||
public default String getProperty(String key) {
|
||||
|
@ -79,4 +80,30 @@ public interface FessProp {
|
|||
return getJvmSuggestOptions().split("\n");
|
||||
}
|
||||
|
||||
String getCrawlerDocumentHtmlPrunedTags();
|
||||
|
||||
public default String[] getCrawlerDocumentHtmlPrunedTagsAsArray() {
|
||||
return getCrawlerDocumentHtmlPrunedTags().split(",");
|
||||
}
|
||||
|
||||
String getCrawlerDocumentCacheHtmlMimetypes();
|
||||
|
||||
public default boolean isHtmlMimetypeForCache(String mimetype) {
|
||||
String[] mimetypes = getCrawlerDocumentCacheHtmlMimetypes().split(",");
|
||||
if (mimetypes.length == 1 && StringUtil.isBlank(mimetypes[0])) {
|
||||
return true;
|
||||
}
|
||||
return StreamUtil.of(mimetypes).anyMatch(s -> s.equalsIgnoreCase(mimetype));
|
||||
}
|
||||
|
||||
String getCrawlerDocumentCacheSupportedMimetypes();
|
||||
|
||||
public default boolean isSupportedDocumentCacheMimetypes(String mimetype) {
|
||||
String[] mimetypes = getCrawlerDocumentCacheSupportedMimetypes().split(",");
|
||||
if (mimetypes.length == 1 && StringUtil.isBlank(mimetypes[0])) {
|
||||
return true;
|
||||
}
|
||||
return StreamUtil.of(mimetypes).anyMatch(s -> s.equalsIgnoreCase(mimetype));
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -4,7 +4,6 @@
|
|||
<components namespace="fessCrawler">
|
||||
<include path="crawler/transformer_basic.xml"/>
|
||||
|
||||
|
||||
<component name="fessXpathTransformer" class="org.codelibs.fess.crawler.transformer.FessXpathTransformer" instance="singleton">
|
||||
<property name="name">"fessXpathTransformer"</property>
|
||||
<property name="featureMap">defaultFeatureMap</property>
|
||||
|
@ -16,31 +15,15 @@
|
|||
<property name="convertUrlMap">
|
||||
{"feed:" : "http:"}
|
||||
</property>
|
||||
<!--
|
||||
<property name="cacheXpath">"//BODY"</property>
|
||||
<property name="contentXpath">"//BODY"</property>
|
||||
<property name="anchorXpath">"//A/@href"</property>
|
||||
<property name="digestXpath">"//META[@name='description']/@content"</property>
|
||||
-->
|
||||
<property name="replaceSiteEncodingWhenEnglish">true</property>
|
||||
<property name="siteEncoding">"UTF-8"</property>
|
||||
<!-- segment -->
|
||||
<postConstruct name="addFieldRule">
|
||||
<arg>"title"</arg>
|
||||
<arg>"//TITLE"</arg>
|
||||
</postConstruct>
|
||||
<postConstruct name="addPrunedTag">
|
||||
<arg>"noscript"</arg>
|
||||
</postConstruct>
|
||||
<postConstruct name="addPrunedTag">
|
||||
<arg>"script"</arg>
|
||||
</postConstruct>
|
||||
</component>
|
||||
|
||||
<component name="fessFileTransformer" class="org.codelibs.fess.crawler.transformer.FessFileTransformer" instance="singleton">
|
||||
<property name="name">"fessFileTransformer"</property>
|
||||
<property name="replaceSiteEncodingWhenEnglish">true</property>
|
||||
<property name="siteEncoding">"UTF-8"</property>
|
||||
<postConstruct name="addMetaContentMapping">
|
||||
<arg>"title"</arg>
|
||||
<arg>"title"</arg>
|
||||
|
@ -60,8 +43,6 @@
|
|||
|
||||
<component name="fessTikaTransformer" class="org.codelibs.fess.crawler.transformer.FessTikaTransformer" instance="singleton">
|
||||
<property name="name">"fessTikaTransformer"</property>
|
||||
<property name="replaceSiteEncodingWhenEnglish">true</property>
|
||||
<property name="siteEncoding">"UTF-8"</property>
|
||||
<postConstruct name="addMetaContentMapping">
|
||||
<arg>"title"</arg>
|
||||
<arg>"title"</arg>
|
||||
|
|
|
@ -50,7 +50,37 @@ jvm.suggest.options=\
|
|||
# Index
|
||||
# ====
|
||||
|
||||
crawler.document.cache.enable=false
|
||||
# common
|
||||
crawler.document.max.site.length=50
|
||||
crawler.document.site.encoding=UTF-8
|
||||
crawler.document.unknown.hostname=unknown
|
||||
crawler.document.use.site.encoding.on.english=false
|
||||
crawler.document.append.data=true
|
||||
|
||||
# html
|
||||
crawler.document.html.content.xpath=//BODY
|
||||
crawler.document.html.lang.xpath=//HTML/@lang
|
||||
crawler.document.html.digest.xpath=//META[@name='description']/@content
|
||||
crawler.document.html.cannonical.xpath=//LINK[@rel='canonical']/@href
|
||||
crawler.document.html.pruned.tags=noscript,script
|
||||
crawler.document.html.max.digest.length=200
|
||||
|
||||
# file
|
||||
crawler.document.file.name.encoding=
|
||||
crawler.document.file.no.title.label=No title.
|
||||
crawler.document.file.abbreviation.margin.length=10
|
||||
crawler.document.file.ignore.empty.content=false
|
||||
crawler.document.file.max.title.length=100
|
||||
crawler.document.file.max.digest.length=200
|
||||
crawler.document.file.append.meta.content=true
|
||||
crawler.document.file.append.body.content=true
|
||||
|
||||
# cache
|
||||
crawler.document.cache.enable=true
|
||||
crawler.document.cache.max.size=2621440
|
||||
crawler.document.cache.supported.mimetypes=text/html
|
||||
# further mimetypes that can be appended to crawler.document.cache.supported.mimetypes above: ,text/plain,application/xml,application/pdf,application/msword,application/vnd.openxmlformats-officedocument.wordprocessingml.document,application/vnd.ms-excel,application/vnd.openxmlformats-officedocument.spreadsheetml.sheet,application/vnd.ms-powerpoint,application/vnd.openxmlformats-officedocument.presentationml.presentation
|
||||
crawler.document.cache.html.mimetypes=text/html
|
||||
|
||||
# field names
|
||||
index.field.favorite_count=favorite_count
|
||||
|
|
|
@ -1,9 +0,0 @@
|
|||
<%@page pageEncoding="UTF-8" %>
|
||||
<html>
|
||||
<head>
|
||||
<jsp:include page="indexHtmlHead.jsp"/>
|
||||
</head>
|
||||
<body>
|
||||
<jsp:include page="indexMain.jsp"/>
|
||||
</body>
|
||||
</html>
|
|
@ -1,4 +0,0 @@
|
|||
<%@page pageEncoding="UTF-8" %>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=<m:charset/>" />
|
||||
<meta content="no-cache" http-equiv="Cache-Control"/>
|
||||
<title><bean:message key="labels.mobile_search_title"/></title>
|
|
@ -1,14 +0,0 @@
|
|||
<%@page pageEncoding="UTF-8" %>
|
||||
<div>
|
||||
<div style="text-align: center;">
|
||||
<m:img src="logo-top.png" magniWidth="0.8" style="vertical-align: middle;" />
|
||||
<br/>
|
||||
<s:form>
|
||||
<div>
|
||||
<html:text property="query" title="Search" size="20" maxlength="1000" />
|
||||
<br/>
|
||||
<input type="submit" value="<bean:message key="labels.top.search"/>" name="search" />
|
||||
</div>
|
||||
</s:form>
|
||||
</div>
|
||||
</div>
|
|
@ -1,18 +0,0 @@
|
|||
<%@page pageEncoding="UTF-8" %>
|
||||
<html>
|
||||
<head>
|
||||
<jsp:include page="searchHtmlHead.jsp"/>
|
||||
</head>
|
||||
<body>
|
||||
<jsp:include page="searchHeader.jsp"/>
|
||||
<c:choose>
|
||||
<c:when test="${f:h(allRecordCount) != 0}">
|
||||
<jsp:include page="searchResults.jsp"/>
|
||||
</c:when>
|
||||
<c:otherwise>
|
||||
<jsp:include page="searchNoResult.jsp"/>
|
||||
</c:otherwise>
|
||||
</c:choose>
|
||||
<jsp:include page="searchFooter.jsp"/>
|
||||
</body>
|
||||
</html>
|
|
@ -1,5 +0,0 @@
|
|||
<%@page pageEncoding="UTF-8" %>
|
||||
<hr style="border-style: solid; border-color: #ffffff;"/>
|
||||
<div style="font-size: x-small; text-align: center;">
|
||||
<bean:message key="labels.footer.copyright"/>
|
||||
</div>
|
|
@ -1,13 +0,0 @@
|
|||
<%@page pageEncoding="UTF-8" %>
|
||||
<div id="header">
|
||||
<div>
|
||||
<s:form>
|
||||
<div>
|
||||
<m:img src="logo-top.png" magniWidth="0.3" />
|
||||
<br/>
|
||||
<html:text property="query" title="Search" size="16" maxlength="1000" />
|
||||
<input type="submit" value="<bean:message key="labels.search"/>" name="search"/>
|
||||
</div>
|
||||
</s:form>
|
||||
</div>
|
||||
</div>
|
|
@ -1,4 +0,0 @@
|
|||
<%@page pageEncoding="UTF-8" %>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=<m:charset/>" />
|
||||
<meta content="no-cache" http-equiv="Cache-Control"/>
|
||||
<title>${f:h(query)} - <bean:message key="labels.search_title"/></title>
|
|
@ -1,4 +0,0 @@
|
|||
<%@page pageEncoding="UTF-8" %>
|
||||
<div id="result">
|
||||
<bean:message key="labels.did_not_match" arg0="${f:h(query)}"/>
|
||||
</div>
|
|
@ -1,55 +0,0 @@
|
|||
<%@page pageEncoding="UTF-8" %>
|
||||
<div id="result">
|
||||
<div>
|
||||
<c:forEach var="doc" varStatus="s" items="${documentItems}">
|
||||
<div>
|
||||
<a href="${doc.urlLink}"><span>${f:h(doc.contentTitle)}</span></a>
|
||||
<span id="snip">
|
||||
<br/>
|
||||
<span style="color: #666666;">
|
||||
${doc.contentDescription}
|
||||
</span>
|
||||
</span>
|
||||
<span style="color: #008000;">
|
||||
<br/>
|
||||
${f:h(doc.site)}
|
||||
</span>
|
||||
<br/>
|
||||
</div>
|
||||
<br/>
|
||||
</c:forEach>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div id="subfooter" style="text-align: center;">
|
||||
<p>
|
||||
<c:if test="${existPrevPage}">
|
||||
<span>
|
||||
<s:link href="prev?query=${f:u(query)}&pn=${f:u(currentPageNumber)}&num=${f:u(pageSize)}">
|
||||
<bean:message key="labels.prev_page"/>
|
||||
</s:link>
|
||||
</span>
|
||||
</c:if>
|
||||
<c:forEach var="pageNumber" varStatus="s" items="${pageNumberList}">
|
||||
<c:if test="${pageNumber == currentPageNumber}">
|
||||
<span>
|
||||
${pageNumber}
|
||||
</span>
|
||||
</c:if>
|
||||
<c:if test="${pageNumber != currentPageNumber}">
|
||||
<span>
|
||||
<s:link href="move?query=${f:u(query)}&pn=${f:u(pageNumber)}&num=${f:u(pageSize)}">
|
||||
${f:h(pageNumber)}
|
||||
</s:link>
|
||||
</span>
|
||||
</c:if>
|
||||
</c:forEach>
|
||||
<c:if test="${existNextPage}">
|
||||
<span>
|
||||
<s:link href="next?query=${f:u(query)}&pn=${f:u(currentPageNumber)}&num=${f:u(pageSize)}">
|
||||
<bean:message key="labels.next_page"/>
|
||||
</s:link>
|
||||
</span>
|
||||
</c:if>
|
||||
</p>
|
||||
</div>
|
|
@ -36,8 +36,10 @@
|
|||
<div class="site ellipsis">
|
||||
<cite>${f:h(doc.sitePath)}</cite>
|
||||
<c:if test="${doc.has_cache=='true'}">
|
||||
<small>
|
||||
<la:link href="/cache/?docId=${doc.doc_id}${appendHighlightParams}" class="cache"><la:message
|
||||
key="labels.search_result_cache" /></la:link>
|
||||
</small>
|
||||
</c:if>
|
||||
</div>
|
||||
<div class="more hidden-md-up">
|
||||
|
|
|
@ -34,7 +34,7 @@ public class FessFileTransformerTest extends UnitFessTestCase {
|
|||
|
||||
public void test_decodeUrl_ok() throws Exception {
|
||||
String url, exp;
|
||||
final FessFileTransformer transformer = new FessFileTransformer();
|
||||
final FessFileTransformer transformer = createInstance();
|
||||
|
||||
url = "";
|
||||
exp = "";
|
||||
|
@ -62,156 +62,171 @@ public class FessFileTransformerTest extends UnitFessTestCase {
|
|||
}
|
||||
|
||||
public void test_decodeUrl_null() throws Exception {
|
||||
final FessFileTransformer transformer = new FessFileTransformer();
|
||||
final FessFileTransformer transformer = createInstance();
|
||||
assertNull(transformer.decodeUrlAsName(null, true));
|
||||
}
|
||||
|
||||
public void test_getHost_ok() {
|
||||
String url, exp;
|
||||
final FessFileTransformer transformer = new FessFileTransformer();
|
||||
final FessFileTransformer transformer = createInstance();
|
||||
|
||||
url = "";
|
||||
exp = "";
|
||||
assertEquals(exp, transformer.getHost(url));
|
||||
assertEquals(exp, transformer.getHostOnFile(url));
|
||||
|
||||
url = "http://server/home/user";
|
||||
exp = "server";
|
||||
assertEquals(exp, transformer.getHost(url));
|
||||
assertEquals(exp, transformer.getHostOnFile(url));
|
||||
|
||||
url = "file:/home/user";
|
||||
exp = "localhost";
|
||||
assertEquals(exp, transformer.getHost(url));
|
||||
assertEquals(exp, transformer.getHostOnFile(url));
|
||||
|
||||
url = "file:/c:/home/user";
|
||||
exp = "localhost";
|
||||
assertEquals(exp, transformer.getHost(url));
|
||||
assertEquals(exp, transformer.getHostOnFile(url));
|
||||
|
||||
url = "file:////server/home/user";
|
||||
exp = "server";
|
||||
assertEquals(exp, transformer.getHost(url));
|
||||
assertEquals(exp, transformer.getHostOnFile(url));
|
||||
|
||||
url = "file:/" + encodeUrl("ホーム") + "/user";
|
||||
exp = "localhost";
|
||||
assertEquals(exp, transformer.getHost(url));
|
||||
assertEquals(exp, transformer.getHostOnFile(url));
|
||||
|
||||
url = "file:/c:/" + encodeUrl("ホーム") + "/user";
|
||||
exp = "localhost";
|
||||
assertEquals(exp, transformer.getHost(url));
|
||||
assertEquals(exp, transformer.getHostOnFile(url));
|
||||
|
||||
url = "file:////" + encodeUrl("サーバー") + "/home/user";
|
||||
exp = "サーバー";
|
||||
assertEquals(exp, transformer.getHost(url));
|
||||
assertEquals(exp, transformer.getHostOnFile(url));
|
||||
|
||||
}
|
||||
|
||||
public void test_getHost_unexpected() {
|
||||
String url, exp;
|
||||
final FessFileTransformer transformer = new FessFileTransformer();
|
||||
final FessFileTransformer transformer = createInstance();
|
||||
|
||||
url = null;
|
||||
exp = "";
|
||||
assertEquals(exp, transformer.getHost(url));
|
||||
assertEquals(exp, transformer.getHostOnFile(url));
|
||||
|
||||
url = "example:";
|
||||
exp = "unknown";
|
||||
assertEquals(exp, transformer.getHost(url));
|
||||
assertEquals(exp, transformer.getHostOnFile(url));
|
||||
|
||||
url = "file:";
|
||||
exp = "localhost";
|
||||
assertEquals(exp, transformer.getHost(url));
|
||||
assertEquals(exp, transformer.getHostOnFile(url));
|
||||
|
||||
url = "file://";
|
||||
exp = "localhost";
|
||||
assertEquals(exp, transformer.getHost(url));
|
||||
assertEquals(exp, transformer.getHostOnFile(url));
|
||||
|
||||
url = "file:///";
|
||||
exp = "localhost";
|
||||
assertEquals(exp, transformer.getHost(url));
|
||||
assertEquals(exp, transformer.getHostOnFile(url));
|
||||
|
||||
url = "file://///";
|
||||
exp = "localhost";
|
||||
assertEquals(exp, transformer.getHost(url));
|
||||
assertEquals(exp, transformer.getHostOnFile(url));
|
||||
|
||||
url = "file://///example";
|
||||
exp = "localhost";
|
||||
assertEquals(exp, transformer.getHost(url));
|
||||
assertEquals(exp, transformer.getHostOnFile(url));
|
||||
|
||||
url = "file:/c:";
|
||||
exp = "localhost";
|
||||
assertEquals(exp, transformer.getHost(url));
|
||||
assertEquals(exp, transformer.getHostOnFile(url));
|
||||
|
||||
}
|
||||
|
||||
public void test_getSite_ok() {
|
||||
String url, exp;
|
||||
final FessFileTransformer transformer = new FessFileTransformer();
|
||||
final FessFileTransformer transformer = createInstance();
|
||||
|
||||
url = "";
|
||||
exp = "";
|
||||
assertEquals(exp, transformer.getSite(url, "UTF-8"));
|
||||
assertEquals(exp, transformer.getSiteOnFile(url, "UTF-8"));
|
||||
|
||||
url = "http://example.com/";
|
||||
exp = "example.com/";
|
||||
assertEquals(exp, transformer.getSite(url, "UTF-8"));
|
||||
assertEquals(exp, transformer.getSiteOnFile(url, "UTF-8"));
|
||||
|
||||
url = "http://example.com/index.html";
|
||||
exp = "example.com/index.html";
|
||||
assertEquals(exp, transformer.getSite(url, "UTF-8"));
|
||||
assertEquals(exp, transformer.getSiteOnFile(url, "UTF-8"));
|
||||
|
||||
url = "file:/home/user";
|
||||
exp = "/home/user";
|
||||
assertEquals(exp, transformer.getSite(url, "UTF-8"));
|
||||
assertEquals(exp, transformer.getSiteOnFile(url, "UTF-8"));
|
||||
|
||||
url = "file:/c:/home/user";
|
||||
exp = "c:\\home\\user";
|
||||
assertEquals(exp, transformer.getSite(url, "UTF-8"));
|
||||
assertEquals(exp, transformer.getSiteOnFile(url, "UTF-8"));
|
||||
|
||||
url = "file:/c:/";
|
||||
exp = "c:\\";
|
||||
assertEquals(exp, transformer.getSite(url, "UTF-8"));
|
||||
assertEquals(exp, transformer.getSiteOnFile(url, "UTF-8"));
|
||||
|
||||
url = "file:////server/user";
|
||||
exp = "\\\\server\\user";
|
||||
assertEquals(exp, transformer.getSite(url, "UTF-8"));
|
||||
assertEquals(exp, transformer.getSiteOnFile(url, "UTF-8"));
|
||||
}
|
||||
|
||||
transformer.maxSiteLength = 10;
|
||||
public void test_getSite_ok_len10() {
|
||||
String url, exp;
|
||||
final FessFileTransformer transformer = new FessFileTransformer() {
|
||||
@Override
|
||||
public int getMaxSiteLength() {
|
||||
return 10;
|
||||
}
|
||||
};
|
||||
transformer.init();
|
||||
|
||||
url = "file:/home/user/foo";
|
||||
exp = "/home/u...";
|
||||
assertEquals(exp, transformer.getSite(url, "UTF-8"));
|
||||
assertEquals(exp, transformer.getSiteOnFile(url, "UTF-8"));
|
||||
|
||||
}
|
||||
|
||||
public void test_getSite_unexpected() {
|
||||
String url, exp;
|
||||
final FessFileTransformer transformer = new FessFileTransformer();
|
||||
final FessFileTransformer transformer = createInstance();
|
||||
|
||||
url = "file:";
|
||||
exp = "";
|
||||
assertEquals(exp, transformer.getSite(url, "UTF-8"));
|
||||
assertEquals(exp, transformer.getSiteOnFile(url, "UTF-8"));
|
||||
|
||||
url = "file";
|
||||
exp = "file";
|
||||
assertEquals(exp, transformer.getSite(url, "UTF-8"));
|
||||
assertEquals(exp, transformer.getSiteOnFile(url, "UTF-8"));
|
||||
|
||||
url = "file:/";
|
||||
exp = "/";
|
||||
assertEquals(exp, transformer.getSite(url, "UTF-8"));
|
||||
assertEquals(exp, transformer.getSiteOnFile(url, "UTF-8"));
|
||||
|
||||
url = "file:/c:";
|
||||
exp = "c:";
|
||||
assertEquals(exp, transformer.getSite(url, "UTF-8"));
|
||||
assertEquals(exp, transformer.getSiteOnFile(url, "UTF-8"));
|
||||
|
||||
url = "file://";
|
||||
exp = "//";
|
||||
assertEquals(exp, transformer.getSite(url, "UTF-8"));
|
||||
assertEquals(exp, transformer.getSiteOnFile(url, "UTF-8"));
|
||||
|
||||
url = "file:///";
|
||||
exp = "///";
|
||||
assertEquals(exp, transformer.getSite(url, "UTF-8"));
|
||||
assertEquals(exp, transformer.getSiteOnFile(url, "UTF-8"));
|
||||
|
||||
url = "file://///";
|
||||
exp = "\\\\\\";
|
||||
assertEquals(exp, transformer.getSite(url, "UTF-8"));
|
||||
assertEquals(exp, transformer.getSiteOnFile(url, "UTF-8"));
|
||||
}
|
||||
|
||||
/**
 * Builds the transformer used by the tests in this class: constructs a
 * FessFileTransformer and calls init() on it before handing it back.
 * The tests call this helper instead of instantiating the transformer
 * directly, so every test receives an instance on which init() has run.
 *
 * @return a new FessFileTransformer on which init() has been called
 */
private FessFileTransformer createInstance() {
|
||||
final FessFileTransformer transformer = new FessFileTransformer();
|
||||
transformer.init();
|
||||
return transformer;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -46,6 +46,7 @@ public class FessXpathTransformerTest extends UnitFessTestCase {
|
|||
// Prepares the shared fixture for each test: runs the parent set-up, creates
// a fresh FessXpathTransformer, calls init() on it, and registers a
// "feed:" -> "http:" entry in the transformer's convertUrlMap.
// Declares Exception because the signature (and the parent set-up call) may throw.
public void setUp() throws Exception {
|
||||
super.setUp();
|
||||
fessXpathTransformer = new FessXpathTransformer();
|
||||
fessXpathTransformer.init();
|
||||
fessXpathTransformer.convertUrlMap.put("feed:", "http:");
|
||||
}
|
||||
|
||||
|
@ -53,7 +54,11 @@ public class FessXpathTransformerTest extends UnitFessTestCase {
|
|||
final String data = "<html><body><br/><script>foo</script><noscript>bar</noscript></body></html>";
|
||||
final Document document = getDocument(data);
|
||||
|
||||
final FessXpathTransformer transformer = new FessXpathTransformer();
|
||||
final FessXpathTransformer transformer = new FessXpathTransformer() {
|
||||
protected String[] getCrawlerDocumentHtmlPrunedTags() {
|
||||
return new String[0];
|
||||
}
|
||||
};
|
||||
|
||||
final Node pruneNode = transformer.pruneNode(document.cloneNode(true));
|
||||
assertEquals(getXmlString(document), getXmlString(pruneNode));
|
||||
|
@ -63,8 +68,11 @@ public class FessXpathTransformerTest extends UnitFessTestCase {
|
|||
final String data = "<html><body><br/><script>foo</script><noscript>bar</noscript></body></html>";
|
||||
final Document document = getDocument(data);
|
||||
|
||||
final FessXpathTransformer transformer = new FessXpathTransformer();
|
||||
transformer.prunedTagList.add("noscript");
|
||||
final FessXpathTransformer transformer = new FessXpathTransformer() {
|
||||
protected String[] getCrawlerDocumentHtmlPrunedTags() {
|
||||
return new String[] { "noscript" };
|
||||
}
|
||||
};
|
||||
|
||||
final Node pruneNode = transformer.pruneNode(document.cloneNode(true));
|
||||
final String docString = getXmlString(document);
|
||||
|
@ -83,9 +91,11 @@ public class FessXpathTransformerTest extends UnitFessTestCase {
|
|||
final String data = "<html><body><br/><script>foo</script><noscript>bar</noscript></body></html>";
|
||||
final Document document = getDocument(data);
|
||||
|
||||
final FessXpathTransformer transformer = new FessXpathTransformer();
|
||||
transformer.prunedTagList.add("script");
|
||||
transformer.prunedTagList.add("noscript");
|
||||
final FessXpathTransformer transformer = new FessXpathTransformer() {
|
||||
protected String[] getCrawlerDocumentHtmlPrunedTags() {
|
||||
return new String[] { "script", "noscript" };
|
||||
}
|
||||
};
|
||||
|
||||
final Node pruneNode = transformer.pruneNode(document.cloneNode(true));
|
||||
final String docString = getXmlString(document);
|
||||
|
@ -235,6 +245,7 @@ public class FessXpathTransformerTest extends UnitFessTestCase {
|
|||
|
||||
public void test_canonicalXpath() throws Exception {
|
||||
final FessXpathTransformer transformer = new FessXpathTransformer();
|
||||
transformer.init();
|
||||
|
||||
final Map<String, Object> dataMap = new HashMap<String, Object>();
|
||||
final ResponseData responseData = new ResponseData();
|
||||
|
|
Loading…
Add table
Reference in a new issue