fix #2221 move keys
This commit is contained in:
parent
23eddea328
commit
223bb4f21e
10 changed files with 86 additions and 36 deletions
|
@ -410,10 +410,6 @@ public class Constants extends CoreLibConstants {
|
|||
|
||||
public static final String SEARCH_PREFERENCE_LOCAL = "_local";
|
||||
|
||||
public static final String CONFIG_CLEANUP_FILTERS = "cleanup.urlFilters";
|
||||
|
||||
public static final String CONFIG_CLEANUP_ALL = "cleanup.all";
|
||||
|
||||
public static final String GSA_API_VERSION = "3.2";
|
||||
|
||||
public static final String PERMISSIONS = "permissions";
|
||||
|
|
|
@ -46,6 +46,7 @@ import org.codelibs.fess.crawler.transformer.impl.AbstractTransformer;
|
|||
import org.codelibs.fess.crawler.util.CrawlingParameterUtil;
|
||||
import org.codelibs.fess.es.config.exentity.CrawlingConfig;
|
||||
import org.codelibs.fess.es.config.exentity.CrawlingConfig.ConfigName;
|
||||
import org.codelibs.fess.es.config.exentity.CrawlingConfig.Param.Config;
|
||||
import org.codelibs.fess.helper.CrawlingConfigHelper;
|
||||
import org.codelibs.fess.helper.CrawlingInfoHelper;
|
||||
import org.codelibs.fess.helper.DocumentHelper;
|
||||
|
@ -384,7 +385,7 @@ public abstract class AbstractFessFileTransformer extends AbstractTransformer im
|
|||
params.put(ExtractData.URL, responseData.getUrl());
|
||||
Map<String, String> configParam = crawlingConfig.getConfigParameterMap(ConfigName.CONFIG);
|
||||
if (configParam != null) {
|
||||
String keepOriginalBody = configParam.get("keep_original_body");
|
||||
String keepOriginalBody = configParam.get(Config.KEEP_ORIGINAL_BODY);
|
||||
if (StringUtil.isNotBlank(keepOriginalBody)) {
|
||||
params.put(TikaExtractor.NORMALIZE_TEXT, Constants.TRUE.equalsIgnoreCase(keepOriginalBody) ? Constants.FALSE
|
||||
: Constants.TRUE);
|
||||
|
|
|
@ -54,6 +54,8 @@ import org.codelibs.fess.crawler.transformer.impl.XpathTransformer;
|
|||
import org.codelibs.fess.crawler.util.CrawlingParameterUtil;
|
||||
import org.codelibs.fess.es.config.exentity.CrawlingConfig;
|
||||
import org.codelibs.fess.es.config.exentity.CrawlingConfig.ConfigName;
|
||||
import org.codelibs.fess.es.config.exentity.CrawlingConfig.Param.Config;
|
||||
import org.codelibs.fess.es.config.exentity.CrawlingConfig.Param.XPath;
|
||||
import org.codelibs.fess.helper.CrawlingConfigHelper;
|
||||
import org.codelibs.fess.helper.CrawlingInfoHelper;
|
||||
import org.codelibs.fess.helper.DocumentHelper;
|
||||
|
@ -80,10 +82,6 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf
|
|||
|
||||
private static final String X_ROBOTS_TAG = "X-Robots-Tag";
|
||||
|
||||
private static final String HTML_CANONICAL_XPATH = "html.canonical.xpath";
|
||||
|
||||
private static final String IGNORE_ROBOTS_TAGS = "ignore.robots.tags";
|
||||
|
||||
private static final String META_NAME_THUMBNAIL_CONTENT = "//META[@name=\"thumbnail\" or @name=\"THUMBNAIL\"]/@content";
|
||||
|
||||
private static final String META_PROPERTY_OGIMAGE_CONTENT = "//META[@property=\"og:image\"]/@content";
|
||||
|
@ -206,7 +204,7 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf
|
|||
|
||||
protected void processMetaRobots(final ResponseData responseData, final ResultData resultData, final Document document) {
|
||||
final Map<String, String> configMap = getConfigPrameterMap(responseData, ConfigName.CONFIG);
|
||||
final String ignore = configMap.get(IGNORE_ROBOTS_TAGS);
|
||||
final String ignore = configMap.get(Config.IGNORE_ROBOTS_TAGS);
|
||||
if (ignore == null) {
|
||||
if (fessConfig.isCrawlerIgnoreRobotsTags()) {
|
||||
return;
|
||||
|
@ -253,7 +251,7 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf
|
|||
|
||||
protected void processXRobotsTag(final ResponseData responseData, final ResultData resultData) {
|
||||
final Map<String, String> configMap = getConfigPrameterMap(responseData, ConfigName.CONFIG);
|
||||
final String ignore = configMap.get(IGNORE_ROBOTS_TAGS);
|
||||
final String ignore = configMap.get(Config.IGNORE_ROBOTS_TAGS);
|
||||
if (ignore == null) {
|
||||
if (fessConfig.isCrawlerIgnoreRobotsTags()) {
|
||||
return;
|
||||
|
@ -502,7 +500,7 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf
|
|||
}
|
||||
|
||||
protected String getLangXpath(final FessConfig fessConfig, final Map<String, String> xpathConfigMap) {
|
||||
final String xpath = xpathConfigMap.get("default.lang");
|
||||
final String xpath = xpathConfigMap.get(XPath.DEFAULT_LANG);
|
||||
if (StringUtil.isNotBlank(xpath)) {
|
||||
return xpath;
|
||||
}
|
||||
|
@ -510,7 +508,7 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf
|
|||
}
|
||||
|
||||
protected String getContentXpath(final FessConfig fessConfig, final Map<String, String> xpathConfigMap) {
|
||||
final String xpath = xpathConfigMap.get("default.content");
|
||||
final String xpath = xpathConfigMap.get(XPath.DEFAULT_CONTENT);
|
||||
if (StringUtil.isNotBlank(xpath)) {
|
||||
return xpath;
|
||||
}
|
||||
|
@ -518,7 +516,7 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf
|
|||
}
|
||||
|
||||
protected String getDigestXpath(final FessConfig fessConfig, final Map<String, String> xpathConfigMap) {
|
||||
final String xpath = xpathConfigMap.get("default.digest");
|
||||
final String xpath = xpathConfigMap.get(XPath.DEFAULT_DIGEST);
|
||||
if (StringUtil.isNotBlank(xpath)) {
|
||||
return xpath;
|
||||
}
|
||||
|
@ -527,7 +525,7 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf
|
|||
|
||||
protected String getCanonicalUrl(final ResponseData responseData, final Document document) {
|
||||
final Map<String, String> configMap = getConfigPrameterMap(responseData, ConfigName.CONFIG);
|
||||
String xpath = configMap.get(HTML_CANONICAL_XPATH);
|
||||
String xpath = configMap.get(Config.HTML_CANONICAL_XPATH);
|
||||
if (xpath == null) {
|
||||
xpath = fessConfig.getCrawlerDocumentHtmlCanonicalXpath();
|
||||
}
|
||||
|
|
|
@ -20,7 +20,9 @@ import java.util.Map;
|
|||
import org.apache.http.auth.UsernamePasswordCredentials;
|
||||
import org.codelibs.core.lang.StringUtil;
|
||||
import org.codelibs.fess.crawler.client.CrawlerClientFactory;
|
||||
import org.codelibs.fess.crawler.client.ftp.FtpClient;
|
||||
import org.codelibs.fess.crawler.client.http.HcHttpClient;
|
||||
import org.codelibs.fess.crawler.client.smb.SmbClient;
|
||||
import org.codelibs.fess.mylasta.direction.FessConfig;
|
||||
import org.codelibs.fess.util.ComponentUtil;
|
||||
|
||||
|
@ -51,8 +53,8 @@ public interface CrawlingConfig {
|
|||
final String proxyHost = fessConfig.getHttpProxyHost();
|
||||
final String proxyPort = fessConfig.getHttpProxyPort();
|
||||
if (StringUtil.isNotBlank(proxyHost) && StringUtil.isNotBlank(proxyPort)) {
|
||||
paramMap.put(HcHttpClient.PROXY_HOST_PROPERTY, proxyHost);
|
||||
paramMap.put(HcHttpClient.PROXY_PORT_PROPERTY, proxyPort);
|
||||
paramMap.put(Param.Client.PROXY_HOST, proxyHost);
|
||||
paramMap.put(Param.Client.PROXY_PORT, proxyPort);
|
||||
final String proxyUsername = fessConfig.getHttpProxyUsername();
|
||||
final String proxyPassword = fessConfig.getHttpProxyPassword();
|
||||
if (proxyUsername != null && proxyPassword != null) {
|
||||
|
@ -86,4 +88,50 @@ public interface CrawlingConfig {
|
|||
public enum ConfigName {
|
||||
CLIENT, XPATH, META, VALUE, SCRIPT, FIELD, CONFIG;
|
||||
}
|
||||
|
||||
public static class Param {
|
||||
// client.*
|
||||
public static class Client {
|
||||
public static final String SMB_AUTHENTICATIONS = SmbClient.SMB_AUTHENTICATIONS_PROPERTY;
|
||||
public static final String SMB1_AUTHENTICATIONS = org.codelibs.fess.crawler.client.smb1.SmbClient.SMB_AUTHENTICATIONS_PROPERTY;
|
||||
public static final String FTP_AUTHENTICATIONS = FtpClient.FTP_AUTHENTICATIONS_PROPERTY;
|
||||
public static final String ROBOTS_TXT_ENABLED = HcHttpClient.ROBOTS_TXT_ENABLED_PROPERTY;
|
||||
public static final String PROXY_PASSWORD = "proxyPassword";
|
||||
public static final String PROXY_USERNAME = "proxyUsername";
|
||||
public static final String PROXY_PORT = HcHttpClient.PROXY_PORT_PROPERTY;
|
||||
public static final String PROXY_HOST = HcHttpClient.PROXY_HOST_PROPERTY;
|
||||
public static final String USER_AGENT = HcHttpClient.USER_AGENT_PROPERTY;
|
||||
}
|
||||
|
||||
// xpath.*
|
||||
public static class XPath {
|
||||
public static final String DEFAULT_LANG = "default.lang";
|
||||
public static final String DEFAULT_CONTENT = "default.content";
|
||||
public static final String DEFAULT_DIGEST = "default.digest";
|
||||
// xpath.<field>=<value>
|
||||
}
|
||||
|
||||
// config.*
|
||||
public static class Config {
|
||||
public static final String KEEP_ORIGINAL_BODY = "keep.original.body";
|
||||
public static final String CLEANUP_ALL = "cleanup.all";
|
||||
public static final String CLEANUP_URL_FILTERS = "cleanup.urlFilters";
|
||||
public static final String JCIFS_PREFIX = "jcifs.";
|
||||
public static final String HTML_CANONICAL_XPATH = "html.canonical.xpath";
|
||||
public static final String PIPELINE = "pipeline";
|
||||
public static final String IGNORE_ROBOTS_TAGS = "ignore.robots.tags";
|
||||
}
|
||||
|
||||
// meta.*
|
||||
// meta.<field>=<value>
|
||||
|
||||
// value.*
|
||||
// value.<field>=<value>
|
||||
|
||||
// script.*
|
||||
// script.<field>=<value>
|
||||
|
||||
// field.*
|
||||
// field.<field>=<value>
|
||||
}
|
||||
}
|
|
@ -190,10 +190,10 @@ public class FileConfig extends BsFileConfig implements CrawlingConfig {
|
|||
ftpAuthList.add(ftpAuth);
|
||||
}
|
||||
}
|
||||
paramMap.put(SmbClient.SMB_AUTHENTICATIONS_PROPERTY, smbAuthList.toArray(new SmbAuthentication[smbAuthList.size()]));
|
||||
paramMap.put(org.codelibs.fess.crawler.client.smb1.SmbClient.SMB_AUTHENTICATIONS_PROPERTY,
|
||||
paramMap.put(Param.Client.SMB_AUTHENTICATIONS, smbAuthList.toArray(new SmbAuthentication[smbAuthList.size()]));
|
||||
paramMap.put(Param.Client.SMB1_AUTHENTICATIONS,
|
||||
smb1AuthList.toArray(new org.codelibs.fess.crawler.client.smb1.SmbAuthentication[smb1AuthList.size()]));
|
||||
paramMap.put(FtpClient.FTP_AUTHENTICATIONS_PROPERTY, ftpAuthList.toArray(new FtpAuthentication[ftpAuthList.size()]));
|
||||
paramMap.put(Param.Client.FTP_AUTHENTICATIONS, ftpAuthList.toArray(new FtpAuthentication[ftpAuthList.size()]));
|
||||
|
||||
return paramMap;
|
||||
}
|
||||
|
|
|
@ -36,6 +36,7 @@ import org.codelibs.fess.crawler.client.http.ntlm.JcifsEngine;
|
|||
import org.codelibs.fess.crawler.exception.CrawlerSystemException;
|
||||
import org.codelibs.fess.es.config.bsentity.BsWebAuthentication;
|
||||
import org.codelibs.fess.es.config.exentity.CrawlingConfig.ConfigName;
|
||||
import org.codelibs.fess.es.config.exentity.CrawlingConfig.Param.Config;
|
||||
import org.codelibs.fess.util.ComponentUtil;
|
||||
import org.codelibs.fess.util.ParameterUtil;
|
||||
import org.slf4j.Logger;
|
||||
|
@ -64,8 +65,8 @@ public class WebAuthentication extends BsWebAuthentication {
|
|||
return new DigestScheme();
|
||||
} else if (Constants.NTLM.equals(scheme)) {
|
||||
final Properties props = new Properties();
|
||||
getWebConfig().getConfigParameterMap(ConfigName.CONFIG).entrySet().stream().filter(e -> e.getKey().startsWith("jcifs."))
|
||||
.forEach(e -> {
|
||||
getWebConfig().getConfigParameterMap(ConfigName.CONFIG).entrySet().stream()
|
||||
.filter(e -> e.getKey().startsWith(Config.JCIFS_PREFIX)).forEach(e -> {
|
||||
props.setProperty(e.getKey(), e.getValue());
|
||||
});
|
||||
return new NTLMScheme(new JcifsEngine(props));
|
||||
|
|
|
@ -32,6 +32,7 @@ import org.codelibs.fess.crawler.client.CrawlerClientFactory;
|
|||
import org.codelibs.fess.crawler.client.http.Authentication;
|
||||
import org.codelibs.fess.crawler.client.http.HcHttpClient;
|
||||
import org.codelibs.fess.es.config.bsentity.BsWebConfig;
|
||||
import org.codelibs.fess.es.config.exentity.CrawlingConfig.Param.Client;
|
||||
import org.codelibs.fess.helper.SystemHelper;
|
||||
import org.codelibs.fess.mylasta.direction.FessConfig;
|
||||
import org.codelibs.fess.util.ComponentUtil;
|
||||
|
@ -159,13 +160,13 @@ public class WebConfig extends BsWebConfig implements CrawlingConfig {
|
|||
}
|
||||
|
||||
// robots txt enabled
|
||||
if (paramMap.get(HcHttpClient.ROBOTS_TXT_ENABLED_PROPERTY) == null) {
|
||||
paramMap.put(HcHttpClient.ROBOTS_TXT_ENABLED_PROPERTY, !fessConfig.isCrawlerIgnoreRobotsTxt());
|
||||
if (paramMap.get(Param.Client.ROBOTS_TXT_ENABLED) == null) {
|
||||
paramMap.put(Param.Client.ROBOTS_TXT_ENABLED, !fessConfig.isCrawlerIgnoreRobotsTxt());
|
||||
}
|
||||
|
||||
final String userAgent = getUserAgent();
|
||||
if (StringUtil.isNotBlank(userAgent)) {
|
||||
paramMap.put(HcHttpClient.USER_AGENT_PROPERTY, userAgent);
|
||||
paramMap.put(Client.USER_AGENT, userAgent);
|
||||
}
|
||||
|
||||
final List<WebAuthentication> webAuthList = webAuthenticationService.getWebAuthenticationList(getId());
|
||||
|
@ -184,13 +185,15 @@ public class WebConfig extends BsWebConfig implements CrawlingConfig {
|
|||
paramMap.put(HcHttpClient.REQUERT_HEADERS_PROPERTY,
|
||||
rhList.toArray(new org.codelibs.fess.crawler.client.http.RequestHeader[rhList.size()]));
|
||||
|
||||
final String proxyHost = (String) paramMap.get("proxyHost");
|
||||
final String proxyPort = (String) paramMap.get("proxyPort");
|
||||
final String proxyHost = (String) paramMap.get(Param.Client.PROXY_HOST);
|
||||
final String proxyPort = (String) paramMap.get(Param.Client.PROXY_PORT);
|
||||
if (StringUtil.isNotBlank(proxyHost) && StringUtil.isNotBlank(proxyPort)) {
|
||||
// proxy credentials
|
||||
if (paramMap.get("proxyUsername") != null && paramMap.get("proxyPassword") != null) {
|
||||
paramMap.put(HcHttpClient.PROXY_CREDENTIALS_PROPERTY, new UsernamePasswordCredentials(paramMap.remove("proxyUsername")
|
||||
.toString(), paramMap.remove("proxyPassword").toString()));
|
||||
if (paramMap.get(Param.Client.PROXY_USERNAME) != null && paramMap.get(Param.Client.PROXY_PASSWORD) != null) {
|
||||
paramMap.put(
|
||||
HcHttpClient.PROXY_CREDENTIALS_PROPERTY,
|
||||
new UsernamePasswordCredentials(paramMap.remove(Param.Client.PROXY_USERNAME).toString(), paramMap.remove(
|
||||
Param.Client.PROXY_PASSWORD).toString()));
|
||||
}
|
||||
} else {
|
||||
initializeDefaultHttpProxy(paramMap);
|
||||
|
|
|
@ -38,6 +38,7 @@ import org.codelibs.fess.es.config.exbhv.WebConfigBhv;
|
|||
import org.codelibs.fess.es.config.exentity.CrawlingConfig;
|
||||
import org.codelibs.fess.es.config.exentity.CrawlingConfig.ConfigName;
|
||||
import org.codelibs.fess.es.config.exentity.CrawlingConfig.ConfigType;
|
||||
import org.codelibs.fess.es.config.exentity.CrawlingConfig.Param.Config;
|
||||
import org.codelibs.fess.es.config.exentity.DataConfig;
|
||||
import org.codelibs.fess.es.config.exentity.FailureUrl;
|
||||
import org.codelibs.fess.es.config.exentity.FileConfig;
|
||||
|
@ -125,7 +126,7 @@ public class CrawlingConfigHelper {
|
|||
if (config == null) {
|
||||
return OptionalThing.empty();
|
||||
}
|
||||
final String pipeline = config.getConfigParameterMap(ConfigName.CONFIG).get("pipeline");
|
||||
final String pipeline = config.getConfigParameterMap(ConfigName.CONFIG).get(Config.PIPELINE);
|
||||
if (StringUtil.isBlank(pipeline)) {
|
||||
return OptionalThing.empty();
|
||||
}
|
||||
|
|
|
@ -51,6 +51,7 @@ import org.codelibs.fess.crawler.transformer.Transformer;
|
|||
import org.codelibs.fess.crawler.util.TextUtil;
|
||||
import org.codelibs.fess.es.config.exentity.CrawlingConfig;
|
||||
import org.codelibs.fess.es.config.exentity.CrawlingConfig.ConfigName;
|
||||
import org.codelibs.fess.es.config.exentity.CrawlingConfig.Param;
|
||||
import org.codelibs.fess.mylasta.direction.FessConfig;
|
||||
import org.codelibs.fess.util.ComponentUtil;
|
||||
import org.lastaflute.di.core.SingletonLaContainer;
|
||||
|
@ -83,7 +84,7 @@ public class DocumentHelper {
|
|||
|
||||
if (crawlingConfig != null) {
|
||||
Map<String, String> configParam = crawlingConfig.getConfigParameterMap(ConfigName.CONFIG);
|
||||
if (configParam != null && Constants.TRUE.equalsIgnoreCase(configParam.get("keep_original_body"))) {
|
||||
if (configParam != null && Constants.TRUE.equalsIgnoreCase(configParam.get(Param.Config.KEEP_ORIGINAL_BODY))) {
|
||||
return content;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -36,6 +36,7 @@ import org.codelibs.fess.crawler.service.impl.EsUrlQueueService;
|
|||
import org.codelibs.fess.es.config.exbhv.BoostDocumentRuleBhv;
|
||||
import org.codelibs.fess.es.config.exentity.BoostDocumentRule;
|
||||
import org.codelibs.fess.es.config.exentity.CrawlingConfig.ConfigName;
|
||||
import org.codelibs.fess.es.config.exentity.CrawlingConfig.Param.Config;
|
||||
import org.codelibs.fess.es.config.exentity.FileConfig;
|
||||
import org.codelibs.fess.es.config.exentity.WebConfig;
|
||||
import org.codelibs.fess.indexer.IndexUpdater;
|
||||
|
@ -135,9 +136,9 @@ public class WebFsIndexHelper {
|
|||
webConfig.initializeClientFactory(crawler.getClientFactory());
|
||||
final Map<String, String> configParamMap = webConfig.getConfigParameterMap(ConfigName.CONFIG);
|
||||
|
||||
if (Constants.TRUE.equalsIgnoreCase(configParamMap.get(Constants.CONFIG_CLEANUP_ALL))) {
|
||||
if (Constants.TRUE.equalsIgnoreCase(configParamMap.get(Config.CLEANUP_ALL))) {
|
||||
deleteCrawlData(sid);
|
||||
} else if (Constants.TRUE.equalsIgnoreCase(configParamMap.get(Constants.CONFIG_CLEANUP_FILTERS))) {
|
||||
} else if (Constants.TRUE.equalsIgnoreCase(configParamMap.get(Config.CLEANUP_URL_FILTERS))) {
|
||||
final EsUrlFilterService urlFilterService = ComponentUtil.getComponent(EsUrlFilterService.class);
|
||||
try {
|
||||
urlFilterService.delete(sid);
|
||||
|
@ -243,9 +244,9 @@ public class WebFsIndexHelper {
|
|||
fileConfig.initializeClientFactory(crawler.getClientFactory());
|
||||
final Map<String, String> configParamMap = fileConfig.getConfigParameterMap(ConfigName.CONFIG);
|
||||
|
||||
if (Constants.TRUE.equalsIgnoreCase(configParamMap.get(Constants.CONFIG_CLEANUP_ALL))) {
|
||||
if (Constants.TRUE.equalsIgnoreCase(configParamMap.get(Config.CLEANUP_ALL))) {
|
||||
deleteCrawlData(sid);
|
||||
} else if (Constants.TRUE.equalsIgnoreCase(configParamMap.get(Constants.CONFIG_CLEANUP_FILTERS))) {
|
||||
} else if (Constants.TRUE.equalsIgnoreCase(configParamMap.get(Config.CLEANUP_URL_FILTERS))) {
|
||||
final EsUrlFilterService urlFilterService = ComponentUtil.getComponent(EsUrlFilterService.class);
|
||||
try {
|
||||
urlFilterService.delete(sid);
|
||||
|
|
Loading…
Add table
Reference in a new issue