fix #2221 move keys

This commit is contained in:
Shinsuke Sugaya 2019-08-25 09:46:15 +09:00
parent 23eddea328
commit 223bb4f21e
10 changed files with 86 additions and 36 deletions

View file

@ -410,10 +410,6 @@ public class Constants extends CoreLibConstants {
public static final String SEARCH_PREFERENCE_LOCAL = "_local";
public static final String CONFIG_CLEANUP_FILTERS = "cleanup.urlFilters";
public static final String CONFIG_CLEANUP_ALL = "cleanup.all";
public static final String GSA_API_VERSION = "3.2";
public static final String PERMISSIONS = "permissions";

View file

@ -46,6 +46,7 @@ import org.codelibs.fess.crawler.transformer.impl.AbstractTransformer;
import org.codelibs.fess.crawler.util.CrawlingParameterUtil;
import org.codelibs.fess.es.config.exentity.CrawlingConfig;
import org.codelibs.fess.es.config.exentity.CrawlingConfig.ConfigName;
import org.codelibs.fess.es.config.exentity.CrawlingConfig.Param.Config;
import org.codelibs.fess.helper.CrawlingConfigHelper;
import org.codelibs.fess.helper.CrawlingInfoHelper;
import org.codelibs.fess.helper.DocumentHelper;
@ -384,7 +385,7 @@ public abstract class AbstractFessFileTransformer extends AbstractTransformer im
params.put(ExtractData.URL, responseData.getUrl());
Map<String, String> configParam = crawlingConfig.getConfigParameterMap(ConfigName.CONFIG);
if (configParam != null) {
String keepOriginalBody = configParam.get("keep_original_body");
String keepOriginalBody = configParam.get(Config.KEEP_ORIGINAL_BODY);
if (StringUtil.isNotBlank(keepOriginalBody)) {
params.put(TikaExtractor.NORMALIZE_TEXT, Constants.TRUE.equalsIgnoreCase(keepOriginalBody) ? Constants.FALSE
: Constants.TRUE);

View file

@ -54,6 +54,8 @@ import org.codelibs.fess.crawler.transformer.impl.XpathTransformer;
import org.codelibs.fess.crawler.util.CrawlingParameterUtil;
import org.codelibs.fess.es.config.exentity.CrawlingConfig;
import org.codelibs.fess.es.config.exentity.CrawlingConfig.ConfigName;
import org.codelibs.fess.es.config.exentity.CrawlingConfig.Param.Config;
import org.codelibs.fess.es.config.exentity.CrawlingConfig.Param.XPath;
import org.codelibs.fess.helper.CrawlingConfigHelper;
import org.codelibs.fess.helper.CrawlingInfoHelper;
import org.codelibs.fess.helper.DocumentHelper;
@ -80,10 +82,6 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf
private static final String X_ROBOTS_TAG = "X-Robots-Tag";
private static final String HTML_CANONICAL_XPATH = "html.canonical.xpath";
private static final String IGNORE_ROBOTS_TAGS = "ignore.robots.tags";
private static final String META_NAME_THUMBNAIL_CONTENT = "//META[@name=\"thumbnail\" or @name=\"THUMBNAIL\"]/@content";
private static final String META_PROPERTY_OGIMAGE_CONTENT = "//META[@property=\"og:image\"]/@content";
@ -206,7 +204,7 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf
protected void processMetaRobots(final ResponseData responseData, final ResultData resultData, final Document document) {
final Map<String, String> configMap = getConfigPrameterMap(responseData, ConfigName.CONFIG);
final String ignore = configMap.get(IGNORE_ROBOTS_TAGS);
final String ignore = configMap.get(Config.IGNORE_ROBOTS_TAGS);
if (ignore == null) {
if (fessConfig.isCrawlerIgnoreRobotsTags()) {
return;
@ -253,7 +251,7 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf
protected void processXRobotsTag(final ResponseData responseData, final ResultData resultData) {
final Map<String, String> configMap = getConfigPrameterMap(responseData, ConfigName.CONFIG);
final String ignore = configMap.get(IGNORE_ROBOTS_TAGS);
final String ignore = configMap.get(Config.IGNORE_ROBOTS_TAGS);
if (ignore == null) {
if (fessConfig.isCrawlerIgnoreRobotsTags()) {
return;
@ -502,7 +500,7 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf
}
protected String getLangXpath(final FessConfig fessConfig, final Map<String, String> xpathConfigMap) {
final String xpath = xpathConfigMap.get("default.lang");
final String xpath = xpathConfigMap.get(XPath.DEFAULT_LANG);
if (StringUtil.isNotBlank(xpath)) {
return xpath;
}
@ -510,7 +508,7 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf
}
protected String getContentXpath(final FessConfig fessConfig, final Map<String, String> xpathConfigMap) {
final String xpath = xpathConfigMap.get("default.content");
final String xpath = xpathConfigMap.get(XPath.DEFAULT_CONTENT);
if (StringUtil.isNotBlank(xpath)) {
return xpath;
}
@ -518,7 +516,7 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf
}
protected String getDigestXpath(final FessConfig fessConfig, final Map<String, String> xpathConfigMap) {
final String xpath = xpathConfigMap.get("default.digest");
final String xpath = xpathConfigMap.get(XPath.DEFAULT_DIGEST);
if (StringUtil.isNotBlank(xpath)) {
return xpath;
}
@ -527,7 +525,7 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf
protected String getCanonicalUrl(final ResponseData responseData, final Document document) {
final Map<String, String> configMap = getConfigPrameterMap(responseData, ConfigName.CONFIG);
String xpath = configMap.get(HTML_CANONICAL_XPATH);
String xpath = configMap.get(Config.HTML_CANONICAL_XPATH);
if (xpath == null) {
xpath = fessConfig.getCrawlerDocumentHtmlCanonicalXpath();
}

View file

@ -20,7 +20,9 @@ import java.util.Map;
import org.apache.http.auth.UsernamePasswordCredentials;
import org.codelibs.core.lang.StringUtil;
import org.codelibs.fess.crawler.client.CrawlerClientFactory;
import org.codelibs.fess.crawler.client.ftp.FtpClient;
import org.codelibs.fess.crawler.client.http.HcHttpClient;
import org.codelibs.fess.crawler.client.smb.SmbClient;
import org.codelibs.fess.mylasta.direction.FessConfig;
import org.codelibs.fess.util.ComponentUtil;
@ -51,8 +53,8 @@ public interface CrawlingConfig {
final String proxyHost = fessConfig.getHttpProxyHost();
final String proxyPort = fessConfig.getHttpProxyPort();
if (StringUtil.isNotBlank(proxyHost) && StringUtil.isNotBlank(proxyPort)) {
paramMap.put(HcHttpClient.PROXY_HOST_PROPERTY, proxyHost);
paramMap.put(HcHttpClient.PROXY_PORT_PROPERTY, proxyPort);
paramMap.put(Param.Client.PROXY_HOST, proxyHost);
paramMap.put(Param.Client.PROXY_PORT, proxyPort);
final String proxyUsername = fessConfig.getHttpProxyUsername();
final String proxyPassword = fessConfig.getHttpProxyPassword();
if (proxyUsername != null && proxyPassword != null) {
@ -86,4 +88,50 @@ public interface CrawlingConfig {
/**
 * Selects one group of configuration parameters. A value of this enum is
 * handed to {@code getConfigParameterMap} to pick the group to read.
 * Declaration order is kept as-is so that ordinals do not shift.
 */
public enum ConfigName {
    CLIENT,
    XPATH,
    META,
    VALUE,
    SCRIPT,
    FIELD,
    CONFIG
}
/**
 * Parameter key names used with crawling configurations, grouped by the
 * section prefix ({@code client.*}, {@code xpath.*}, {@code config.*}, ...)
 * they are written under.
 */
public static class Param {

    /** Keys of the {@code client.*} section. */
    public static class Client {
        // Keys that reuse the property names declared by the crawler clients.
        public static final String USER_AGENT = HcHttpClient.USER_AGENT_PROPERTY;
        public static final String ROBOTS_TXT_ENABLED = HcHttpClient.ROBOTS_TXT_ENABLED_PROPERTY;
        public static final String PROXY_HOST = HcHttpClient.PROXY_HOST_PROPERTY;
        public static final String PROXY_PORT = HcHttpClient.PROXY_PORT_PROPERTY;
        public static final String FTP_AUTHENTICATIONS = FtpClient.FTP_AUTHENTICATIONS_PROPERTY;
        public static final String SMB_AUTHENTICATIONS = SmbClient.SMB_AUTHENTICATIONS_PROPERTY;
        public static final String SMB1_AUTHENTICATIONS =
                org.codelibs.fess.crawler.client.smb1.SmbClient.SMB_AUTHENTICATIONS_PROPERTY;

        // Keys whose names are spelled out here as literals.
        public static final String PROXY_USERNAME = "proxyUsername";
        public static final String PROXY_PASSWORD = "proxyPassword";
    }

    /**
     * Keys of the {@code xpath.*} section. Besides the fixed keys below the
     * section also takes entries of the form {@code xpath.<field>=<value>}.
     */
    public static class XPath {
        public static final String DEFAULT_CONTENT = "default.content";
        public static final String DEFAULT_DIGEST = "default.digest";
        public static final String DEFAULT_LANG = "default.lang";
    }

    /** Keys of the {@code config.*} section. */
    public static class Config {
        public static final String CLEANUP_ALL = "cleanup.all";
        public static final String CLEANUP_URL_FILTERS = "cleanup.urlFilters";
        public static final String HTML_CANONICAL_XPATH = "html.canonical.xpath";
        public static final String IGNORE_ROBOTS_TAGS = "ignore.robots.tags";
        // Used as a key prefix (matched with startsWith), not as a full key.
        public static final String JCIFS_PREFIX = "jcifs.";
        // NOTE(review): some callers used to look this setting up as
        // "keep_original_body" - confirm stored configurations use the dotted form.
        public static final String KEEP_ORIGINAL_BODY = "keep.original.body";
        public static final String PIPELINE = "pipeline";
    }

    // The remaining sections have no fixed keys; each takes per-field entries:
    //   meta.*    meta.<field>=<value>
    //   value.*   value.<field>=<value>
    //   script.*  script.<field>=<value>
    //   field.*   field.<field>=<value>
}
}

View file

@ -190,10 +190,10 @@ public class FileConfig extends BsFileConfig implements CrawlingConfig {
ftpAuthList.add(ftpAuth);
}
}
paramMap.put(SmbClient.SMB_AUTHENTICATIONS_PROPERTY, smbAuthList.toArray(new SmbAuthentication[smbAuthList.size()]));
paramMap.put(org.codelibs.fess.crawler.client.smb1.SmbClient.SMB_AUTHENTICATIONS_PROPERTY,
paramMap.put(Param.Client.SMB_AUTHENTICATIONS, smbAuthList.toArray(new SmbAuthentication[smbAuthList.size()]));
paramMap.put(Param.Client.SMB1_AUTHENTICATIONS,
smb1AuthList.toArray(new org.codelibs.fess.crawler.client.smb1.SmbAuthentication[smb1AuthList.size()]));
paramMap.put(FtpClient.FTP_AUTHENTICATIONS_PROPERTY, ftpAuthList.toArray(new FtpAuthentication[ftpAuthList.size()]));
paramMap.put(Param.Client.FTP_AUTHENTICATIONS, ftpAuthList.toArray(new FtpAuthentication[ftpAuthList.size()]));
return paramMap;
}

View file

@ -36,6 +36,7 @@ import org.codelibs.fess.crawler.client.http.ntlm.JcifsEngine;
import org.codelibs.fess.crawler.exception.CrawlerSystemException;
import org.codelibs.fess.es.config.bsentity.BsWebAuthentication;
import org.codelibs.fess.es.config.exentity.CrawlingConfig.ConfigName;
import org.codelibs.fess.es.config.exentity.CrawlingConfig.Param.Config;
import org.codelibs.fess.util.ComponentUtil;
import org.codelibs.fess.util.ParameterUtil;
import org.slf4j.Logger;
@ -64,8 +65,8 @@ public class WebAuthentication extends BsWebAuthentication {
return new DigestScheme();
} else if (Constants.NTLM.equals(scheme)) {
final Properties props = new Properties();
getWebConfig().getConfigParameterMap(ConfigName.CONFIG).entrySet().stream().filter(e -> e.getKey().startsWith("jcifs."))
.forEach(e -> {
getWebConfig().getConfigParameterMap(ConfigName.CONFIG).entrySet().stream()
.filter(e -> e.getKey().startsWith(Config.JCIFS_PREFIX)).forEach(e -> {
props.setProperty(e.getKey(), e.getValue());
});
return new NTLMScheme(new JcifsEngine(props));

View file

@ -32,6 +32,7 @@ import org.codelibs.fess.crawler.client.CrawlerClientFactory;
import org.codelibs.fess.crawler.client.http.Authentication;
import org.codelibs.fess.crawler.client.http.HcHttpClient;
import org.codelibs.fess.es.config.bsentity.BsWebConfig;
import org.codelibs.fess.es.config.exentity.CrawlingConfig.Param.Client;
import org.codelibs.fess.helper.SystemHelper;
import org.codelibs.fess.mylasta.direction.FessConfig;
import org.codelibs.fess.util.ComponentUtil;
@ -159,13 +160,13 @@ public class WebConfig extends BsWebConfig implements CrawlingConfig {
}
// robots txt enabled
if (paramMap.get(HcHttpClient.ROBOTS_TXT_ENABLED_PROPERTY) == null) {
paramMap.put(HcHttpClient.ROBOTS_TXT_ENABLED_PROPERTY, !fessConfig.isCrawlerIgnoreRobotsTxt());
if (paramMap.get(Param.Client.ROBOTS_TXT_ENABLED) == null) {
paramMap.put(Param.Client.ROBOTS_TXT_ENABLED, !fessConfig.isCrawlerIgnoreRobotsTxt());
}
final String userAgent = getUserAgent();
if (StringUtil.isNotBlank(userAgent)) {
paramMap.put(HcHttpClient.USER_AGENT_PROPERTY, userAgent);
paramMap.put(Client.USER_AGENT, userAgent);
}
final List<WebAuthentication> webAuthList = webAuthenticationService.getWebAuthenticationList(getId());
@ -184,13 +185,15 @@ public class WebConfig extends BsWebConfig implements CrawlingConfig {
paramMap.put(HcHttpClient.REQUERT_HEADERS_PROPERTY,
rhList.toArray(new org.codelibs.fess.crawler.client.http.RequestHeader[rhList.size()]));
final String proxyHost = (String) paramMap.get("proxyHost");
final String proxyPort = (String) paramMap.get("proxyPort");
final String proxyHost = (String) paramMap.get(Param.Client.PROXY_HOST);
final String proxyPort = (String) paramMap.get(Param.Client.PROXY_PORT);
if (StringUtil.isNotBlank(proxyHost) && StringUtil.isNotBlank(proxyPort)) {
// proxy credentials
if (paramMap.get("proxyUsername") != null && paramMap.get("proxyPassword") != null) {
paramMap.put(HcHttpClient.PROXY_CREDENTIALS_PROPERTY, new UsernamePasswordCredentials(paramMap.remove("proxyUsername")
.toString(), paramMap.remove("proxyPassword").toString()));
if (paramMap.get(Param.Client.PROXY_USERNAME) != null && paramMap.get(Param.Client.PROXY_PASSWORD) != null) {
paramMap.put(
HcHttpClient.PROXY_CREDENTIALS_PROPERTY,
new UsernamePasswordCredentials(paramMap.remove(Param.Client.PROXY_USERNAME).toString(), paramMap.remove(
Param.Client.PROXY_PASSWORD).toString()));
}
} else {
initializeDefaultHttpProxy(paramMap);

View file

@ -38,6 +38,7 @@ import org.codelibs.fess.es.config.exbhv.WebConfigBhv;
import org.codelibs.fess.es.config.exentity.CrawlingConfig;
import org.codelibs.fess.es.config.exentity.CrawlingConfig.ConfigName;
import org.codelibs.fess.es.config.exentity.CrawlingConfig.ConfigType;
import org.codelibs.fess.es.config.exentity.CrawlingConfig.Param.Config;
import org.codelibs.fess.es.config.exentity.DataConfig;
import org.codelibs.fess.es.config.exentity.FailureUrl;
import org.codelibs.fess.es.config.exentity.FileConfig;
@ -125,7 +126,7 @@ public class CrawlingConfigHelper {
if (config == null) {
return OptionalThing.empty();
}
final String pipeline = config.getConfigParameterMap(ConfigName.CONFIG).get("pipeline");
final String pipeline = config.getConfigParameterMap(ConfigName.CONFIG).get(Config.PIPELINE);
if (StringUtil.isBlank(pipeline)) {
return OptionalThing.empty();
}

View file

@ -51,6 +51,7 @@ import org.codelibs.fess.crawler.transformer.Transformer;
import org.codelibs.fess.crawler.util.TextUtil;
import org.codelibs.fess.es.config.exentity.CrawlingConfig;
import org.codelibs.fess.es.config.exentity.CrawlingConfig.ConfigName;
import org.codelibs.fess.es.config.exentity.CrawlingConfig.Param;
import org.codelibs.fess.mylasta.direction.FessConfig;
import org.codelibs.fess.util.ComponentUtil;
import org.lastaflute.di.core.SingletonLaContainer;
@ -83,7 +84,7 @@ public class DocumentHelper {
if (crawlingConfig != null) {
Map<String, String> configParam = crawlingConfig.getConfigParameterMap(ConfigName.CONFIG);
if (configParam != null && Constants.TRUE.equalsIgnoreCase(configParam.get("keep_original_body"))) {
if (configParam != null && Constants.TRUE.equalsIgnoreCase(configParam.get(Param.Config.KEEP_ORIGINAL_BODY))) {
return content;
}
}

View file

@ -36,6 +36,7 @@ import org.codelibs.fess.crawler.service.impl.EsUrlQueueService;
import org.codelibs.fess.es.config.exbhv.BoostDocumentRuleBhv;
import org.codelibs.fess.es.config.exentity.BoostDocumentRule;
import org.codelibs.fess.es.config.exentity.CrawlingConfig.ConfigName;
import org.codelibs.fess.es.config.exentity.CrawlingConfig.Param.Config;
import org.codelibs.fess.es.config.exentity.FileConfig;
import org.codelibs.fess.es.config.exentity.WebConfig;
import org.codelibs.fess.indexer.IndexUpdater;
@ -135,9 +136,9 @@ public class WebFsIndexHelper {
webConfig.initializeClientFactory(crawler.getClientFactory());
final Map<String, String> configParamMap = webConfig.getConfigParameterMap(ConfigName.CONFIG);
if (Constants.TRUE.equalsIgnoreCase(configParamMap.get(Constants.CONFIG_CLEANUP_ALL))) {
if (Constants.TRUE.equalsIgnoreCase(configParamMap.get(Config.CLEANUP_ALL))) {
deleteCrawlData(sid);
} else if (Constants.TRUE.equalsIgnoreCase(configParamMap.get(Constants.CONFIG_CLEANUP_FILTERS))) {
} else if (Constants.TRUE.equalsIgnoreCase(configParamMap.get(Config.CLEANUP_URL_FILTERS))) {
final EsUrlFilterService urlFilterService = ComponentUtil.getComponent(EsUrlFilterService.class);
try {
urlFilterService.delete(sid);
@ -243,9 +244,9 @@ public class WebFsIndexHelper {
fileConfig.initializeClientFactory(crawler.getClientFactory());
final Map<String, String> configParamMap = fileConfig.getConfigParameterMap(ConfigName.CONFIG);
if (Constants.TRUE.equalsIgnoreCase(configParamMap.get(Constants.CONFIG_CLEANUP_ALL))) {
if (Constants.TRUE.equalsIgnoreCase(configParamMap.get(Config.CLEANUP_ALL))) {
deleteCrawlData(sid);
} else if (Constants.TRUE.equalsIgnoreCase(configParamMap.get(Constants.CONFIG_CLEANUP_FILTERS))) {
} else if (Constants.TRUE.equalsIgnoreCase(configParamMap.get(Config.CLEANUP_URL_FILTERS))) {
final EsUrlFilterService urlFilterService = ComponentUtil.getComponent(EsUrlFilterService.class);
try {
urlFilterService.delete(sid);