fix #1519 datastore supports http proxy

This commit is contained in:
Shinsuke Sugaya 2018-02-20 23:06:12 +09:00
parent 097eb0a496
commit 50034eb3a3

View file

@ -22,6 +22,7 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.apache.http.auth.AuthScheme;
import org.apache.http.auth.AuthScope;
@ -38,10 +39,12 @@ import org.codelibs.fess.crawler.client.ftp.FtpAuthentication;
import org.codelibs.fess.crawler.client.ftp.FtpClient;
import org.codelibs.fess.crawler.client.http.Authentication;
import org.codelibs.fess.crawler.client.http.HcHttpClient;
import org.codelibs.fess.crawler.client.http.form.FormScheme;
import org.codelibs.fess.crawler.client.http.impl.AuthenticationImpl;
import org.codelibs.fess.crawler.client.http.ntlm.JcifsEngine;
import org.codelibs.fess.crawler.client.smb.SmbAuthentication;
import org.codelibs.fess.crawler.client.smb.SmbClient;
import org.codelibs.fess.crawler.exception.CrawlerSystemException;
import org.codelibs.fess.es.config.bsentity.BsDataConfig;
import org.codelibs.fess.es.config.exbhv.DataConfigToLabelBhv;
import org.codelibs.fess.es.config.exbhv.LabelTypeBhv;
@ -61,9 +64,11 @@ public class DataConfig extends BsDataConfig implements CrawlingConfig {
private static final Logger logger = LoggerFactory.getLogger(DataConfig.class);
private static final String CRAWLER_WEB_HEADER_PREFIX = "crawler.web.header.";
private static final String CRAWLER_WEB_PREFIX = "crawler.web.";
private static final String CRAWLER_WEB_AUTH = "crawler.web.auth";
private static final String CRAWLER_WEB_HEADER_PREFIX = CRAWLER_WEB_PREFIX + "header.";
private static final String CRAWLER_WEB_AUTH = CRAWLER_WEB_PREFIX + "auth";
private static final String CRAWLER_USERAGENT = "crawler.useragent";
@ -209,63 +214,10 @@ public class DataConfig extends BsDataConfig implements CrawlingConfig {
final List<Authentication> basicAuthList = new ArrayList<>();
for (final String webAuthName : webAuthNames) {
final String scheme = paramMap.get(CRAWLER_WEB_AUTH + "." + webAuthName + ".scheme");
final String hostname = paramMap.get(CRAWLER_WEB_AUTH + "." + webAuthName + ".host");
final String port = paramMap.get(CRAWLER_WEB_AUTH + "." + webAuthName + ".port");
final String realm = paramMap.get(CRAWLER_WEB_AUTH + "." + webAuthName + ".realm");
final String username = paramMap.get(CRAWLER_WEB_AUTH + "." + webAuthName + ".username");
final String password = paramMap.get(CRAWLER_WEB_AUTH + "." + webAuthName + ".password");
if (StringUtil.isEmpty(username)) {
logger.warn("username is empty. webAuth:" + webAuthName);
continue;
}
AuthScheme authScheme = null;
if (Constants.BASIC.equals(scheme)) {
authScheme = new BasicScheme();
} else if (Constants.DIGEST.equals(scheme)) {
authScheme = new DigestScheme();
} else if (Constants.NTLM.equals(scheme)) {
authScheme = new NTLMScheme(new JcifsEngine());
}
// TODO FORM
AuthScope authScope;
if (StringUtil.isBlank(hostname)) {
authScope = AuthScope.ANY;
} else {
int p = AuthScope.ANY_PORT;
if (StringUtil.isNotBlank(port)) {
try {
p = Integer.parseInt(port);
} catch (final NumberFormatException e) {
logger.warn("Failed to parse " + port, e);
}
}
String r = realm;
if (StringUtil.isBlank(realm)) {
r = AuthScope.ANY_REALM;
}
String s = scheme;
if (StringUtil.isBlank(scheme) || Constants.NTLM.equals(scheme)) {
s = AuthScope.ANY_SCHEME;
}
authScope = new AuthScope(hostname, p, r, s);
}
Credentials credentials;
if (Constants.NTLM.equals(scheme)) {
final String workstation = paramMap.get(CRAWLER_WEB_AUTH + "." + webAuthName + ".workstation");
final String domain = paramMap.get(CRAWLER_WEB_AUTH + "." + webAuthName + ".domain");
credentials =
new NTCredentials(username, password == null ? StringUtil.EMPTY : password,
workstation == null ? StringUtil.EMPTY : workstation, domain == null ? StringUtil.EMPTY : domain);
} else {
credentials = new UsernamePasswordCredentials(username, password == null ? StringUtil.EMPTY : password);
}
final AuthScheme authScheme = getAuthScheme(paramMap, webAuthName, scheme);
final AuthScope authScope = getAuthScope(webAuthName, scheme, paramMap);
final Credentials credentials = getCredentials(webAuthName, scheme, paramMap);
basicAuthList.add(new AuthenticationImpl(authScope, credentials, authScheme));
}
factoryParamMap.put(HcHttpClient.BASIC_AUTHENTICATIONS_PROPERTY,
@ -287,6 +239,19 @@ public class DataConfig extends BsDataConfig implements CrawlingConfig {
rhList.toArray(new org.codelibs.fess.crawler.client.http.RequestHeader[rhList.size()]));
}
// proxy credentials
final String proxyHost = paramMap.get(CRAWLER_WEB_PREFIX + "proxyHost");
final String proxyPort = paramMap.get(CRAWLER_WEB_PREFIX + "proxyPort");
if (StringUtil.isNotBlank(proxyHost) && StringUtil.isNotBlank(proxyPort)) {
factoryParamMap.put(HcHttpClient.PROXY_HOST_PROPERTY, proxyHost);
factoryParamMap.put(HcHttpClient.PROXY_PORT_PROPERTY, proxyPort);
final String proxyUsername = paramMap.get(CRAWLER_WEB_PREFIX + "proxyUsername");
final String proxyPassword = paramMap.get(CRAWLER_WEB_PREFIX + "proxyPassword");
if (proxyUsername != null && proxyPassword != null) {
factoryParamMap.put(HcHttpClient.PROXY_CREDENTIALS_PROPERTY, new UsernamePasswordCredentials(proxyUsername, proxyPassword));
}
}
// file auth
final String fileAuthStr = paramMap.get(CRAWLER_FILE_AUTH);
if (StringUtil.isNotBlank(fileAuthStr)) {
@ -356,6 +321,74 @@ public class DataConfig extends BsDataConfig implements CrawlingConfig {
return factoryParamMap;
}
private AuthScheme getAuthScheme(final Map<String, String> paramMap, final String webAuthName, final String scheme) {
AuthScheme authScheme = null;
if (Constants.BASIC.equals(scheme)) {
authScheme = new BasicScheme();
} else if (Constants.DIGEST.equals(scheme)) {
authScheme = new DigestScheme();
} else if (Constants.NTLM.equals(scheme)) {
authScheme = new NTLMScheme(new JcifsEngine());
} else if (Constants.FORM.equals(scheme)) {
final String prefix = CRAWLER_WEB_AUTH + "." + webAuthName + ".";
final Map<String, String> parameterMap =
paramMap.entrySet().stream().filter(e -> e.getKey().startsWith(prefix))
.collect(Collectors.toMap(e -> e.getKey().substring(prefix.length()), e -> e.getValue()));
authScheme = new FormScheme(parameterMap);
}
return authScheme;
}
private Credentials getCredentials(final String webAuthName, final String scheme, final Map<String, String> paramMap) {
final String username = paramMap.get(CRAWLER_WEB_AUTH + "." + webAuthName + ".username");
if (StringUtil.isEmpty(username)) {
throw new CrawlerSystemException("username is empty. webAuth:" + webAuthName);
}
final String password = paramMap.get(CRAWLER_WEB_AUTH + "." + webAuthName + ".password");
Credentials credentials;
if (Constants.NTLM.equals(scheme)) {
final String workstation = paramMap.get(CRAWLER_WEB_AUTH + "." + webAuthName + ".workstation");
final String domain = paramMap.get(CRAWLER_WEB_AUTH + "." + webAuthName + ".domain");
credentials =
new NTCredentials(username, password == null ? StringUtil.EMPTY : password, workstation == null ? StringUtil.EMPTY
: workstation, domain == null ? StringUtil.EMPTY : domain);
} else {
credentials = new UsernamePasswordCredentials(username, password == null ? StringUtil.EMPTY : password);
}
return credentials;
}
private AuthScope getAuthScope(final String webAuthName, final String scheme, final Map<String, String> paramMap) {
final String hostname = paramMap.get(CRAWLER_WEB_AUTH + "." + webAuthName + ".host");
final String port = paramMap.get(CRAWLER_WEB_AUTH + "." + webAuthName + ".port");
final String realm = paramMap.get(CRAWLER_WEB_AUTH + "." + webAuthName + ".realm");
AuthScope authScope;
if (StringUtil.isBlank(hostname)) {
authScope = AuthScope.ANY;
} else {
int p = AuthScope.ANY_PORT;
if (StringUtil.isNotBlank(port)) {
try {
p = Integer.parseInt(port);
} catch (final NumberFormatException e) {
logger.warn("Failed to parse " + port, e);
}
}
String r = realm;
if (StringUtil.isBlank(realm)) {
r = AuthScope.ANY_REALM;
}
String s = scheme;
if (StringUtil.isBlank(scheme) || Constants.NTLM.equals(scheme)) {
s = AuthScope.ANY_SCHEME;
}
authScope = new AuthScope(hostname, p, r, s);
}
return authScope;
}
@Override
public Map<String, String> getConfigParameterMap(final ConfigName name) {
return Collections.emptyMap();