fix #1519 datastore supports http proxy
parent 097eb0a496
commit 50034eb3a3
1 changed file with 91 additions and 58 deletions
@@ -22,6 +22,7 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.regex.Pattern;
+import java.util.stream.Collectors;
 
 import org.apache.http.auth.AuthScheme;
 import org.apache.http.auth.AuthScope;
@@ -38,10 +39,12 @@ import org.codelibs.fess.crawler.client.ftp.FtpAuthentication;
 import org.codelibs.fess.crawler.client.ftp.FtpClient;
 import org.codelibs.fess.crawler.client.http.Authentication;
 import org.codelibs.fess.crawler.client.http.HcHttpClient;
+import org.codelibs.fess.crawler.client.http.form.FormScheme;
 import org.codelibs.fess.crawler.client.http.impl.AuthenticationImpl;
 import org.codelibs.fess.crawler.client.http.ntlm.JcifsEngine;
 import org.codelibs.fess.crawler.client.smb.SmbAuthentication;
 import org.codelibs.fess.crawler.client.smb.SmbClient;
+import org.codelibs.fess.crawler.exception.CrawlerSystemException;
 import org.codelibs.fess.es.config.bsentity.BsDataConfig;
 import org.codelibs.fess.es.config.exbhv.DataConfigToLabelBhv;
 import org.codelibs.fess.es.config.exbhv.LabelTypeBhv;
@@ -61,9 +64,11 @@ public class DataConfig extends BsDataConfig implements CrawlingConfig {
 
     private static final Logger logger = LoggerFactory.getLogger(DataConfig.class);
 
-    private static final String CRAWLER_WEB_HEADER_PREFIX = "crawler.web.header.";
+    private static final String CRAWLER_WEB_PREFIX = "crawler.web.";
 
-    private static final String CRAWLER_WEB_AUTH = "crawler.web.auth";
+    private static final String CRAWLER_WEB_HEADER_PREFIX = CRAWLER_WEB_PREFIX + "header.";
+
+    private static final String CRAWLER_WEB_AUTH = CRAWLER_WEB_PREFIX + "auth";
 
     private static final String CRAWLER_USERAGENT = "crawler.useragent";
 
@@ -209,63 +214,10 @@ public class DataConfig extends BsDataConfig implements CrawlingConfig {
             final List<Authentication> basicAuthList = new ArrayList<>();
             for (final String webAuthName : webAuthNames) {
                 final String scheme = paramMap.get(CRAWLER_WEB_AUTH + "." + webAuthName + ".scheme");
-                final String hostname = paramMap.get(CRAWLER_WEB_AUTH + "." + webAuthName + ".host");
-                final String port = paramMap.get(CRAWLER_WEB_AUTH + "." + webAuthName + ".port");
-                final String realm = paramMap.get(CRAWLER_WEB_AUTH + "." + webAuthName + ".realm");
-                final String username = paramMap.get(CRAWLER_WEB_AUTH + "." + webAuthName + ".username");
-                final String password = paramMap.get(CRAWLER_WEB_AUTH + "." + webAuthName + ".password");
-
-                if (StringUtil.isEmpty(username)) {
-                    logger.warn("username is empty. webAuth:" + webAuthName);
-                    continue;
-                }
-
-                AuthScheme authScheme = null;
-                if (Constants.BASIC.equals(scheme)) {
-                    authScheme = new BasicScheme();
-                } else if (Constants.DIGEST.equals(scheme)) {
-                    authScheme = new DigestScheme();
-                } else if (Constants.NTLM.equals(scheme)) {
-                    authScheme = new NTLMScheme(new JcifsEngine());
-                }
-                // TODO FORM
-
-                AuthScope authScope;
-                if (StringUtil.isBlank(hostname)) {
-                    authScope = AuthScope.ANY;
-                } else {
-                    int p = AuthScope.ANY_PORT;
-                    if (StringUtil.isNotBlank(port)) {
-                        try {
-                            p = Integer.parseInt(port);
-                        } catch (final NumberFormatException e) {
-                            logger.warn("Failed to parse " + port, e);
-                        }
-                    }
-
-                    String r = realm;
-                    if (StringUtil.isBlank(realm)) {
-                        r = AuthScope.ANY_REALM;
-                    }
-
-                    String s = scheme;
-                    if (StringUtil.isBlank(scheme) || Constants.NTLM.equals(scheme)) {
-                        s = AuthScope.ANY_SCHEME;
-                    }
-                    authScope = new AuthScope(hostname, p, r, s);
-                }
-
-                Credentials credentials;
-                if (Constants.NTLM.equals(scheme)) {
-                    final String workstation = paramMap.get(CRAWLER_WEB_AUTH + "." + webAuthName + ".workstation");
-                    final String domain = paramMap.get(CRAWLER_WEB_AUTH + "." + webAuthName + ".domain");
-                    credentials =
-                            new NTCredentials(username, password == null ? StringUtil.EMPTY : password,
-                                    workstation == null ? StringUtil.EMPTY : workstation, domain == null ? StringUtil.EMPTY : domain);
-                } else {
-                    credentials = new UsernamePasswordCredentials(username, password == null ? StringUtil.EMPTY : password);
-                }
-
+                final AuthScheme authScheme = getAuthScheme(paramMap, webAuthName, scheme);
+                final AuthScope authScope = getAuthScope(webAuthName, scheme, paramMap);
+                final Credentials credentials = getCredentials(webAuthName, scheme, paramMap);
+
                 basicAuthList.add(new AuthenticationImpl(authScope, credentials, authScheme));
             }
             factoryParamMap.put(HcHttpClient.BASIC_AUTHENTICATIONS_PROPERTY,
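Note: for context, a hedged sketch of the per-name web authentication parameters this loop reads. Only the key layout (crawler.web.auth.<name>.scheme, .host, .port, .realm, .username, .password) appears in the diff; the name "site1", the "BASIC" literal, the assumption that the names come from a comma-separated crawler.web.auth value, and every value below are illustrative only.

    import java.util.HashMap;
    import java.util.Map;

    // Sketch only: hypothetical data-config parameters for one web authentication
    // entry named "site1"; the key suffixes match the lookups in DataConfig.
    public class WebAuthParamExample {
        public static Map<String, String> buildParamMap() {
            final Map<String, String> paramMap = new HashMap<>();
            paramMap.put("crawler.web.auth", "site1");              // assumed source of webAuthNames (not shown in this hunk)
            paramMap.put("crawler.web.auth.site1.scheme", "BASIC"); // assumed literal matched by Constants.BASIC
            paramMap.put("crawler.web.auth.site1.host", "www.example.com");
            paramMap.put("crawler.web.auth.site1.port", "443");
            paramMap.put("crawler.web.auth.site1.realm", "example");
            paramMap.put("crawler.web.auth.site1.username", "user");
            paramMap.put("crawler.web.auth.site1.password", "pass");
            return paramMap;
        }
    }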
@@ -287,6 +239,19 @@ public class DataConfig extends BsDataConfig implements CrawlingConfig {
                     rhList.toArray(new org.codelibs.fess.crawler.client.http.RequestHeader[rhList.size()]));
         }
 
+        // proxy credentials
+        final String proxyHost = paramMap.get(CRAWLER_WEB_PREFIX + "proxyHost");
+        final String proxyPort = paramMap.get(CRAWLER_WEB_PREFIX + "proxyPort");
+        if (StringUtil.isNotBlank(proxyHost) && StringUtil.isNotBlank(proxyPort)) {
+            factoryParamMap.put(HcHttpClient.PROXY_HOST_PROPERTY, proxyHost);
+            factoryParamMap.put(HcHttpClient.PROXY_PORT_PROPERTY, proxyPort);
+            final String proxyUsername = paramMap.get(CRAWLER_WEB_PREFIX + "proxyUsername");
+            final String proxyPassword = paramMap.get(CRAWLER_WEB_PREFIX + "proxyPassword");
+            if (proxyUsername != null && proxyPassword != null) {
+                factoryParamMap.put(HcHttpClient.PROXY_CREDENTIALS_PROPERTY, new UsernamePasswordCredentials(proxyUsername, proxyPassword));
+            }
+        }
+
         // file auth
         final String fileAuthStr = paramMap.get(CRAWLER_FILE_AUTH);
         if (StringUtil.isNotBlank(fileAuthStr)) {
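Note: a minimal sketch of how the new proxy settings might be supplied through the data config's parameter map. The key names (crawler.web.proxyHost, proxyPort, proxyUsername, proxyPassword) and the HcHttpClient properties they map to come from the hunk above; the host, port, and credential values are made-up examples.

    import java.util.HashMap;
    import java.util.Map;

    // Sketch only: key names match the proxy lookups above; all values are hypothetical.
    public class ProxyParamExample {
        public static Map<String, String> buildParamMap() {
            final Map<String, String> paramMap = new HashMap<>();
            paramMap.put("crawler.web.proxyHost", "proxy.example.com"); // -> HcHttpClient.PROXY_HOST_PROPERTY
            paramMap.put("crawler.web.proxyPort", "8080");              // -> HcHttpClient.PROXY_PORT_PROPERTY
            // Optional: PROXY_CREDENTIALS_PROPERTY is set only when both are non-null.
            paramMap.put("crawler.web.proxyUsername", "proxyuser");
            paramMap.put("crawler.web.proxyPassword", "proxypass");
            return paramMap;
        }
    }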
@@ -356,6 +321,74 @@ public class DataConfig extends BsDataConfig implements CrawlingConfig {
         return factoryParamMap;
     }
 
+    private AuthScheme getAuthScheme(final Map<String, String> paramMap, final String webAuthName, final String scheme) {
+        AuthScheme authScheme = null;
+        if (Constants.BASIC.equals(scheme)) {
+            authScheme = new BasicScheme();
+        } else if (Constants.DIGEST.equals(scheme)) {
+            authScheme = new DigestScheme();
+        } else if (Constants.NTLM.equals(scheme)) {
+            authScheme = new NTLMScheme(new JcifsEngine());
+        } else if (Constants.FORM.equals(scheme)) {
+            final String prefix = CRAWLER_WEB_AUTH + "." + webAuthName + ".";
+            final Map<String, String> parameterMap =
+                    paramMap.entrySet().stream().filter(e -> e.getKey().startsWith(prefix))
+                            .collect(Collectors.toMap(e -> e.getKey().substring(prefix.length()), e -> e.getValue()));
+            authScheme = new FormScheme(parameterMap);
+        }
+        return authScheme;
+    }
+
+    private Credentials getCredentials(final String webAuthName, final String scheme, final Map<String, String> paramMap) {
+        final String username = paramMap.get(CRAWLER_WEB_AUTH + "." + webAuthName + ".username");
+        if (StringUtil.isEmpty(username)) {
+            throw new CrawlerSystemException("username is empty. webAuth:" + webAuthName);
+        }
+        final String password = paramMap.get(CRAWLER_WEB_AUTH + "." + webAuthName + ".password");
+        Credentials credentials;
+        if (Constants.NTLM.equals(scheme)) {
+            final String workstation = paramMap.get(CRAWLER_WEB_AUTH + "." + webAuthName + ".workstation");
+            final String domain = paramMap.get(CRAWLER_WEB_AUTH + "." + webAuthName + ".domain");
+            credentials =
+                    new NTCredentials(username, password == null ? StringUtil.EMPTY : password, workstation == null ? StringUtil.EMPTY
+                            : workstation, domain == null ? StringUtil.EMPTY : domain);
+        } else {
+            credentials = new UsernamePasswordCredentials(username, password == null ? StringUtil.EMPTY : password);
+        }
+        return credentials;
+    }
+
+    private AuthScope getAuthScope(final String webAuthName, final String scheme, final Map<String, String> paramMap) {
+        final String hostname = paramMap.get(CRAWLER_WEB_AUTH + "." + webAuthName + ".host");
+        final String port = paramMap.get(CRAWLER_WEB_AUTH + "." + webAuthName + ".port");
+        final String realm = paramMap.get(CRAWLER_WEB_AUTH + "." + webAuthName + ".realm");
+        AuthScope authScope;
+        if (StringUtil.isBlank(hostname)) {
+            authScope = AuthScope.ANY;
+        } else {
+            int p = AuthScope.ANY_PORT;
+            if (StringUtil.isNotBlank(port)) {
+                try {
+                    p = Integer.parseInt(port);
+                } catch (final NumberFormatException e) {
+                    logger.warn("Failed to parse " + port, e);
+                }
+            }
+
+            String r = realm;
+            if (StringUtil.isBlank(realm)) {
+                r = AuthScope.ANY_REALM;
+            }
+
+            String s = scheme;
+            if (StringUtil.isBlank(scheme) || Constants.NTLM.equals(scheme)) {
+                s = AuthScope.ANY_SCHEME;
+            }
+            authScope = new AuthScope(hostname, p, r, s);
+        }
+        return authScope;
+    }
+
     @Override
     public Map<String, String> getConfigParameterMap(final ConfigName name) {
         return Collections.emptyMap();
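Note: a standalone sketch of the prefix stripping performed by the new FORM branch in getAuthScheme, using the same Collectors.toMap approach. Only the crawler.web.auth.<name>. key layout comes from the commit; the name "site1", the login_url and username_field keys, and all values are hypothetical, and the actual keys FormScheme expects are not shown in this diff.

    import java.util.HashMap;
    import java.util.Map;
    import java.util.stream.Collectors;

    // Sketch only: strips the per-name prefix from matching parameters,
    // mirroring how getAuthScheme builds the map handed to FormScheme.
    public class FormParamExample {
        public static void main(final String[] args) {
            final Map<String, String> paramMap = new HashMap<>();
            paramMap.put("crawler.web.auth.site1.scheme", "FORM");                             // assumed literal for Constants.FORM
            paramMap.put("crawler.web.auth.site1.login_url", "https://www.example.com/login"); // hypothetical key
            paramMap.put("crawler.web.auth.site1.username_field", "user");                     // hypothetical key

            final String prefix = "crawler.web.auth.site1.";
            final Map<String, String> parameterMap = paramMap.entrySet().stream()
                    .filter(e -> e.getKey().startsWith(prefix))
                    .collect(Collectors.toMap(e -> e.getKey().substring(prefix.length()), Map.Entry::getValue));

            // Prints scheme=FORM, login_url=..., username_field=... (order not guaranteed).
            parameterMap.forEach((k, v) -> System.out.println(k + "=" + v));
        }
    }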