diff --git a/src/main/java/org/codelibs/fess/ds/impl/GitBucketDataStoreImpl.java b/src/main/java/org/codelibs/fess/ds/impl/GitBucketDataStoreImpl.java
index 2aa06792a..c58ccc951 100644
--- a/src/main/java/org/codelibs/fess/ds/impl/GitBucketDataStoreImpl.java
+++ b/src/main/java/org/codelibs/fess/ds/impl/GitBucketDataStoreImpl.java
@@ -16,6 +16,7 @@ package org.codelibs.fess.ds.impl;
 
 import java.io.InputStream;
+import java.util.ArrayList;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
@@ -25,7 +26,12 @@ import java.util.function.Consumer;
 import org.codelibs.core.lang.StringUtil;
 import org.codelibs.elasticsearch.runner.net.Curl;
 import org.codelibs.elasticsearch.runner.net.CurlResponse;
+import org.codelibs.fess.crawler.client.CrawlerClientFactory;
+import org.codelibs.fess.crawler.client.http.HcHttpClient;
+import org.codelibs.fess.crawler.client.http.RequestHeader;
 import org.codelibs.fess.ds.IndexUpdateCallback;
+import org.codelibs.fess.es.config.exentity.CrawlingConfig;
+import org.codelibs.fess.es.config.exentity.CrawlingConfigWrapper;
 import org.codelibs.fess.es.config.exentity.DataConfig;
 import org.codelibs.fess.util.ComponentUtil;
 import org.elasticsearch.common.xcontent.json.JsonXContent;
@@ -62,14 +68,30 @@ public class GitBucketDataStoreImpl extends AbstractDataStoreImpl {
             return;
         }
 
+        final CrawlingConfig crawlingConfig = new CrawlingConfigWrapper(dataConfig) {
+            @Override
+            public Map<String, Object> initializeClientFactory(CrawlerClientFactory crawlerClientFactory) {
+                final Map<String, Object> paramMap = super.initializeClientFactory(crawlerClientFactory);
+                List<RequestHeader> headerList = new ArrayList<>();
+                RequestHeader[] headers = (RequestHeader[]) paramMap.get(HcHttpClient.REQUERT_HEADERS_PROPERTY);
+                if (headers != null) {
+                    for (RequestHeader header : headers) {
+                        headerList.add(header);
+                    }
+                }
+                headerList.add(new RequestHeader("Authorization", "token " + authToken));
+                paramMap.put(HcHttpClient.REQUERT_HEADERS_PROPERTY, headerList.toArray(new RequestHeader[headerList.size()]));
+                return paramMap;
+            }
+        };
         for (final Map<String, Object> repository : repositoryList) {
             try {
                 final String name = (String) repository.get("name");
                 final String owner = (String) repository.get("owner");
                 repository.get("is_private");
 
-                collectFileNames(rootURL, authToken, owner, name, "", 0, readInterval, path -> {
-                    storeFileContent(rootURL, authToken, owner, name, path, dataConfig, callback, paramMap, scriptMap, defaultDataMap);
+                collectFileNames(rootURL, authToken, owner, name, StringUtil.EMPTY, 0, readInterval, path -> {
+                    storeFileContent(rootURL, authToken, owner, name, path, crawlingConfig, callback, paramMap, scriptMap, defaultDataMap);
                     if (readInterval > 0) {
                         sleep(readInterval);
                     }
@@ -125,23 +147,20 @@ public class GitBucketDataStoreImpl extends AbstractDataStoreImpl {
     }
 
     private void storeFileContent(final String rootURL, final String authToken, final String owner, final String name, final String path,
-            final DataConfig dataConfig, final IndexUpdateCallback callback, final Map<String, String> paramMap,
+            final CrawlingConfig crawlingConfig, final IndexUpdateCallback callback, final Map<String, String> paramMap,
             final Map<String, String> scriptMap, final Map<String, Object> defaultDataMap) {
         final String url = rootURL + owner + "/" + name + "/blob/master/" + path;
 
-        try (CurlResponse curlResponse = Curl.get(url).param("raw", "true").header("Authorization", "token " + authToken).execute()) {
+        if (logger.isInfoEnabled()) {
             logger.info("Get a content from " + url);
-            final Map<String, Object> dataMap = new HashMap<>();
-            dataMap.putAll(defaultDataMap);
-            dataMap.putAll(ComponentUtil.getDocumentHelper().processRequest(dataConfig, paramMap.get("crawlingInfoId"), url));
-            // TODO scriptMap
-
-            callback.store(paramMap, dataMap);
-
-        } catch (final Exception e) {
-            // TODO CrawlingAccessException?
-            logger.warn("Failed to parse " + url, e);
         }
+        final Map<String, Object> dataMap = new HashMap<>();
+        dataMap.putAll(defaultDataMap);
+        dataMap.putAll(ComponentUtil.getDocumentHelper().processRequest(crawlingConfig, paramMap.get("crawlingInfoId"), url + "?raw=true"));
+        // TODO scriptMap
+
+        callback.store(paramMap, dataMap);
+        return;
     }
diff --git a/src/main/java/org/codelibs/fess/es/config/exentity/CrawlingConfig.java b/src/main/java/org/codelibs/fess/es/config/exentity/CrawlingConfig.java
index e1376506f..c55d1186a 100644
--- a/src/main/java/org/codelibs/fess/es/config/exentity/CrawlingConfig.java
+++ b/src/main/java/org/codelibs/fess/es/config/exentity/CrawlingConfig.java
@@ -37,7 +37,7 @@ public interface CrawlingConfig {
 
     Integer getTimeToLive();
 
-    void initializeClientFactory(CrawlerClientFactory crawlerClientFactory);
+    Map<String, Object> initializeClientFactory(CrawlerClientFactory crawlerClientFactory);
 
     Map<String, String> getConfigParameterMap(ConfigName name);
diff --git a/src/main/java/org/codelibs/fess/es/config/exentity/CrawlingConfigWrapper.java b/src/main/java/org/codelibs/fess/es/config/exentity/CrawlingConfigWrapper.java
new file mode 100644
index 000000000..148a59afb
--- /dev/null
+++ b/src/main/java/org/codelibs/fess/es/config/exentity/CrawlingConfigWrapper.java
@@ -0,0 +1,54 @@
+package org.codelibs.fess.es.config.exentity;
+
+import java.util.Map;
+
+import org.codelibs.fess.crawler.client.CrawlerClientFactory;
+
+public class CrawlingConfigWrapper implements CrawlingConfig {
+
+    private CrawlingConfig crawlingConfig;
+
+    public CrawlingConfigWrapper(final CrawlingConfig crawlingConfig) {
+        this.crawlingConfig = crawlingConfig;
+    }
+
+    public String getId() {
+        return crawlingConfig.getId();
+    }
+
+    public String getName() {
+        return crawlingConfig.getName();
+    }
+
+    public String[] getPermissions() {
+        return crawlingConfig.getPermissions();
+    }
+
+    public String[] getLabelTypeValues() {
+        return crawlingConfig.getLabelTypeValues();
+    }
+
+    public String getDocumentBoost() {
+        return crawlingConfig.getDocumentBoost();
+    }
+
+    public String getIndexingTarget(String input) {
+        return crawlingConfig.getIndexingTarget(input);
+    }
+
+    public String getConfigId() {
+        return crawlingConfig.getConfigId();
+    }
+
+    public Integer getTimeToLive() {
+        return crawlingConfig.getTimeToLive();
+    }
+
+    public Map<String, Object> initializeClientFactory(CrawlerClientFactory crawlerClientFactory) {
+        return crawlingConfig.initializeClientFactory(crawlerClientFactory);
+    }
+
+    public Map<String, String> getConfigParameterMap(ConfigName name) {
+        return crawlingConfig.getConfigParameterMap(name);
+    }
+}
diff --git a/src/main/java/org/codelibs/fess/es/config/exentity/DataConfig.java b/src/main/java/org/codelibs/fess/es/config/exentity/DataConfig.java
index b52c5edc1..d65b21f5e 100644
--- a/src/main/java/org/codelibs/fess/es/config/exentity/DataConfig.java
+++ b/src/main/java/org/codelibs/fess/es/config/exentity/DataConfig.java
@@ -182,7 +182,7 @@ public class DataConfig extends BsDataConfig implements CrawlingConfig {
     }
 
     @Override
-    public void initializeClientFactory(final CrawlerClientFactory crawlerClientFactory) {
+    public Map<String, Object> initializeClientFactory(final CrawlerClientFactory crawlerClientFactory) {
         final Map<String, String> paramMap = getHandlerParameterMap();
 
         final Map<String, Object> factoryParamMap = new HashMap<>();
@@ -353,6 +353,7 @@ public class DataConfig extends BsDataConfig implements CrawlingConfig {
             }
         }
+        return factoryParamMap;
     }
 
     @Override
diff --git a/src/main/java/org/codelibs/fess/es/config/exentity/FileConfig.java b/src/main/java/org/codelibs/fess/es/config/exentity/FileConfig.java
index 6541deef1..5a3feab68 100644
--- a/src/main/java/org/codelibs/fess/es/config/exentity/FileConfig.java
+++ b/src/main/java/org/codelibs/fess/es/config/exentity/FileConfig.java
@@ -198,7 +198,7 @@ public class FileConfig extends BsFileConfig implements CrawlingConfig {
     }
 
     @Override
-    public void initializeClientFactory(final CrawlerClientFactory clientFactory) {
+    public Map<String, Object> initializeClientFactory(final CrawlerClientFactory clientFactory) {
         final FileAuthenticationService fileAuthenticationService = ComponentUtil.getComponent(FileAuthenticationService.class);
 
         // Parameters
@@ -237,6 +237,7 @@ public class FileConfig extends BsFileConfig implements CrawlingConfig {
 
         paramMap.put(SmbClient.SMB_AUTHENTICATIONS_PROPERTY, smbAuthList.toArray(new SmbAuthentication[smbAuthList.size()]));
         paramMap.put(FtpClient.FTP_AUTHENTICATIONS_PROPERTY, ftpAuthList.toArray(new FtpAuthentication[ftpAuthList.size()]));
+        return paramMap;
     }
 
     @Override
diff --git a/src/main/java/org/codelibs/fess/es/config/exentity/WebConfig.java b/src/main/java/org/codelibs/fess/es/config/exentity/WebConfig.java
index 0d5a59958..41c6fdfc8 100644
--- a/src/main/java/org/codelibs/fess/es/config/exentity/WebConfig.java
+++ b/src/main/java/org/codelibs/fess/es/config/exentity/WebConfig.java
@@ -197,7 +197,7 @@ public class WebConfig extends BsWebConfig implements CrawlingConfig {
     }
 
     @Override
-    public void initializeClientFactory(final CrawlerClientFactory clientFactory) {
+    public Map<String, Object> initializeClientFactory(final CrawlerClientFactory clientFactory) {
         final WebAuthenticationService webAuthenticationService = ComponentUtil.getComponent(WebAuthenticationService.class);
         final RequestHeaderService requestHeaderService = ComponentUtil.getComponent(RequestHeaderService.class);
         final FessConfig fessConfig = ComponentUtil.getFessConfig();
@@ -237,6 +237,7 @@ public class WebConfig extends BsWebConfig implements CrawlingConfig {
 
         paramMap.put(HcHttpClient.REQUERT_HEADERS_PROPERTY,
                 rhList.toArray(new org.codelibs.fess.crawler.client.http.RequestHeader[rhList.size()]));
+        return paramMap;
     }
 
     @Override
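Usage note (not part of the change above): initializeClientFactory() now returns the factory parameter map so that a caller can wrap an existing config and extend that map before the crawler client is configured, which is exactly how GitBucketDataStoreImpl injects its Authorization header. The sketch below restates that wrapper pattern for any data store; it assumes the same imports and the dataConfig/authToken variables shown in GitBucketDataStoreImpl, and the final comment line is illustrative only.

// Sketch only: reuses CrawlingConfigWrapper and the Map-returning initializeClientFactory() from this change.
final CrawlingConfig wrappedConfig = new CrawlingConfigWrapper(dataConfig) {
    @Override
    public Map<String, Object> initializeClientFactory(final CrawlerClientFactory crawlerClientFactory) {
        // Start from the parameter map built by the wrapped config.
        final Map<String, Object> paramMap = super.initializeClientFactory(crawlerClientFactory);
        // Keep any request headers that are already configured, then append one more.
        final List<RequestHeader> headerList = new ArrayList<>();
        final RequestHeader[] headers = (RequestHeader[]) paramMap.get(HcHttpClient.REQUERT_HEADERS_PROPERTY);
        if (headers != null) {
            Collections.addAll(headerList, headers);
        }
        headerList.add(new RequestHeader("Authorization", "token " + authToken));
        paramMap.put(HcHttpClient.REQUERT_HEADERS_PROPERTY, headerList.toArray(new RequestHeader[headerList.size()]));
        return paramMap;
    }
};
// The wrapped config is then passed wherever a CrawlingConfig is expected, e.g.
// ComponentUtil.getDocumentHelper().processRequest(wrappedConfig, crawlingInfoId, url);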