diff --git a/src/main/java/org/codelibs/fess/ds/impl/GitBucketDataStoreImpl.java b/src/main/java/org/codelibs/fess/ds/impl/GitBucketDataStoreImpl.java
index 2a77ea175..95a2bbdc5 100644
--- a/src/main/java/org/codelibs/fess/ds/impl/GitBucketDataStoreImpl.java
+++ b/src/main/java/org/codelibs/fess/ds/impl/GitBucketDataStoreImpl.java
@@ -60,20 +60,27 @@ public class GitBucketDataStoreImpl extends AbstractDataStoreImpl {
 
         final String rootURL = getRootURL(paramMap);
         final String authToken = getAuthToken(paramMap);
-        final List<String> sourceLabels = getSourceLabelList(rootURL, authToken);
         final long readInterval = getReadInterval(paramMap);
 
+        // Non-emptiness Check for URL and Token
         if (rootURL.isEmpty() || authToken.isEmpty()) {
             logger.warn("parameter \"" + TOKEN_PARAM + "\" and \"" + GITBUCKET_URL_PARAM + "\" are required");
             return;
         }
 
+        // Get List of Repositories
         final List<Map<String, Object>> repositoryList = getRepositoryList(rootURL, authToken);
         if (repositoryList.isEmpty()) {
             logger.warn("Token is invalid or no Repository");
             return;
         }
 
+        // Get Labels
+        final Map<String, String> pluginInfo = getFessPluginInfo(rootURL, authToken);
+        final String sourceLabel = pluginInfo.get("source_label");
+        final String issueLabel = pluginInfo.get("issue_label");
+        final String wikiLabel = pluginInfo.get("wiki_label");
+
         final CrawlingConfig crawlingConfig = new CrawlingConfigWrapper(dataConfig) {
             @Override
             public Map<String, Object> initializeClientFactory(final CrawlerClientFactory crawlerClientFactory) {
@@ -91,14 +98,20 @@ public class GitBucketDataStoreImpl extends AbstractDataStoreImpl {
                 return paramMap;
             }
         };
+
+        // Crawl each repository
         for (final Map<String, Object> repository : repositoryList) {
             try {
                 final String owner = (String) repository.get("owner");
                 final String name = (String) repository.get("name");
                 final String refStr = getGitRef(rootURL, authToken, owner, name, "master");
 
+                final int issueCount = (int) repository.get("issue_count");
+                final int pullCount = (int) repository.get("pull_count");
                 final List<String> roleList = createRoleList(owner, repository);
-                collectFileNames(
+                logger.info("Crawl " + owner + "/" + name);
+                // crawl and store file contents recursively
+                crawlFileContents(
                         rootURL,
                         authToken,
                         owner,
@@ -108,12 +121,29 @@ public class GitBucketDataStoreImpl extends AbstractDataStoreImpl {
                         0,
                         readInterval,
                         path -> {
-                            storeFileContent(rootURL, authToken, sourceLabels, owner, name, refStr, roleList, path, crawlingConfig,
+                            storeFileContent(rootURL, authToken, sourceLabel, owner, name, refStr, roleList, path, crawlingConfig,
                                     callback, paramMap, scriptMap, defaultDataMap);
                             if (readInterval > 0) {
                                 sleep(readInterval);
                             }
                         });
+
+                logger.info("Crawl issues in " + owner + "/" + name);
+                // store issues
+                for (int issueId = 1; issueId <= issueCount + pullCount; issueId++) {
+                    storeIssueById(rootURL, authToken, issueLabel, owner, name, Integer.valueOf(issueId), roleList, crawlingConfig, callback,
+                            paramMap, scriptMap, defaultDataMap);
+
+                    if (readInterval > 0) {
+                        sleep(readInterval);
+                    }
+                }
+
+                logger.info("Crawl Wiki in " + owner + "/" + name);
+                // crawl Wiki
+                storeWikiContents(rootURL, authToken, wikiLabel, owner, name, roleList, crawlingConfig, callback, paramMap, scriptMap,
+                        defaultDataMap, readInterval);
+
             } catch (final Exception e) {
                 logger.warn("Failed to access to " + repository, e);
             }
@@ -139,6 +169,21 @@ public class GitBucketDataStoreImpl extends AbstractDataStoreImpl {
         return StringUtil.EMPTY;
     }
 
+    protected Map<String, String> getFessPluginInfo(final String rootURL, final String authToken) {
+        final String url = rootURL + "api/v3/fess/info";
+        try (CurlResponse curlResponse = Curl.get(url).header("Authorization", "token " + authToken).execute()) {
+            @SuppressWarnings({ "rawtypes", "unchecked" })
+            final Map<String, String> map = (Map) curlResponse.getContentAsMap();
+            assert (map.containsKey("version"));
+            assert (map.containsKey("source_label") && map.containsKey("wiki_label") && map.containsKey("issue_label"));
+            return map;
+
+        } catch (final Exception e) {
+            logger.warn("Failed to access to " + rootURL, e);
+            return Collections.emptyMap();
+        }
+    }
+
     protected List<String> getSourceLabelList(final String rootURL, final String authToken) {
         final String url = rootURL + "api/v3/fess/label";
         try (CurlResponse curlResponse = Curl.get(url).header("Authorization", "token " + authToken).execute()) {
@@ -207,7 +252,7 @@ public class GitBucketDataStoreImpl extends AbstractDataStoreImpl {
         }
     }
 
-    private void storeFileContent(final String rootURL, final String authToken, final List<String> sourceLabels, final String owner,
+    private void storeFileContent(final String rootURL, final String authToken, final String sourceLabel, final String owner,
             final String name, final String refStr, final List<String> roleList, final String path, final CrawlingConfig crawlingConfig,
             final IndexUpdateCallback callback, final Map<String, String> paramMap, final Map<String, String> scriptMap,
             final Map<String, Object> defaultDataMap) {
@@ -224,7 +269,7 @@ public class GitBucketDataStoreImpl extends AbstractDataStoreImpl {
 
         dataMap.put("url", viewUrl);
         dataMap.put("role", roleList);
-        dataMap.put("label", sourceLabels);
+        dataMap.put("label", Collections.singletonList(sourceLabel));
 
         // TODO scriptMap
 
@@ -233,7 +278,95 @@ public class GitBucketDataStoreImpl extends AbstractDataStoreImpl {
         return;
     }
 
-    protected void collectFileNames(final String rootURL, final String authToken, final String owner, final String name,
+    private void storeIssueById(final String rootURL, final String authToken, final String issueLabel, final String owner,
+            final String name, final Integer issueId, final List<String> roleList, final CrawlingConfig crawlingConfig,
+            final IndexUpdateCallback callback, final Map<String, String> paramMap, final Map<String, String> scriptMap,
+            final Map<String, Object> defaultDataMap) {
+
+        final String issueUrl = rootURL + "api/v3/repos/" + owner + "/" + name + "/issues/" + issueId.toString();
+        // final String commentsUrl = issueUrl + "/comments";
+        final String viewUrl = rootURL + owner + "/" + name + "/issues/" + issueId.toString();
+
+        if (logger.isInfoEnabled()) {
+            logger.info("Get a content from " + issueUrl);
+        }
+
+        final Map<String, Object> dataMap = new HashMap<>();
+        String contentStr = "";
+        dataMap.putAll(defaultDataMap);
+
+        // Get issue description
+        // FIXME: Use `ComponentUtil.getDocumentHelper().processRequest` instead of `Curl.get`
+        try (CurlResponse curlResponse = Curl.get(issueUrl).header("Authorization", "token " + authToken).execute()) {
+            final Map<String, Object> map = curlResponse.getContentAsMap();
+            dataMap.put("title", map.getOrDefault("title", ""));
+            contentStr = (String) map.getOrDefault("body", "");
+        } catch (final Exception e) {
+            logger.warn("Failed to access to " + issueUrl, e);
+        }
+
+        // FIXME: Get issue comments from `commentsUrl`
+        // How to parse JSON-style list?
+
+        dataMap.put("content", contentStr);
+        dataMap.put("url", viewUrl);
+        dataMap.put("role", roleList);
+        dataMap.put("label", Collections.singletonList(issueLabel));
+
+        // TODO scriptMap
+
+        callback.store(paramMap, dataMap);
+
+        return;
+    }
+
+    @SuppressWarnings("unchecked")
+    private void storeWikiContents(final String rootURL, final String authToken, final String wikiLabel, final String owner,
+            final String name, final List<String> roleList, final CrawlingConfig crawlingConfig, final IndexUpdateCallback callback,
+            final Map<String, String> paramMap, final Map<String, String> scriptMap, final Map<String, Object> defaultDataMap,
+            final long readInterval) {
+        final String wikiUrl = rootURL + "api/v3/fess/" + owner + "/" + name + "/wiki";
+
+        List<String> pageList = Collections.emptyList();
+
+        // Get list of pages
+        try (CurlResponse curlResponse = Curl.get(wikiUrl).header("Authorization", "token " + authToken).execute()) {
+            final Map<String, Object> map = curlResponse.getContentAsMap();
+            pageList = (List<String>) map.get("pages");
+        } catch (final Exception e) {
+            logger.warn("Failed to access to " + wikiUrl, e);
+        }
+
+        for (String page : pageList) {
+            // FIXME: URL encoding (e.g. page name that contains spaces)
+            final String pageUrl = wikiUrl + "/contents/" + page + ".md";
+            final String viewUrl = rootURL + owner + "/" + name + "/wiki/" + page;
+
+            if (logger.isInfoEnabled()) {
+                logger.info("Get a content from " + pageUrl);
+            }
+
+            final Map<String, Object> dataMap = new HashMap<>();
+            dataMap.putAll(defaultDataMap);
+            dataMap.putAll(ComponentUtil.getDocumentHelper().processRequest(crawlingConfig, paramMap.get("crawlingInfoId"), pageUrl));
+
+            dataMap.put("url", viewUrl);
+            dataMap.put("role", roleList);
+            dataMap.put("label", Collections.singletonList(wikiLabel));
+
+            // TODO scriptMap
+
+            callback.store(paramMap, dataMap);
+            logger.info("Stored " + pageUrl);
+
+            if (readInterval > 0) {
+                sleep(readInterval);
+            }
+        }
+
+    }
+
+    protected void crawlFileContents(final String rootURL, final String authToken, final String owner, final String name,
             final String refStr, final String path, final int depth, final long readInterval, final Consumer<String> consumer) {
 
         if (MAX_DEPTH <= depth) {
@@ -258,7 +391,7 @@ public class GitBucketDataStoreImpl extends AbstractDataStoreImpl {
                     if (readInterval > 0) {
                         sleep(readInterval);
                     }
-                    collectFileNames(rootURL, authToken, owner, name, refStr, newPath, depth + 1, readInterval, consumer);
+                    crawlFileContents(rootURL, authToken, owner, name, refStr, newPath, depth + 1, readInterval, consumer);
                     break;
                 }
             }
@@ -266,5 +399,4 @@ public class GitBucketDataStoreImpl extends AbstractDataStoreImpl {
             logger.warn("Failed to access to " + url, e);
         }
     }
-
 }