diff --git a/pom.xml b/pom.xml
index 0abad3004..45e02f113 100644
--- a/pom.xml
+++ b/pom.xml
@@ -57,7 +57,7 @@
0.6.0F
- 1.0.11-SNAPSHOT
+ 1.0.11
2.3.0
diff --git a/src/main/java/org/codelibs/fess/ds/impl/GitBucketDataStoreImpl.java b/src/main/java/org/codelibs/fess/ds/impl/GitBucketDataStoreImpl.java
new file mode 100644
index 000000000..71e117b20
--- /dev/null
+++ b/src/main/java/org/codelibs/fess/ds/impl/GitBucketDataStoreImpl.java
@@ -0,0 +1,185 @@
+/*
+ * Copyright 2012-2016 CodeLibs Project and the Others.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied. See the License for the specific language
+ * governing permissions and limitations under the License.
+ */
+package org.codelibs.fess.ds.impl;
+
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.commons.io.FilenameUtils;
+import org.codelibs.core.lang.StringUtil;
+import org.codelibs.elasticsearch.runner.net.Curl;
+import org.codelibs.elasticsearch.runner.net.CurlResponse;
+import org.codelibs.fess.ds.IndexUpdateCallback;
+import org.codelibs.fess.es.config.exentity.DataConfig;
+import org.elasticsearch.common.xcontent.json.JsonXContent;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * @author Keiichi Watanabe
+ */
+public class GitBucketDataStoreImpl extends AbstractDataStoreImpl {
+ private static final Logger logger = LoggerFactory.getLogger(CsvDataStoreImpl.class);
+
+ private static final int MAX_DEPTH = 20;
+
+ protected static final String TOKEN_PARAM = "token";
+ protected static final String GITBUCKET_URL_PARAM = "url";
+
+ @Override
+ protected void storeData(final DataConfig dataConfig, final IndexUpdateCallback callback, final Map paramMap,
+ final Map scriptMap, final Map defaultDataMap) {
+
+ final String rootURL = getRootURL(paramMap);
+ final String authToken = getAuthToken(paramMap);
+ final long readInterval = getReadInterval(paramMap);
+
+ if (rootURL.isEmpty() || authToken.isEmpty()) {
+ logger.warn("parameter \"" + TOKEN_PARAM + "\" and \"" + GITBUCKET_URL_PARAM + "\" are required");
+ return;
+ }
+
+ final List> repositoryList = getRepositoryList(rootURL, authToken);
+ if (repositoryList.isEmpty()) {
+ logger.warn("Token is invalid or no Repository");
+ return;
+ }
+
+ for (final Map repository : repositoryList) {
+ try {
+ final String name = (String) repository.get("name");
+ final String owner = (String) repository.get("owner");
+ final boolean isPrivate = (boolean) repository.get("is_private"); // TODO Use this info for roles
+
+ List pathList = collectFileNames(rootURL, authToken, owner, name, "", 0, readInterval);
+ for (String path : pathList) {
+ storeFileContent(rootURL, authToken, owner, name, path, dataConfig, callback, paramMap, scriptMap, defaultDataMap);
+ if (readInterval > 0) {
+ sleep(readInterval);
+ }
+ }
+ } catch (Exception e) {
+ logger.warn("Failed to access to " + repository, e);
+ }
+ }
+
+ }
+
+ protected String getRootURL(final Map paramMap) {
+ if (paramMap.containsKey(GITBUCKET_URL_PARAM)) {
+ String url = paramMap.get(GITBUCKET_URL_PARAM);
+ if (url.charAt(url.length() - 1) != '/') {
+ url += "/";
+ }
+ return url;
+ }
+ return StringUtil.EMPTY;
+ }
+
+ protected String getAuthToken(final Map paramMap) {
+ if (paramMap.containsKey(TOKEN_PARAM)) {
+ return paramMap.get(TOKEN_PARAM);
+ }
+ return StringUtil.EMPTY;
+ }
+
+ protected List> getRepositoryList(final String rootURL, final String authToken) {
+ final String url = rootURL + "api/v3/fess/repos";
+ try (CurlResponse curlResponse = Curl.get(url).header("Authorization", "token " + authToken).execute()) {
+ final String content = curlResponse.getContentAsString();
+ final Map map = curlResponse.getContentAsMap();
+ assert (map.containsKey("repositories"));
+ final List> repoList = (List>) map.get("repositories");
+ return repoList;
+ } catch (Exception e) {
+ logger.warn("Failed to access to " + rootURL, e);
+ return Collections.emptyList();
+ }
+ }
+
+ private List parseList(final InputStream is) { // TODO This function should be moved to CurlResponse
+ try {
+ return JsonXContent.jsonXContent.createParser(is).list();
+ } catch (final Exception e) {
+ return Collections.emptyList();
+ }
+ }
+
+ private void storeFileContent(final String rootURL, final String authToken, final String owner, final String name, final String path,
+ final DataConfig dataConfig, final IndexUpdateCallback callback, final Map paramMap,
+ final Map scriptMap, final Map defaultDataMap) {
+ final String url = rootURL + owner + "/" + name + "/blob/master/" + path;
+ final String filename = FilenameUtils.getName(url);
+
+ try (CurlResponse curlResponse = Curl.get(url).param("raw", "true").header("Authorization", "token " + authToken).execute()) {
+ logger.info("Get a content from " + url);
+ // TODO Use DoucmentHelper#processRequest and scriptMap
+ final Map dataMap = new HashMap<>();
+ dataMap.putAll(defaultDataMap);
+ dataMap.put("title", owner + "/" + name + " : " + filename);
+ dataMap.put("url", url);
+ dataMap.put("content", curlResponse.getContentAsString());
+ dataMap.put("label", "GitBucket"); // TODO role
+
+ callback.store(paramMap, dataMap);
+
+ } catch (Exception e) {
+ // TODO CrawlingAccessException?
+ logger.warn("Failed to parse " + url, e);
+ }
+ return;
+ }
+
+ protected List collectFileNames(final String rootURL, final String authToken, final String owner, final String name,
+ final String path, final int depth, final long readInterval) {
+
+ if (MAX_DEPTH <= depth) {
+ return Collections.emptyList();
+ }
+
+ List resultList = new ArrayList();
+ final String url = rootURL + "api/v3/repos/" + owner + "/" + name + "/contents/" + path;
+
+ try (CurlResponse curlResponse = Curl.get(url).header("Authorization", "token " + authToken).execute()) {
+ final InputStream iStream = curlResponse.getContentAsStream();
+ List fileList = parseList(iStream);
+
+ for (int i = 0; i < fileList.size(); ++i) {
+ Map file = (Map) fileList.get(i);
+ final String newPath = path.isEmpty() ? file.get("name") : path + "/" + file.get("name");
+ switch (file.get("type")) {
+ case "file":
+ resultList.add(newPath);
+ break;
+ case "dir":
+ if (readInterval > 0) {
+ sleep(readInterval);
+ }
+ resultList.addAll(collectFileNames(rootURL, authToken, owner, name, newPath, depth + 1, readInterval));
+ break;
+ }
+ }
+ } catch (Exception e) {
+ logger.warn("Failed to access to " + url, e);
+ }
+ return resultList;
+ }
+
+}
diff --git a/src/main/resources/fess_ds.xml b/src/main/resources/fess_ds.xml
index e619d59b8..742f825bd 100644
--- a/src/main/resources/fess_ds.xml
+++ b/src/main/resources/fess_ds.xml
@@ -23,12 +23,16 @@
"EsListDataStore"
esListDataStore
+
+ "GitBucketDataStore"
+ gitBucketDataStore
+
-
@@ -38,6 +42,8 @@
+
+
diff --git a/src/main/resources/suggest/fess-suggest-default-analyzer.json b/src/main/resources/suggest/fess-suggest-default-analyzer.json
index 3625fe0a4..7e3322227 100644
--- a/src/main/resources/suggest/fess-suggest-default-analyzer.json
+++ b/src/main/resources/suggest/fess-suggest-default-analyzer.json
@@ -67,6 +67,25 @@
"tokenizer" : "standard",
"filter" : ["lowercase", "stopword_en_filter", "content_length_filter", "limit_token_count_filter", "english_keywords"]
},
+ "reading_analyzer_ko" : {
+ "type" : "custom",
+ "tokenizer" : "fess_korean_tokenizer"
+ },
+ "reading_term_analyzer_ko" : {
+ "type" : "custom",
+ "tokenizer" : "fess_korean_tokenizer"
+ },
+ "normalize_analyzer_ko" : {
+ "type" : "custom",
+ "tokenizer" : "keyword",
+ "char_filter" : ["mapping_char"],
+ "filter" : ["lowercase"]
+ },
+ "contents_analyzer_ko" : {
+ "type" : "custom",
+ "tokenizer" : "fess_korean_tokenizer",
+ "filter" : ["lowercase", "stopword_en_filter", "content_length_filter", "limit_token_count_filter", "english_keywords"]
+ },
"reading_analyzer_ar" : {
"type" : "custom",
"tokenizer" : "standard"
@@ -86,6 +105,25 @@
"tokenizer" : "standard",
"filter" : ["lowercase", "stopword_en_filter", "content_length_filter", "limit_token_count_filter", "arabic_stop", "arabic_normalization", "arabic_keywords"]
},
+ "reading_analyzer_bg" : {
+ "type" : "custom",
+ "tokenizer" : "standard"
+ },
+ "reading_term_analyzer_bg" : {
+ "type" : "custom",
+ "tokenizer" : "standard"
+ },
+ "normalize_analyzer_bg" : {
+ "type" : "custom",
+ "tokenizer" : "keyword",
+ "char_filter" : ["mapping_char"],
+ "filter" : ["lowercase"]
+ },
+ "contents_analyzer_bg" : {
+ "type" : "custom",
+ "tokenizer" : "standard",
+ "filter" : ["lowercase", "content_length_filter", "limit_token_count_filter", "bulgarian_stop", "bulgarian_keywords", "bulgarian_stemmer"]
+ },
"reading_analyzer_ca" : {
"type" : "custom",
"tokenizer" : "standard"
@@ -503,6 +541,386 @@
"type" : "custom",
"tokenizer" : "thai",
"filter" : ["lowercase", "stopword_en_filter", "content_length_filter", "limit_token_count_filter", "thai_stop"]
+ },
+ "reading_analyzer_bn" : {
+ "type" : "custom",
+ "tokenizer" : "standard"
+ },
+ "reading_term_analyzer_bn" : {
+ "type" : "custom",
+ "tokenizer" : "standard"
+ },
+ "normalize_analyzer_bn" : {
+ "type" : "custom",
+ "tokenizer" : "keyword",
+ "char_filter" : ["mapping_char"],
+ "filter" : ["lowercase"]
+ },
+ "contents_analyzer_bn" : {
+ "type" : "custom",
+ "tokenizer" : "standard",
+ "filter" : ["lowercase", "content_length_filter", "limit_token_count_filter"]
+ },
+ "reading_analyzer_et" : {
+ "type" : "custom",
+ "tokenizer" : "standard"
+ },
+ "reading_term_analyzer_et" : {
+ "type" : "custom",
+ "tokenizer" : "standard"
+ },
+ "normalize_analyzer_et" : {
+ "type" : "custom",
+ "tokenizer" : "keyword",
+ "char_filter" : ["mapping_char"],
+ "filter" : ["lowercase"]
+ },
+ "contents_analyzer_et" : {
+ "type" : "custom",
+ "tokenizer" : "standard",
+ "filter" : ["lowercase", "content_length_filter", "limit_token_count_filter"]
+ },
+ "reading_analyzer_gu" : {
+ "type" : "custom",
+ "tokenizer" : "standard"
+ },
+ "reading_term_analyzer_gu" : {
+ "type" : "custom",
+ "tokenizer" : "standard"
+ },
+ "normalize_analyzer_gu" : {
+ "type" : "custom",
+ "tokenizer" : "keyword",
+ "char_filter" : ["mapping_char"],
+ "filter" : ["lowercase"]
+ },
+ "contents_analyzer_gu" : {
+ "type" : "custom",
+ "tokenizer" : "standard",
+ "filter" : ["lowercase", "content_length_filter", "limit_token_count_filter"]
+ },
+ "reading_analyzer_he" : {
+ "type" : "custom",
+ "tokenizer" : "standard"
+ },
+ "reading_term_analyzer_he" : {
+ "type" : "custom",
+ "tokenizer" : "standard"
+ },
+ "normalize_analyzer_he" : {
+ "type" : "custom",
+ "tokenizer" : "keyword",
+ "char_filter" : ["mapping_char"],
+ "filter" : ["lowercase"]
+ },
+ "contents_analyzer_he" : {
+ "type" : "custom",
+ "tokenizer" : "standard",
+ "filter" : ["lowercase", "content_length_filter", "limit_token_count_filter"]
+ },
+ "reading_analyzer_hi" : {
+ "type" : "custom",
+ "tokenizer" : "standard"
+ },
+ "reading_term_analyzer_hi" : {
+ "type" : "custom",
+ "tokenizer" : "standard"
+ },
+ "normalize_analyzer_hi" : {
+ "type" : "custom",
+ "tokenizer" : "keyword",
+ "char_filter" : ["mapping_char"],
+ "filter" : ["lowercase"]
+ },
+ "contents_analyzer_hi" : {
+ "type" : "custom",
+ "tokenizer" : "standard",
+ "filter" : ["lowercase", "content_length_filter", "limit_token_count_filter"]
+ },
+ "reading_analyzer_hr" : {
+ "type" : "custom",
+ "tokenizer" : "standard"
+ },
+ "reading_term_analyzer_hr" : {
+ "type" : "custom",
+ "tokenizer" : "standard"
+ },
+ "normalize_analyzer_hr" : {
+ "type" : "custom",
+ "tokenizer" : "keyword",
+ "char_filter" : ["mapping_char"],
+ "filter" : ["lowercase"]
+ },
+ "contents_analyzer_hr" : {
+ "type" : "custom",
+ "tokenizer" : "standard",
+ "filter" : ["lowercase", "content_length_filter", "limit_token_count_filter"]
+ },
+ "reading_analyzer_mk" : {
+ "type" : "custom",
+ "tokenizer" : "standard"
+ },
+ "reading_term_analyzer_mk" : {
+ "type" : "custom",
+ "tokenizer" : "standard"
+ },
+ "normalize_analyzer_mk" : {
+ "type" : "custom",
+ "tokenizer" : "keyword",
+ "char_filter" : ["mapping_char"],
+ "filter" : ["lowercase"]
+ },
+ "contents_analyzer_mk" : {
+ "type" : "custom",
+ "tokenizer" : "standard",
+ "filter" : ["lowercase", "content_length_filter", "limit_token_count_filter"]
+ },
+ "reading_analyzer_ml" : {
+ "type" : "custom",
+ "tokenizer" : "standard"
+ },
+ "reading_term_analyzer_ml" : {
+ "type" : "custom",
+ "tokenizer" : "standard"
+ },
+ "normalize_analyzer_ml" : {
+ "type" : "custom",
+ "tokenizer" : "keyword",
+ "char_filter" : ["mapping_char"],
+ "filter" : ["lowercase"]
+ },
+ "contents_analyzer_ml" : {
+ "type" : "custom",
+ "tokenizer" : "standard",
+ "filter" : ["lowercase", "content_length_filter", "limit_token_count_filter"]
+ },
+ "reading_analyzer_pa" : {
+ "type" : "custom",
+ "tokenizer" : "standard"
+ },
+ "reading_term_analyzer_pa" : {
+ "type" : "custom",
+ "tokenizer" : "standard"
+ },
+ "normalize_analyzer_pa" : {
+ "type" : "custom",
+ "tokenizer" : "keyword",
+ "char_filter" : ["mapping_char"],
+ "filter" : ["lowercase"]
+ },
+ "contents_analyzer_pa" : {
+ "type" : "custom",
+ "tokenizer" : "standard",
+ "filter" : ["lowercase", "content_length_filter", "limit_token_count_filter"]
+ },
+ "reading_analyzer_pl" : {
+ "type" : "custom",
+ "tokenizer" : "standard"
+ },
+ "reading_term_analyzer_pl" : {
+ "type" : "custom",
+ "tokenizer" : "standard"
+ },
+ "normalize_analyzer_pl" : {
+ "type" : "custom",
+ "tokenizer" : "keyword",
+ "char_filter" : ["mapping_char"],
+ "filter" : ["lowercase"]
+ },
+ "contents_analyzer_pl" : {
+ "type" : "custom",
+ "tokenizer" : "standard",
+ "filter" : ["lowercase", "content_length_filter", "limit_token_count_filter"]
+ },
+ "reading_analyzer_si" : {
+ "type" : "custom",
+ "tokenizer" : "standard"
+ },
+ "reading_term_analyzer_si" : {
+ "type" : "custom",
+ "tokenizer" : "standard"
+ },
+ "normalize_analyzer_si" : {
+ "type" : "custom",
+ "tokenizer" : "keyword",
+ "char_filter" : ["mapping_char"],
+ "filter" : ["lowercase"]
+ },
+ "contents_analyzer_si" : {
+ "type" : "custom",
+ "tokenizer" : "standard",
+ "filter" : ["lowercase", "content_length_filter", "limit_token_count_filter"]
+ },
+ "reading_analyzer_sq" : {
+ "type" : "custom",
+ "tokenizer" : "standard"
+ },
+ "reading_term_analyzer_sq" : {
+ "type" : "custom",
+ "tokenizer" : "standard"
+ },
+ "normalize_analyzer_sq" : {
+ "type" : "custom",
+ "tokenizer" : "keyword",
+ "char_filter" : ["mapping_char"],
+ "filter" : ["lowercase"]
+ },
+ "contents_analyzer_sq" : {
+ "type" : "custom",
+ "tokenizer" : "standard",
+ "filter" : ["lowercase", "content_length_filter", "limit_token_count_filter"]
+ },
+ "reading_analyzer_ta" : {
+ "type" : "custom",
+ "tokenizer" : "standard"
+ },
+ "reading_term_analyzer_ta" : {
+ "type" : "custom",
+ "tokenizer" : "standard"
+ },
+ "normalize_analyzer_ta" : {
+ "type" : "custom",
+ "tokenizer" : "keyword",
+ "char_filter" : ["mapping_char"],
+ "filter" : ["lowercase"]
+ },
+ "contents_analyzer_ta" : {
+ "type" : "custom",
+ "tokenizer" : "standard",
+ "filter" : ["lowercase", "content_length_filter", "limit_token_count_filter"]
+ },
+ "reading_analyzer_te" : {
+ "type" : "custom",
+ "tokenizer" : "standard"
+ },
+ "reading_term_analyzer_te" : {
+ "type" : "custom",
+ "tokenizer" : "standard"
+ },
+ "normalize_analyzer_te" : {
+ "type" : "custom",
+ "tokenizer" : "keyword",
+ "char_filter" : ["mapping_char"],
+ "filter" : ["lowercase"]
+ },
+ "contents_analyzer_te" : {
+ "type" : "custom",
+ "tokenizer" : "standard",
+ "filter" : ["lowercase", "content_length_filter", "limit_token_count_filter"]
+ },
+ "reading_analyzer_tl" : {
+ "type" : "custom",
+ "tokenizer" : "standard"
+ },
+ "reading_term_analyzer_tl" : {
+ "type" : "custom",
+ "tokenizer" : "standard"
+ },
+ "normalize_analyzer_tl" : {
+ "type" : "custom",
+ "tokenizer" : "keyword",
+ "char_filter" : ["mapping_char"],
+ "filter" : ["lowercase"]
+ },
+ "contents_analyzer_tl" : {
+ "type" : "custom",
+ "tokenizer" : "standard",
+ "filter" : ["lowercase", "content_length_filter", "limit_token_count_filter"]
+ },
+ "reading_analyzer_uk" : {
+ "type" : "custom",
+ "tokenizer" : "standard"
+ },
+ "reading_term_analyzer_uk" : {
+ "type" : "custom",
+ "tokenizer" : "standard"
+ },
+ "normalize_analyzer_uk" : {
+ "type" : "custom",
+ "tokenizer" : "keyword",
+ "char_filter" : ["mapping_char"],
+ "filter" : ["lowercase"]
+ },
+ "contents_analyzer_uk" : {
+ "type" : "custom",
+ "tokenizer" : "standard",
+ "filter" : ["lowercase", "content_length_filter", "limit_token_count_filter"]
+ },
+ "reading_analyzer_ur" : {
+ "type" : "custom",
+ "tokenizer" : "standard"
+ },
+ "reading_term_analyzer_ur" : {
+ "type" : "custom",
+ "tokenizer" : "standard"
+ },
+ "normalize_analyzer_ur" : {
+ "type" : "custom",
+ "tokenizer" : "keyword",
+ "char_filter" : ["mapping_char"],
+ "filter" : ["lowercase"]
+ },
+ "contents_analyzer_ur" : {
+ "type" : "custom",
+ "tokenizer" : "standard",
+ "filter" : ["lowercase", "content_length_filter", "limit_token_count_filter"]
+ },
+ "reading_analyzer_vi" : {
+ "type" : "custom",
+ "tokenizer" : "standard"
+ },
+ "reading_term_analyzer_vi" : {
+ "type" : "custom",
+ "tokenizer" : "standard"
+ },
+ "normalize_analyzer_vi" : {
+ "type" : "custom",
+ "tokenizer" : "keyword",
+ "char_filter" : ["mapping_char"],
+ "filter" : ["lowercase"]
+ },
+ "contents_analyzer_vi" : {
+ "type" : "custom",
+ "tokenizer" : "standard",
+ "filter" : ["lowercase", "content_length_filter", "limit_token_count_filter"]
+ },
+ "reading_analyzer_zh-cn" : {
+ "type" : "custom",
+ "tokenizer" : "standard"
+ },
+ "reading_term_analyzer_zh-cn" : {
+ "type" : "custom",
+ "tokenizer" : "standard"
+ },
+ "normalize_analyzer_zh-cn" : {
+ "type" : "custom",
+ "tokenizer" : "keyword",
+ "char_filter" : ["mapping_char"],
+ "filter" : ["lowercase"]
+ },
+ "contents_analyzer_zh-cn" : {
+ "type" : "custom",
+ "tokenizer" : "standard",
+ "filter" : ["lowercase", "content_length_filter", "limit_token_count_filter"]
+ },
+ "reading_analyzer_zh-tw" : {
+ "type" : "custom",
+ "tokenizer" : "standard"
+ },
+ "reading_term_analyzer_zh-tw" : {
+ "type" : "custom",
+ "tokenizer" : "standard"
+ },
+ "normalize_analyzer_zh-tw" : {
+ "type" : "custom",
+ "tokenizer" : "keyword",
+ "char_filter" : ["mapping_char"],
+ "filter" : ["lowercase"]
+ },
+ "contents_analyzer_zh-tw" : {
+ "type" : "custom",
+ "tokenizer" : "standard",
+ "filter" : ["lowercase", "content_length_filter", "limit_token_count_filter"]
}
},
"char_filter" : {
@@ -631,6 +1049,18 @@
"type": "stemmer",
"language": "arabic"
},
+ "bulgarian_stop": {
+ "type": "stop",
+ "stopwords": "_bulgarian_"
+ },
+ "bulgarian_keywords": {
+ "type": "keyword_marker",
+ "keywords": ["Добър ден"]
+ },
+ "bulgarian_stemmer": {
+ "type": "stemmer",
+ "language": "bulgarian"
+ },
"catalan_elision": {
"type": "elision",
"articles": [ "d", "l", "m", "n", "s", "t"]