Merge branch '10.3.x' of github.com:codelibs/fess into 10.3.x
This commit is contained in:
commit
9b0ee4f4b8
4 changed files with 623 additions and 2 deletions
2
pom.xml
2
pom.xml
|
@ -57,7 +57,7 @@
|
|||
<utflute.version>0.6.0F</utflute.version>
|
||||
|
||||
<!-- Crawler -->
|
||||
<crawler.version>1.0.11-SNAPSHOT</crawler.version>
|
||||
<crawler.version>1.0.11</crawler.version>
|
||||
|
||||
<!-- Suggest -->
|
||||
<suggest.version>2.3.0</suggest.version>
|
||||
|
|
|
@ -0,0 +1,185 @@
|
|||
/*
|
||||
* Copyright 2012-2016 CodeLibs Project and the Others.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
* either express or implied. See the License for the specific language
|
||||
* governing permissions and limitations under the License.
|
||||
*/
|
||||
package org.codelibs.fess.ds.impl;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.commons.io.FilenameUtils;
|
||||
import org.codelibs.core.lang.StringUtil;
|
||||
import org.codelibs.elasticsearch.runner.net.Curl;
|
||||
import org.codelibs.elasticsearch.runner.net.CurlResponse;
|
||||
import org.codelibs.fess.ds.IndexUpdateCallback;
|
||||
import org.codelibs.fess.es.config.exentity.DataConfig;
|
||||
import org.elasticsearch.common.xcontent.json.JsonXContent;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
/**
|
||||
* @author Keiichi Watanabe
|
||||
*/
|
||||
public class GitBucketDataStoreImpl extends AbstractDataStoreImpl {
|
||||
private static final Logger logger = LoggerFactory.getLogger(CsvDataStoreImpl.class);
|
||||
|
||||
private static final int MAX_DEPTH = 20;
|
||||
|
||||
protected static final String TOKEN_PARAM = "token";
|
||||
protected static final String GITBUCKET_URL_PARAM = "url";
|
||||
|
||||
@Override
|
||||
protected void storeData(final DataConfig dataConfig, final IndexUpdateCallback callback, final Map<String, String> paramMap,
|
||||
final Map<String, String> scriptMap, final Map<String, Object> defaultDataMap) {
|
||||
|
||||
final String rootURL = getRootURL(paramMap);
|
||||
final String authToken = getAuthToken(paramMap);
|
||||
final long readInterval = getReadInterval(paramMap);
|
||||
|
||||
if (rootURL.isEmpty() || authToken.isEmpty()) {
|
||||
logger.warn("parameter \"" + TOKEN_PARAM + "\" and \"" + GITBUCKET_URL_PARAM + "\" are required");
|
||||
return;
|
||||
}
|
||||
|
||||
final List<Map<String, Object>> repositoryList = getRepositoryList(rootURL, authToken);
|
||||
if (repositoryList.isEmpty()) {
|
||||
logger.warn("Token is invalid or no Repository");
|
||||
return;
|
||||
}
|
||||
|
||||
for (final Map<String, Object> repository : repositoryList) {
|
||||
try {
|
||||
final String name = (String) repository.get("name");
|
||||
final String owner = (String) repository.get("owner");
|
||||
final boolean isPrivate = (boolean) repository.get("is_private"); // TODO Use this info for roles
|
||||
|
||||
List<String> pathList = collectFileNames(rootURL, authToken, owner, name, "", 0, readInterval);
|
||||
for (String path : pathList) {
|
||||
storeFileContent(rootURL, authToken, owner, name, path, dataConfig, callback, paramMap, scriptMap, defaultDataMap);
|
||||
if (readInterval > 0) {
|
||||
sleep(readInterval);
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
logger.warn("Failed to access to " + repository, e);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
protected String getRootURL(final Map<String, String> paramMap) {
|
||||
if (paramMap.containsKey(GITBUCKET_URL_PARAM)) {
|
||||
String url = paramMap.get(GITBUCKET_URL_PARAM);
|
||||
if (url.charAt(url.length() - 1) != '/') {
|
||||
url += "/";
|
||||
}
|
||||
return url;
|
||||
}
|
||||
return StringUtil.EMPTY;
|
||||
}
|
||||
|
||||
protected String getAuthToken(final Map<String, String> paramMap) {
|
||||
if (paramMap.containsKey(TOKEN_PARAM)) {
|
||||
return paramMap.get(TOKEN_PARAM);
|
||||
}
|
||||
return StringUtil.EMPTY;
|
||||
}
|
||||
|
||||
protected List<Map<String, Object>> getRepositoryList(final String rootURL, final String authToken) {
|
||||
final String url = rootURL + "api/v3/fess/repos";
|
||||
try (CurlResponse curlResponse = Curl.get(url).header("Authorization", "token " + authToken).execute()) {
|
||||
final String content = curlResponse.getContentAsString();
|
||||
final Map<String, Object> map = curlResponse.getContentAsMap();
|
||||
assert (map.containsKey("repositories"));
|
||||
final List<Map<String, Object>> repoList = (List<Map<String, Object>>) map.get("repositories");
|
||||
return repoList;
|
||||
} catch (Exception e) {
|
||||
logger.warn("Failed to access to " + rootURL, e);
|
||||
return Collections.emptyList();
|
||||
}
|
||||
}
|
||||
|
||||
private List<Object> parseList(final InputStream is) { // TODO This function should be moved to CurlResponse
|
||||
try {
|
||||
return JsonXContent.jsonXContent.createParser(is).list();
|
||||
} catch (final Exception e) {
|
||||
return Collections.emptyList();
|
||||
}
|
||||
}
|
||||
|
||||
private void storeFileContent(final String rootURL, final String authToken, final String owner, final String name, final String path,
|
||||
final DataConfig dataConfig, final IndexUpdateCallback callback, final Map<String, String> paramMap,
|
||||
final Map<String, String> scriptMap, final Map<String, Object> defaultDataMap) {
|
||||
final String url = rootURL + owner + "/" + name + "/blob/master/" + path;
|
||||
final String filename = FilenameUtils.getName(url);
|
||||
|
||||
try (CurlResponse curlResponse = Curl.get(url).param("raw", "true").header("Authorization", "token " + authToken).execute()) {
|
||||
logger.info("Get a content from " + url);
|
||||
// TODO Use DoucmentHelper#processRequest and scriptMap
|
||||
final Map<String, Object> dataMap = new HashMap<>();
|
||||
dataMap.putAll(defaultDataMap);
|
||||
dataMap.put("title", owner + "/" + name + " : " + filename);
|
||||
dataMap.put("url", url);
|
||||
dataMap.put("content", curlResponse.getContentAsString());
|
||||
dataMap.put("label", "GitBucket"); // TODO role
|
||||
|
||||
callback.store(paramMap, dataMap);
|
||||
|
||||
} catch (Exception e) {
|
||||
// TODO CrawlingAccessException?
|
||||
logger.warn("Failed to parse " + url, e);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
protected List<String> collectFileNames(final String rootURL, final String authToken, final String owner, final String name,
|
||||
final String path, final int depth, final long readInterval) {
|
||||
|
||||
if (MAX_DEPTH <= depth) {
|
||||
return Collections.emptyList();
|
||||
}
|
||||
|
||||
List<String> resultList = new ArrayList<String>();
|
||||
final String url = rootURL + "api/v3/repos/" + owner + "/" + name + "/contents/" + path;
|
||||
|
||||
try (CurlResponse curlResponse = Curl.get(url).header("Authorization", "token " + authToken).execute()) {
|
||||
final InputStream iStream = curlResponse.getContentAsStream();
|
||||
List<Object> fileList = parseList(iStream);
|
||||
|
||||
for (int i = 0; i < fileList.size(); ++i) {
|
||||
Map<String, String> file = (Map<String, String>) fileList.get(i);
|
||||
final String newPath = path.isEmpty() ? file.get("name") : path + "/" + file.get("name");
|
||||
switch (file.get("type")) {
|
||||
case "file":
|
||||
resultList.add(newPath);
|
||||
break;
|
||||
case "dir":
|
||||
if (readInterval > 0) {
|
||||
sleep(readInterval);
|
||||
}
|
||||
resultList.addAll(collectFileNames(rootURL, authToken, owner, name, newPath, depth + 1, readInterval));
|
||||
break;
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
logger.warn("Failed to access to " + url, e);
|
||||
}
|
||||
return resultList;
|
||||
}
|
||||
|
||||
}
|
|
@ -23,12 +23,16 @@
|
|||
<arg>"EsListDataStore"</arg>
|
||||
<arg>esListDataStore</arg>
|
||||
</postConstruct>
|
||||
<postConstruct name="add">
|
||||
<arg>"GitBucketDataStore"</arg>
|
||||
<arg>gitBucketDataStore</arg>
|
||||
</postConstruct>
|
||||
</component>
|
||||
|
||||
<component name="databaseDataStore" class="org.codelibs.fess.ds.impl.DatabaseDataStoreImpl">
|
||||
</component>
|
||||
<component name="csvDataStore" class="org.codelibs.fess.ds.impl.CsvDataStoreImpl">
|
||||
<!--
|
||||
<!--
|
||||
<property name="csvFileSuffixs">new String[] { ".csv", ".tsv" }</property>
|
||||
-->
|
||||
</component>
|
||||
|
@ -38,6 +42,8 @@
|
|||
</component>
|
||||
<component name="esListDataStore" class="org.codelibs.fess.ds.impl.EsListDataStoreImpl">
|
||||
</component>
|
||||
<component name="gitBucketDataStore" class="org.codelibs.fess.ds.impl.GitBucketDataStoreImpl">
|
||||
</component>
|
||||
|
||||
<component name="indexUpdateCallback" class="org.codelibs.fess.ds.impl.IndexUpdateCallbackImpl" instance="prototype">
|
||||
</component>
|
||||
|
|
|
@ -67,6 +67,25 @@
|
|||
"tokenizer" : "standard",
|
||||
"filter" : ["lowercase", "stopword_en_filter", "content_length_filter", "limit_token_count_filter", "english_keywords"]
|
||||
},
|
||||
"reading_analyzer_ko" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "fess_korean_tokenizer"
|
||||
},
|
||||
"reading_term_analyzer_ko" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "fess_korean_tokenizer"
|
||||
},
|
||||
"normalize_analyzer_ko" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "keyword",
|
||||
"char_filter" : ["mapping_char"],
|
||||
"filter" : ["lowercase"]
|
||||
},
|
||||
"contents_analyzer_ko" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "fess_korean_tokenizer",
|
||||
"filter" : ["lowercase", "stopword_en_filter", "content_length_filter", "limit_token_count_filter", "english_keywords"]
|
||||
},
|
||||
"reading_analyzer_ar" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "standard"
|
||||
|
@ -86,6 +105,25 @@
|
|||
"tokenizer" : "standard",
|
||||
"filter" : ["lowercase", "stopword_en_filter", "content_length_filter", "limit_token_count_filter", "arabic_stop", "arabic_normalization", "arabic_keywords"]
|
||||
},
|
||||
"reading_analyzer_bg" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "standard"
|
||||
},
|
||||
"reading_term_analyzer_bg" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "standard"
|
||||
},
|
||||
"normalize_analyzer_bg" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "keyword",
|
||||
"char_filter" : ["mapping_char"],
|
||||
"filter" : ["lowercase"]
|
||||
},
|
||||
"contents_analyzer_bg" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "standard",
|
||||
"filter" : ["lowercase", "content_length_filter", "limit_token_count_filter", "bulgarian_stop", "bulgarian_keywords", "bulgarian_stemmer"]
|
||||
},
|
||||
"reading_analyzer_ca" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "standard"
|
||||
|
@ -503,6 +541,386 @@
|
|||
"type" : "custom",
|
||||
"tokenizer" : "thai",
|
||||
"filter" : ["lowercase", "stopword_en_filter", "content_length_filter", "limit_token_count_filter", "thai_stop"]
|
||||
},
|
||||
"reading_analyzer_bn" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "standard"
|
||||
},
|
||||
"reading_term_analyzer_bn" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "standard"
|
||||
},
|
||||
"normalize_analyzer_bn" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "keyword",
|
||||
"char_filter" : ["mapping_char"],
|
||||
"filter" : ["lowercase"]
|
||||
},
|
||||
"contents_analyzer_bn" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "standard",
|
||||
"filter" : ["lowercase", "content_length_filter", "limit_token_count_filter"]
|
||||
},
|
||||
"reading_analyzer_et" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "standard"
|
||||
},
|
||||
"reading_term_analyzer_et" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "standard"
|
||||
},
|
||||
"normalize_analyzer_et" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "keyword",
|
||||
"char_filter" : ["mapping_char"],
|
||||
"filter" : ["lowercase"]
|
||||
},
|
||||
"contents_analyzer_et" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "standard",
|
||||
"filter" : ["lowercase", "content_length_filter", "limit_token_count_filter"]
|
||||
},
|
||||
"reading_analyzer_gu" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "standard"
|
||||
},
|
||||
"reading_term_analyzer_gu" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "standard"
|
||||
},
|
||||
"normalize_analyzer_gu" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "keyword",
|
||||
"char_filter" : ["mapping_char"],
|
||||
"filter" : ["lowercase"]
|
||||
},
|
||||
"contents_analyzer_gu" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "standard",
|
||||
"filter" : ["lowercase", "content_length_filter", "limit_token_count_filter"]
|
||||
},
|
||||
"reading_analyzer_he" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "standard"
|
||||
},
|
||||
"reading_term_analyzer_he" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "standard"
|
||||
},
|
||||
"normalize_analyzer_he" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "keyword",
|
||||
"char_filter" : ["mapping_char"],
|
||||
"filter" : ["lowercase"]
|
||||
},
|
||||
"contents_analyzer_he" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "standard",
|
||||
"filter" : ["lowercase", "content_length_filter", "limit_token_count_filter"]
|
||||
},
|
||||
"reading_analyzer_hi" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "standard"
|
||||
},
|
||||
"reading_term_analyzer_hi" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "standard"
|
||||
},
|
||||
"normalize_analyzer_hi" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "keyword",
|
||||
"char_filter" : ["mapping_char"],
|
||||
"filter" : ["lowercase"]
|
||||
},
|
||||
"contents_analyzer_hi" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "standard",
|
||||
"filter" : ["lowercase", "content_length_filter", "limit_token_count_filter"]
|
||||
},
|
||||
"reading_analyzer_hr" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "standard"
|
||||
},
|
||||
"reading_term_analyzer_hr" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "standard"
|
||||
},
|
||||
"normalize_analyzer_hr" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "keyword",
|
||||
"char_filter" : ["mapping_char"],
|
||||
"filter" : ["lowercase"]
|
||||
},
|
||||
"contents_analyzer_hr" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "standard",
|
||||
"filter" : ["lowercase", "content_length_filter", "limit_token_count_filter"]
|
||||
},
|
||||
"reading_analyzer_mk" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "standard"
|
||||
},
|
||||
"reading_term_analyzer_mk" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "standard"
|
||||
},
|
||||
"normalize_analyzer_mk" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "keyword",
|
||||
"char_filter" : ["mapping_char"],
|
||||
"filter" : ["lowercase"]
|
||||
},
|
||||
"contents_analyzer_mk" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "standard",
|
||||
"filter" : ["lowercase", "content_length_filter", "limit_token_count_filter"]
|
||||
},
|
||||
"reading_analyzer_ml" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "standard"
|
||||
},
|
||||
"reading_term_analyzer_ml" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "standard"
|
||||
},
|
||||
"normalize_analyzer_ml" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "keyword",
|
||||
"char_filter" : ["mapping_char"],
|
||||
"filter" : ["lowercase"]
|
||||
},
|
||||
"contents_analyzer_ml" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "standard",
|
||||
"filter" : ["lowercase", "content_length_filter", "limit_token_count_filter"]
|
||||
},
|
||||
"reading_analyzer_pa" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "standard"
|
||||
},
|
||||
"reading_term_analyzer_pa" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "standard"
|
||||
},
|
||||
"normalize_analyzer_pa" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "keyword",
|
||||
"char_filter" : ["mapping_char"],
|
||||
"filter" : ["lowercase"]
|
||||
},
|
||||
"contents_analyzer_pa" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "standard",
|
||||
"filter" : ["lowercase", "content_length_filter", "limit_token_count_filter"]
|
||||
},
|
||||
"reading_analyzer_pl" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "standard"
|
||||
},
|
||||
"reading_term_analyzer_pl" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "standard"
|
||||
},
|
||||
"normalize_analyzer_pl" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "keyword",
|
||||
"char_filter" : ["mapping_char"],
|
||||
"filter" : ["lowercase"]
|
||||
},
|
||||
"contents_analyzer_pl" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "standard",
|
||||
"filter" : ["lowercase", "content_length_filter", "limit_token_count_filter"]
|
||||
},
|
||||
"reading_analyzer_si" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "standard"
|
||||
},
|
||||
"reading_term_analyzer_si" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "standard"
|
||||
},
|
||||
"normalize_analyzer_si" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "keyword",
|
||||
"char_filter" : ["mapping_char"],
|
||||
"filter" : ["lowercase"]
|
||||
},
|
||||
"contents_analyzer_si" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "standard",
|
||||
"filter" : ["lowercase", "content_length_filter", "limit_token_count_filter"]
|
||||
},
|
||||
"reading_analyzer_sq" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "standard"
|
||||
},
|
||||
"reading_term_analyzer_sq" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "standard"
|
||||
},
|
||||
"normalize_analyzer_sq" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "keyword",
|
||||
"char_filter" : ["mapping_char"],
|
||||
"filter" : ["lowercase"]
|
||||
},
|
||||
"contents_analyzer_sq" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "standard",
|
||||
"filter" : ["lowercase", "content_length_filter", "limit_token_count_filter"]
|
||||
},
|
||||
"reading_analyzer_ta" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "standard"
|
||||
},
|
||||
"reading_term_analyzer_ta" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "standard"
|
||||
},
|
||||
"normalize_analyzer_ta" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "keyword",
|
||||
"char_filter" : ["mapping_char"],
|
||||
"filter" : ["lowercase"]
|
||||
},
|
||||
"contents_analyzer_ta" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "standard",
|
||||
"filter" : ["lowercase", "content_length_filter", "limit_token_count_filter"]
|
||||
},
|
||||
"reading_analyzer_te" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "standard"
|
||||
},
|
||||
"reading_term_analyzer_te" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "standard"
|
||||
},
|
||||
"normalize_analyzer_te" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "keyword",
|
||||
"char_filter" : ["mapping_char"],
|
||||
"filter" : ["lowercase"]
|
||||
},
|
||||
"contents_analyzer_te" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "standard",
|
||||
"filter" : ["lowercase", "content_length_filter", "limit_token_count_filter"]
|
||||
},
|
||||
"reading_analyzer_tl" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "standard"
|
||||
},
|
||||
"reading_term_analyzer_tl" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "standard"
|
||||
},
|
||||
"normalize_analyzer_tl" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "keyword",
|
||||
"char_filter" : ["mapping_char"],
|
||||
"filter" : ["lowercase"]
|
||||
},
|
||||
"contents_analyzer_tl" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "standard",
|
||||
"filter" : ["lowercase", "content_length_filter", "limit_token_count_filter"]
|
||||
},
|
||||
"reading_analyzer_uk" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "standard"
|
||||
},
|
||||
"reading_term_analyzer_uk" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "standard"
|
||||
},
|
||||
"normalize_analyzer_uk" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "keyword",
|
||||
"char_filter" : ["mapping_char"],
|
||||
"filter" : ["lowercase"]
|
||||
},
|
||||
"contents_analyzer_uk" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "standard",
|
||||
"filter" : ["lowercase", "content_length_filter", "limit_token_count_filter"]
|
||||
},
|
||||
"reading_analyzer_ur" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "standard"
|
||||
},
|
||||
"reading_term_analyzer_ur" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "standard"
|
||||
},
|
||||
"normalize_analyzer_ur" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "keyword",
|
||||
"char_filter" : ["mapping_char"],
|
||||
"filter" : ["lowercase"]
|
||||
},
|
||||
"contents_analyzer_ur" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "standard",
|
||||
"filter" : ["lowercase", "content_length_filter", "limit_token_count_filter"]
|
||||
},
|
||||
"reading_analyzer_vi" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "standard"
|
||||
},
|
||||
"reading_term_analyzer_vi" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "standard"
|
||||
},
|
||||
"normalize_analyzer_vi" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "keyword",
|
||||
"char_filter" : ["mapping_char"],
|
||||
"filter" : ["lowercase"]
|
||||
},
|
||||
"contents_analyzer_vi" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "standard",
|
||||
"filter" : ["lowercase", "content_length_filter", "limit_token_count_filter"]
|
||||
},
|
||||
"reading_analyzer_zh-cn" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "standard"
|
||||
},
|
||||
"reading_term_analyzer_zh-cn" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "standard"
|
||||
},
|
||||
"normalize_analyzer_zh-cn" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "keyword",
|
||||
"char_filter" : ["mapping_char"],
|
||||
"filter" : ["lowercase"]
|
||||
},
|
||||
"contents_analyzer_zh-cn" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "standard",
|
||||
"filter" : ["lowercase", "content_length_filter", "limit_token_count_filter"]
|
||||
},
|
||||
"reading_analyzer_zh-tw" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "standard"
|
||||
},
|
||||
"reading_term_analyzer_zh-tw" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "standard"
|
||||
},
|
||||
"normalize_analyzer_zh-tw" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "keyword",
|
||||
"char_filter" : ["mapping_char"],
|
||||
"filter" : ["lowercase"]
|
||||
},
|
||||
"contents_analyzer_zh-tw" : {
|
||||
"type" : "custom",
|
||||
"tokenizer" : "standard",
|
||||
"filter" : ["lowercase", "content_length_filter", "limit_token_count_filter"]
|
||||
}
|
||||
},
|
||||
"char_filter" : {
|
||||
|
@ -631,6 +1049,18 @@
|
|||
"type": "stemmer",
|
||||
"language": "arabic"
|
||||
},
|
||||
"bulgarian_stop": {
|
||||
"type": "stop",
|
||||
"stopwords": "_bulgarian_"
|
||||
},
|
||||
"bulgarian_keywords": {
|
||||
"type": "keyword_marker",
|
||||
"keywords": ["Добър ден"]
|
||||
},
|
||||
"bulgarian_stemmer": {
|
||||
"type": "stemmer",
|
||||
"language": "bulgarian"
|
||||
},
|
||||
"catalan_elision": {
|
||||
"type": "elision",
|
||||
"articles": [ "d", "l", "m", "n", "s", "t"]
|
||||
|
|
Loading…
Add table
Reference in a new issue