fix #579 I18n for Suggest

This commit is contained in:
yfujita 2016-08-05 18:47:25 +09:00
parent f786f771de
commit 224d3c0bc6
3 changed files with 932 additions and 187 deletions

View file

@ -44,6 +44,7 @@ import org.codelibs.fess.suggest.Suggester;
import org.codelibs.fess.suggest.constants.FieldNames;
import org.codelibs.fess.suggest.entity.SuggestItem;
import org.codelibs.fess.suggest.index.contents.document.ESSourceReader;
import org.codelibs.fess.suggest.settings.AnalyzerSettings;
import org.codelibs.fess.suggest.settings.SuggestSettings;
import org.codelibs.fess.suggest.util.SuggestUtil;
import org.codelibs.fess.util.ComponentUtil;

View file

@ -1,187 +0,0 @@
{
"analysis" : {
"tokenizer" : {
"fess_japanese_normal" : {
"type" : "fess_japanese_tokenizer",
"mode" : "normal",
"discard_punctuation" : "false"
}
},
"analyzer" : {
"reading_analyzer" : {
"type" : "custom",
"tokenizer" : "fess_japanese_normal",
"filter" : ["reading_form"]
},
"reading_term_analyzer" : {
"type" : "custom",
"tokenizer" : "fess_japanese_normal"
},
"normalize_analyzer" : {
"type" : "custom",
"tokenizer" : "keyword",
"char_filter" : ["mapping_char"],
"filter" : ["lowercase"]
},
"contents_analyzer" : {
"type" : "custom",
"tokenizer" : "fess_japanese_normal",
"filter" : ["stopword_en_filter", "pos_filter", "content_length_filter", "limit_token_count_filter"]
},
"reading_analyzer_ja" : {
"type" : "custom",
"tokenizer" : "fess_japanese_normal",
"filter" : ["reading_form"]
},
"reading_term_analyzer_ja" : {
"type" : "custom",
"tokenizer" : "fess_japanese_normal"
},
"normalize_analyzer_ja" : {
"type" : "custom",
"tokenizer" : "keyword",
"char_filter" : ["mapping_char"],
"filter" : ["lowercase"]
},
"contents_analyzer_ja" : {
"type" : "custom",
"tokenizer" : "fess_japanese_normal",
"filter" : ["stopword_en_filter", "pos_filter", "content_length_filter", "limit_token_count_filter"]
},
"reading_analyzer_en" : {
"type" : "custom",
"tokenizer" : "standard"
},
"reading_term_analyzer_en" : {
"type" : "custom",
"tokenizer" : "standard"
},
"normalize_analyzer_en" : {
"type" : "custom",
"tokenizer" : "keyword",
"char_filter" : ["mapping_char"],
"filter" : ["lowercase"]
},
"contents_analyzer_en" : {
"type" : "custom",
"tokenizer" : "standard",
"filter" : ["stemmer_en_filter", "stopword_en_filter", "content_length_filter", "limit_token_count_filter"]
}
},
"char_filter" : {
"mapping_char" : {
"type" : "mapping",
"mappings" : ["ガ=>ガ", "ギ=>ギ", "グ=>グ", "ゲ=>ゲ", "ゴ=>ゴ", "ザ=>ザ", "ジ=>ジ", "ズ=>ズ", "ゼ=>ゼ", "ゾ=>ゾ", "ダ=>ダ", "ヂ=>ヂ", "ヅ=>ヅ",
"デ=>デ", "ド=>ド", "バ=>バ", "ビ=>ビ", "ブ=>ブ", "ベ=>ベ", "ボ=>ボ", "。=>。", "「=>「", "」=>」", "、=>、", "・=>・", "ヲ=>ヲ", "ァ=>ァ", "ィ=>ィ", "ゥ=>ゥ", "ェ=>ェ", "ォ=>ォ", "ャ=>ャ", "ュ=>ュ", "ョ=>ョ", "ッ=>ッ", "ア=>ア",
"イ=>イ", "ウ=>ウ", "エ=>エ", "オ=>オ", "カ=>カ", "キ=>キ", "ク=>ク", "ケ=>ケ", "コ=>コ", "サ=>サ", "シ=>シ", "ス=>ス", "セ=>セ", "ソ=>ソ", "タ=>タ", "チ=>チ", "ツ=>ツ", "テ=>テ", "ト=>ト", "ナ=>ナ", "ニ=>ニ", "ヌ=>ヌ", "ネ=>ネ", "ノ=>ノ", "ハ=>ハ",
"ヒ=>ヒ", "フ=>フ", "ヘ=>ヘ", "ホ=>ホ", "マ=>マ", "ミ=>ミ", "ム=>ム", "メ=>メ", "モ=>モ", "ヤ=>ヤ", "ユ=>ユ", "ヨ=>ヨ", "ラ=>ラ", "リ=>リ", "ル=>ル", "レ=>レ", "ロ=>ロ", "ワ=>ワ", "ン=>ン",
"ａ=>a", "ｂ=>b","ｃ=>c","ｄ=>d","ｅ=>e","ｆ=>f","ｇ=>g","ｈ=>h","ｉ=>i","ｊ=>j","ｋ=>k","ｌ=>l","ｍ=>m","ｎ=>n","ｏ=>o","ｐ=>p","ｑ=>q","ｒ=>r","ｓ=>s",
"ｔ=>t","ｕ=>u","ｖ=>v","ｗ=>w","ｘ=>x", "ｙ=>y", "ｚ=>z",
"Ａ=>A", "Ｂ=>B","Ｃ=>C","Ｄ=>D","Ｅ=>E","Ｆ=>F","Ｇ=>G","Ｈ=>H","Ｉ=>I","Ｊ=>J","Ｋ=>K","Ｌ=>L","Ｍ=>M","Ｎ=>N","Ｏ=>O","Ｐ=>P","Ｑ=>Q","Ｒ=>R","Ｓ=>S",
"Ｔ=>T","Ｕ=>U","Ｖ=>V","Ｗ=>W","Ｘ=>X", "Ｙ=>Y", "Ｚ=>Z",
"１=>1", "２=>2", "３=>3", "４=>4", "５=>5", "６=>6", "７=>7", "８=>8", "９=>9", "０=>0"
]
}
},
"filter" : {
"reading_form" : {
"type" : "fess_japanese_readingform"
},
"pos_filter" : {
"type" : "fess_japanese_part_of_speech",
"stoptags" : [
"その他",
"その他-間投",
"フィラー",
"感動詞",
"記号",
"記号-アルファベット",
"記号-一般",
"記号-括弧開",
"記号-括弧閉",
"記号-句点",
"記号-空白",
"記号-読点",
"形容詞",
"形容詞-接尾",
"形容詞-非自立",
"語断片",
"助詞",
"助詞-格助詞",
"助詞-格助詞-一般",
"助詞-格助詞-引用",
"助詞-格助詞-連語",
"助詞-間投助詞",
"助詞-係助詞",
"助詞-終助詞",
"助詞-接続助詞",
"助詞-特殊",
"助詞-副詞化",
"助詞-副助詞",
"助詞-副助詞/並立助詞/終助詞",
"助詞-並立助詞",
"助詞-連体化",
"助動詞",
"接続詞",
"接頭詞",
"接頭詞-形容詞接続",
"接頭詞-数接続",
"接頭詞-動詞接続",
"接頭詞-名詞接続",
"動詞",
"動詞-自立",
"動詞-接尾",
"動詞-非自立",
"非言語音",
"副詞",
"副詞-一般",
"副詞-助詞類接続",
"名詞-ナイ形容詞語幹",
"名詞-引用文字列",
"名詞-形容動詞語幹",
"名詞-数",
"名詞-接続詞的",
"名詞-接尾",
"名詞-接尾-サ変接続",
"名詞-接尾-一般",
"名詞-接尾-形容動詞語幹",
"名詞-接尾-助数詞",
"名詞-接尾-助動詞語幹",
"名詞-接尾-人名",
"名詞-接尾-地域",
"名詞-接尾-特殊",
"名詞-接尾-副詞可能",
"名詞-代名詞",
"名詞-代名詞-一般",
"名詞-代名詞-縮約",
"名詞-動詞非自立的",
"名詞-特殊",
"名詞-特殊-助動詞語幹",
"名詞-非自立",
"名詞-非自立-一般",
"名詞-非自立-形容動詞語幹",
"名詞-非自立-助動詞語幹",
"名詞-非自立-副詞可能",
"名詞-副詞可能",
"連体詞"
]
},
"stopword_en_filter": {
"type": "stop",
"stopwords": "_english_"
},
"content_length_filter": {
"type": "length",
"max": 30
},
"limit_token_count_filter": {
"type": "limit",
"max_token_count": 10000
},
"stemmer_en_filter": {
"type": "stemmer",
"name": "english"
}
}
}
}

View file

@ -0,0 +1,931 @@
{
"analysis" : {
"tokenizer" : {
"fess_japanese_normal" : {
"type" : "fess_japanese_tokenizer",
"mode" : "normal",
"discard_punctuation" : "false"
}
},
"analyzer" : {
"reading_analyzer" : {
"type" : "custom",
"tokenizer" : "fess_japanese_normal",
"filter" : ["reading_form"]
},
"reading_term_analyzer" : {
"type" : "custom",
"tokenizer" : "fess_japanese_normal"
},
"normalize_analyzer" : {
"type" : "custom",
"tokenizer" : "keyword",
"char_filter" : ["mapping_char"],
"filter" : ["lowercase"]
},
"contents_analyzer" : {
"type" : "custom",
"tokenizer" : "fess_japanese_normal",
"filter" : ["stopword_en_filter", "pos_filter", "content_length_filter", "limit_token_count_filter"]
},
"reading_analyzer_ja" : {
"type" : "custom",
"tokenizer" : "fess_japanese_normal",
"filter" : ["reading_form"]
},
"reading_term_analyzer_ja" : {
"type" : "custom",
"tokenizer" : "fess_japanese_normal"
},
"normalize_analyzer_ja" : {
"type" : "custom",
"tokenizer" : "keyword",
"char_filter" : ["mapping_char"],
"filter" : ["lowercase"]
},
"contents_analyzer_ja" : {
"type" : "custom",
"tokenizer" : "fess_japanese_normal",
"filter" : ["stopword_en_filter", "pos_filter", "content_length_filter", "limit_token_count_filter"]
},
"reading_analyzer_en" : {
"type" : "custom",
"tokenizer" : "standard"
},
"reading_term_analyzer_en" : {
"type" : "custom",
"tokenizer" : "standard"
},
"normalize_analyzer_en" : {
"type" : "custom",
"tokenizer" : "keyword",
"char_filter" : ["mapping_char"],
"filter" : ["lowercase"]
},
"contents_analyzer_en" : {
"type" : "custom",
"tokenizer" : "standard",
"filter" : ["stemmer_en_filter", "stopword_en_filter", "content_length_filter", "limit_token_count_filter"]
},
"reading_analyzer_ar" : {
"type" : "custom",
"tokenizer" : "standard"
},
"reading_term_analyzer_ar" : {
"type" : "custom",
"tokenizer" : "standard"
},
"normalize_analyzer_ar" : {
"type" : "custom",
"tokenizer" : "keyword",
"char_filter" : ["mapping_char"],
"filter" : ["lowercase", "stemmer_en_filter", "arabic_normalization", "arabic_stemmer"]
},
"contents_analyzer_ar" : {
"type" : "custom",
"tokenizer" : "standard",
"filter" : ["lowercase", "stemmer_en_filter", "stopword_en_filter", "content_length_filter", "limit_token_count_filter", "arabic_stop", "arabic_normalization", "arabic_keywords", "arabic_stemmer"]
},
"reading_analyzer_ca" : {
"type" : "custom",
"tokenizer" : "standard"
},
"reading_term_analyzer_ca" : {
"type" : "custom",
"tokenizer" : "standard"
},
"normalize_analyzer_ca" : {
"type" : "custom",
"tokenizer" : "keyword",
"char_filter" : ["mapping_char"],
"filter" : ["lowercase", "stemmer_en_filter", "catalan_elision", "catalan_stemmer"]
},
"contents_analyzer_ca" : {
"type" : "custom",
"tokenizer" : "standard",
"filter" : ["lowercase", "stemmer_en_filter", "stopword_en_filter", "content_length_filter", "limit_token_count_filter", "catalan_elision", "catalan_stop", "catalan_keywords", "catalan_stemmer"]
},
"reading_analyzer_cs" : {
"type" : "custom",
"tokenizer" : "standard"
},
"reading_term_analyzer_cs" : {
"type" : "custom",
"tokenizer" : "standard"
},
"normalize_analyzer_cs" : {
"type" : "custom",
"tokenizer" : "keyword",
"char_filter" : ["mapping_char"],
"filter" : ["lowercase", "stemmer_en_filter", "czech_stemmer"]
},
"contents_analyzer_cs" : {
"type" : "custom",
"tokenizer" : "standard",
"filter" : ["lowercase", "stemmer_en_filter", "stopword_en_filter", "content_length_filter", "limit_token_count_filter", "czech_stop", "czech_keywords", "czech_stemmer"]
},
"reading_analyzer_da" : {
"type" : "custom",
"tokenizer" : "standard"
},
"reading_term_analyzer_da" : {
"type" : "custom",
"tokenizer" : "standard"
},
"normalize_analyzer_da" : {
"type" : "custom",
"tokenizer" : "keyword",
"char_filter" : ["mapping_char"],
"filter" : ["lowercase", "stemmer_en_filter", "danish_stemmer"]
},
"contents_analyzer_da" : {
"type" : "custom",
"tokenizer" : "standard",
"filter" : ["lowercase", "stemmer_en_filter", "stopword_en_filter", "content_length_filter", "limit_token_count_filter", "danish_stop", "danish_keywords", "danish_stemmer"]
},
"reading_analyzer_nl" : {
"type" : "custom",
"tokenizer" : "standard"
},
"reading_term_analyzer_nl" : {
"type" : "custom",
"tokenizer" : "standard"
},
"normalize_analyzer_nl" : {
"type" : "custom",
"tokenizer" : "keyword",
"char_filter" : ["mapping_char"],
"filter" : ["lowercase", "stemmer_en_filter", "dutch_override", "dutch_stemmer"]
},
"contents_analyzer_nl" : {
"type" : "custom",
"tokenizer" : "standard",
"filter" : ["lowercase", "stemmer_en_filter", "stopword_en_filter", "content_length_filter", "limit_token_count_filter", "dutch_stop", "dutch_keywords", "dutch_override", "dutch_stemmer"]
},
"reading_analyzer_fi" : {
"type" : "custom",
"tokenizer" : "standard"
},
"reading_term_analyzer_fi" : {
"type" : "custom",
"tokenizer" : "standard"
},
"normalize_analyzer_fi" : {
"type" : "custom",
"tokenizer" : "keyword",
"char_filter" : ["mapping_char"],
"filter" : ["lowercase", "stemmer_en_filter", "finnish_stemmer"]
},
"contents_analyzer_fi" : {
"type" : "custom",
"tokenizer" : "standard",
"filter" : ["lowercase", "stemmer_en_filter", "stopword_en_filter", "content_length_filter", "limit_token_count_filter", "finnish_stop", "finnish_keywords", "finnish_stemmer"]
},
"reading_analyzer_fr" : {
"type" : "custom",
"tokenizer" : "standard"
},
"reading_term_analyzer_fr" : {
"type" : "custom",
"tokenizer" : "standard"
},
"normalize_analyzer_fr" : {
"type" : "custom",
"tokenizer" : "keyword",
"char_filter" : ["mapping_char"],
"filter" : ["lowercase", "stemmer_en_filter", "french_elision", "french_stemmer"]
},
"contents_analyzer_fr" : {
"type" : "custom",
"tokenizer" : "standard",
"filter" : ["lowercase", "stemmer_en_filter", "stopword_en_filter", "content_length_filter", "limit_token_count_filter", "french_elision", "french_stop", "french_keywords", "french_stemmer"]
},
"reading_analyzer_de" : {
"type" : "custom",
"tokenizer" : "standard"
},
"reading_term_analyzer_de" : {
"type" : "custom",
"tokenizer" : "standard"
},
"normalize_analyzer_de" : {
"type" : "custom",
"tokenizer" : "keyword",
"char_filter" : ["mapping_char"],
"filter" : ["lowercase", "stemmer_en_filter", "german_normalization", "german_stemmer"]
},
"contents_analyzer_de" : {
"type" : "custom",
"tokenizer" : "standard",
"filter" : ["lowercase", "stemmer_en_filter", "stopword_en_filter", "content_length_filter", "limit_token_count_filter", "german_stop", "german_keywords", "german_normalization", "german_stemmer"]
},
"reading_analyzer_el" : {
"type" : "custom",
"tokenizer" : "standard"
},
"reading_term_analyzer_el" : {
"type" : "custom",
"tokenizer" : "standard"
},
"normalize_analyzer_el" : {
"type" : "custom",
"tokenizer" : "keyword",
"char_filter" : ["mapping_char"],
"filter" : ["greek_lowercase", "lowercase", "stemmer_en_filter", "greek_stemmer"]
},
"contents_analyzer_el" : {
"type" : "custom",
"tokenizer" : "standard",
"filter" : ["lowercase", "stemmer_en_filter", "stopword_en_filter", "content_length_filter", "limit_token_count_filter", "greek_stop", "greek_keywords", "greek_stemmer"]
},
"reading_analyzer_hu" : {
"type" : "custom",
"tokenizer" : "standard"
},
"reading_term_analyzer_hu" : {
"type" : "custom",
"tokenizer" : "standard"
},
"normalize_analyzer_hu" : {
"type" : "custom",
"tokenizer" : "keyword",
"char_filter" : ["mapping_char"],
"filter" : ["lowercase", "stemmer_en_filter", "hungarian_stemmer"]
},
"contents_analyzer_hu" : {
"type" : "custom",
"tokenizer" : "standard",
"filter" : ["lowercase", "stemmer_en_filter", "stopword_en_filter", "content_length_filter", "limit_token_count_filter", "hungarian_stop", "hungarian_keywords", "hungarian_stemmer"]
},
"reading_analyzer_id" : {
"type" : "custom",
"tokenizer" : "standard"
},
"reading_term_analyzer_id" : {
"type" : "custom",
"tokenizer" : "standard"
},
"normalize_analyzer_id" : {
"type" : "custom",
"tokenizer" : "keyword",
"char_filter" : ["mapping_char"],
"filter" : ["lowercase", "stemmer_en_filter", "indonesian_stemmer"]
},
"contents_analyzer_id" : {
"type" : "custom",
"tokenizer" : "standard",
"filter" : ["lowercase", "stemmer_en_filter", "stopword_en_filter", "content_length_filter", "limit_token_count_filter", "indonesian_stop", "indonesian_keywords", "indonesian_stemmer"]
},
"reading_analyzer_it" : {
"type" : "custom",
"tokenizer" : "standard"
},
"reading_term_analyzer_it" : {
"type" : "custom",
"tokenizer" : "standard"
},
"normalize_analyzer_it" : {
"type" : "custom",
"tokenizer" : "keyword",
"char_filter" : ["mapping_char"],
"filter" : ["lowercase", "stemmer_en_filter", "italian_elision", "italian_stemmer"]
},
"contents_analyzer_it" : {
"type" : "custom",
"tokenizer" : "standard",
"filter" : ["lowercase", "stemmer_en_filter", "stopword_en_filter", "content_length_filter", "limit_token_count_filter", "italian_elision", "italian_stop", "italian_keywords", "italian_stemmer"]
},
"reading_analyzer_lv" : {
"type" : "custom",
"tokenizer" : "standard"
},
"reading_term_analyzer_lv" : {
"type" : "custom",
"tokenizer" : "standard"
},
"normalize_analyzer_lv" : {
"type" : "custom",
"tokenizer" : "keyword",
"char_filter" : ["mapping_char"],
"filter" : ["lowercase", "stemmer_en_filter", "latvian_stemmer"]
},
"contents_analyzer_lv" : {
"type" : "custom",
"tokenizer" : "standard",
"filter" : ["lowercase", "stemmer_en_filter", "stopword_en_filter", "content_length_filter", "limit_token_count_filter", "latvian_stop", "latvian_keywords", "latvian_stemmer"]
},
"reading_analyzer_lt" : {
"type" : "custom",
"tokenizer" : "standard"
},
"reading_term_analyzer_lt" : {
"type" : "custom",
"tokenizer" : "standard"
},
"normalize_analyzer_lt" : {
"type" : "custom",
"tokenizer" : "keyword",
"char_filter" : ["mapping_char"],
"filter" : ["lowercase", "stemmer_en_filter", "lithuanian_stemmer"]
},
"contents_analyzer_lt" : {
"type" : "custom",
"tokenizer" : "standard",
"filter" : ["lowercase", "stemmer_en_filter", "stopword_en_filter", "content_length_filter", "limit_token_count_filter", "lithuanian_stop", "lithuanian_keywords", "lithuanian_stemmer"]
},
"reading_analyzer_no" : {
"type" : "custom",
"tokenizer" : "standard"
},
"reading_term_analyzer_no" : {
"type" : "custom",
"tokenizer" : "standard"
},
"normalize_analyzer_no" : {
"type" : "custom",
"tokenizer" : "keyword",
"char_filter" : ["mapping_char"],
"filter" : ["lowercase", "stemmer_en_filter", "norwegian_stemmer"]
},
"contents_analyzer_no" : {
"type" : "custom",
"tokenizer" : "standard",
"filter" : ["lowercase", "stemmer_en_filter", "stopword_en_filter", "content_length_filter", "limit_token_count_filter", "norwegian_stop", "norwegian_keywords", "norwegian_stemmer"]
},
"reading_analyzer_fa" : {
"type" : "custom",
"tokenizer" : "standard"
},
"reading_term_analyzer_fa" : {
"type" : "custom",
"tokenizer" : "standard"
},
"normalize_analyzer_fa" : {
"type" : "custom",
"tokenizer" : "keyword",
"char_filter" : ["mapping_char"],
"filter" : ["lowercase", "stemmer_en_filter", "arabic_normalization", "persian_normalization"]
},
"contents_analyzer_fa" : {
"type" : "custom",
"tokenizer" : "standard",
"filter" : ["lowercase", "stemmer_en_filter", "stopword_en_filter", "content_length_filter", "limit_token_count_filter", "arabic_normalization", "persian_normalization", "persian_stop"]
},
"reading_analyzer_pt" : {
"type" : "custom",
"tokenizer" : "standard"
},
"reading_term_analyzer_pt" : {
"type" : "custom",
"tokenizer" : "standard"
},
"normalize_analyzer_pt" : {
"type" : "custom",
"tokenizer" : "keyword",
"char_filter" : ["mapping_char"],
"filter" : ["lowercase", "stemmer_en_filter", "portuguese_stemmer"]
},
"contents_analyzer_pt" : {
"type" : "custom",
"tokenizer" : "standard",
"filter" : ["lowercase", "stemmer_en_filter", "stopword_en_filter", "content_length_filter", "limit_token_count_filter", "portuguese_stop", "portuguese_keywords", "portuguese_stemmer"]
},
"reading_analyzer_ro" : {
"type" : "custom",
"tokenizer" : "standard"
},
"reading_term_analyzer_ro" : {
"type" : "custom",
"tokenizer" : "standard"
},
"normalize_analyzer_ro" : {
"type" : "custom",
"tokenizer" : "keyword",
"char_filter" : ["mapping_char"],
"filter" : ["lowercase", "stemmer_en_filter", "romanian_stemmer"]
},
"contents_analyzer_ro" : {
"type" : "custom",
"tokenizer" : "standard",
"filter" : ["lowercase", "stemmer_en_filter", "stopword_en_filter", "content_length_filter", "limit_token_count_filter", "romanian_stop", "romanian_keywords", "romanian_stemmer"]
},
"reading_analyzer_ru" : {
"type" : "custom",
"tokenizer" : "standard"
},
"reading_term_analyzer_ru" : {
"type" : "custom",
"tokenizer" : "standard"
},
"normalize_analyzer_ru" : {
"type" : "custom",
"tokenizer" : "keyword",
"char_filter" : ["mapping_char"],
"filter" : ["lowercase", "stemmer_en_filter", "russian_stemmer"]
},
"contents_analyzer_ru" : {
"type" : "custom",
"tokenizer" : "standard",
"filter" : ["lowercase", "stemmer_en_filter", "stopword_en_filter", "content_length_filter", "limit_token_count_filter", "russian_stop", "russian_keywords", "russian_stemmer"]
},
"reading_analyzer_es" : {
"type" : "custom",
"tokenizer" : "standard"
},
"reading_term_analyzer_es" : {
"type" : "custom",
"tokenizer" : "standard"
},
"normalize_analyzer_es" : {
"type" : "custom",
"tokenizer" : "keyword",
"char_filter" : ["mapping_char"],
"filter" : ["lowercase", "stemmer_en_filter", "spanish_stemmer"]
},
"contents_analyzer_es" : {
"type" : "custom",
"tokenizer" : "standard",
"filter" : ["lowercase", "stemmer_en_filter", "stopword_en_filter", "content_length_filter", "limit_token_count_filter", "spanish_stop", "spanish_keywords", "spanish_stemmer"]
},
"reading_analyzer_sv" : {
"type" : "custom",
"tokenizer" : "standard"
},
"reading_term_analyzer_sv" : {
"type" : "custom",
"tokenizer" : "standard"
},
"normalize_analyzer_sv" : {
"type" : "custom",
"tokenizer" : "keyword",
"char_filter" : ["mapping_char"],
"filter" : ["lowercase", "stemmer_en_filter", "swedish_stemmer"]
},
"contents_analyzer_sv" : {
"type" : "custom",
"tokenizer" : "standard",
"filter" : ["lowercase", "stemmer_en_filter", "stopword_en_filter", "content_length_filter", "limit_token_count_filter", "swedish_stop", "swedish_keywords", "swedish_stemmer"]
},
"reading_analyzer_tr" : {
"type" : "custom",
"tokenizer" : "standard"
},
"reading_term_analyzer_tr" : {
"type" : "custom",
"tokenizer" : "standard"
},
"normalize_analyzer_tr" : {
"type" : "custom",
"tokenizer" : "keyword",
"char_filter" : ["mapping_char"],
"filter" : ["lowercase", "stemmer_en_filter", "apostrophe", "turkish_lowercase", "turkish_stemmer"]
},
"contents_analyzer_tr" : {
"type" : "custom",
"tokenizer" : "standard",
"filter" : ["lowercase", "stemmer_en_filter", "stopword_en_filter", "content_length_filter", "limit_token_count_filter", "apostrophe", "turkish_lowercase", "turkish_stop", "turkish_keywords", "turkish_stemmer"]
},
"reading_analyzer_th" : {
"type" : "custom",
"tokenizer" : "thai"
},
"reading_term_analyzer_th" : {
"type" : "custom",
"tokenizer" : "thai"
},
"normalize_analyzer_th" : {
"type" : "custom",
"tokenizer" : "keyword",
"char_filter" : ["mapping_char"],
"filter" : ["lowercase", "stemmer_en_filter"]
},
"contents_analyzer_th" : {
"type" : "custom",
"tokenizer" : "thai",
"filter" : ["lowercase", "stemmer_en_filter", "stopword_en_filter", "content_length_filter", "limit_token_count_filter", "thai_stop"]
}
},
"char_filter" : {
"mapping_char" : {
"type" : "mapping",
"mappings" : ["ガ=>ガ", "ギ=>ギ", "グ=>グ", "ゲ=>ゲ", "ゴ=>ゴ", "ザ=>ザ", "ジ=>ジ", "ズ=>ズ", "ゼ=>ゼ", "ゾ=>ゾ", "ダ=>ダ", "ヂ=>ヂ", "ヅ=>ヅ",
"デ=>デ", "ド=>ド", "バ=>バ", "ビ=>ビ", "ブ=>ブ", "ベ=>ベ", "ボ=>ボ", "。=>。", "「=>「", "」=>」", "、=>、", "・=>・", "ヲ=>ヲ", "ァ=>ァ", "ィ=>ィ", "ゥ=>ゥ", "ェ=>ェ", "ォ=>ォ", "ャ=>ャ", "ュ=>ュ", "ョ=>ョ", "ッ=>ッ", "ア=>ア",
"イ=>イ", "ウ=>ウ", "エ=>エ", "オ=>オ", "カ=>カ", "キ=>キ", "ク=>ク", "ケ=>ケ", "コ=>コ", "サ=>サ", "シ=>シ", "ス=>ス", "セ=>セ", "ソ=>ソ", "タ=>タ", "チ=>チ", "ツ=>ツ", "テ=>テ", "ト=>ト", "ナ=>ナ", "ニ=>ニ", "ヌ=>ヌ", "ネ=>ネ", "ノ=>ノ", "ハ=>ハ",
"ヒ=>ヒ", "フ=>フ", "ヘ=>ヘ", "ホ=>ホ", "マ=>マ", "ミ=>ミ", "ム=>ム", "メ=>メ", "モ=>モ", "ヤ=>ヤ", "ユ=>ユ", "ヨ=>ヨ", "ラ=>ラ", "リ=>リ", "ル=>ル", "レ=>レ", "ロ=>ロ", "ワ=>ワ", "ン=>ン",
"ａ=>a", "ｂ=>b","ｃ=>c","ｄ=>d","ｅ=>e","ｆ=>f","ｇ=>g","ｈ=>h","ｉ=>i","ｊ=>j","ｋ=>k","ｌ=>l","ｍ=>m","ｎ=>n","ｏ=>o","ｐ=>p","ｑ=>q","ｒ=>r","ｓ=>s",
"ｔ=>t","ｕ=>u","ｖ=>v","ｗ=>w","ｘ=>x", "ｙ=>y", "ｚ=>z",
"Ａ=>A", "Ｂ=>B","Ｃ=>C","Ｄ=>D","Ｅ=>E","Ｆ=>F","Ｇ=>G","Ｈ=>H","Ｉ=>I","Ｊ=>J","Ｋ=>K","Ｌ=>L","Ｍ=>M","Ｎ=>N","Ｏ=>O","Ｐ=>P","Ｑ=>Q","Ｒ=>R","Ｓ=>S",
"Ｔ=>T","Ｕ=>U","Ｖ=>V","Ｗ=>W","Ｘ=>X", "Ｙ=>Y", "Ｚ=>Z",
"１=>1", "２=>2", "３=>3", "４=>4", "５=>5", "６=>6", "７=>7", "８=>8", "９=>9", "０=>0"
]
}
},
"filter" : {
"reading_form" : {
"type" : "fess_japanese_readingform"
},
"pos_filter" : {
"type" : "fess_japanese_part_of_speech",
"stoptags" : [
"その他",
"その他-間投",
"フィラー",
"感動詞",
"記号",
"記号-アルファベット",
"記号-一般",
"記号-括弧開",
"記号-括弧閉",
"記号-句点",
"記号-空白",
"記号-読点",
"形容詞",
"形容詞-接尾",
"形容詞-非自立",
"語断片",
"助詞",
"助詞-格助詞",
"助詞-格助詞-一般",
"助詞-格助詞-引用",
"助詞-格助詞-連語",
"助詞-間投助詞",
"助詞-係助詞",
"助詞-終助詞",
"助詞-接続助詞",
"助詞-特殊",
"助詞-副詞化",
"助詞-副助詞",
"助詞-副助詞/並立助詞/終助詞",
"助詞-並立助詞",
"助詞-連体化",
"助動詞",
"接続詞",
"接頭詞",
"接頭詞-形容詞接続",
"接頭詞-数接続",
"接頭詞-動詞接続",
"接頭詞-名詞接続",
"動詞",
"動詞-自立",
"動詞-接尾",
"動詞-非自立",
"非言語音",
"副詞",
"副詞-一般",
"副詞-助詞類接続",
"名詞-ナイ形容詞語幹",
"名詞-引用文字列",
"名詞-形容動詞語幹",
"名詞-数",
"名詞-接続詞的",
"名詞-接尾",
"名詞-接尾-サ変接続",
"名詞-接尾-一般",
"名詞-接尾-形容動詞語幹",
"名詞-接尾-助数詞",
"名詞-接尾-助動詞語幹",
"名詞-接尾-人名",
"名詞-接尾-地域",
"名詞-接尾-特殊",
"名詞-接尾-副詞可能",
"名詞-代名詞",
"名詞-代名詞-一般",
"名詞-代名詞-縮約",
"名詞-動詞非自立的",
"名詞-特殊",
"名詞-特殊-助動詞語幹",
"名詞-非自立",
"名詞-非自立-一般",
"名詞-非自立-形容動詞語幹",
"名詞-非自立-助動詞語幹",
"名詞-非自立-副詞可能",
"名詞-副詞可能",
"連体詞"
]
},
"stopword_en_filter": {
"type": "stop",
"stopwords": "_english_"
},
"content_length_filter": {
"type": "length",
"max": 30
},
"limit_token_count_filter": {
"type": "limit",
"max_token_count": 10000
},
"stemmer_en_filter": {
"type": "stemmer",
"name": "english"
},
"arabic_stop": {
"type": "stop",
"stopwords": "_arabic_"
},
"arabic_keywords": {
"type": "keyword_marker",
"keywords_path": "${fess.dictionary.path}ar/protwords.txt"
},
"arabic_stemmer": {
"type": "stemmer",
"language": "arabic"
},
"catalan_elision": {
"type": "elision",
"articles": [ "d", "l", "m", "n", "s", "t"]
},
"catalan_stop": {
"type": "stop",
"stopwords": "_catalan_"
},
"catalan_keywords": {
"type": "keyword_marker",
"keywords_path": "${fess.dictionary.path}ca/protwords.txt"
},
"catalan_stemmer": {
"type": "stemmer",
"language": "catalan"
},
"czech_stop": {
"type": "stop",
"stopwords": "_czech_"
},
"czech_keywords": {
"type": "keyword_marker",
"keywords_path": "${fess.dictionary.path}cs/protwords.txt"
},
"czech_stemmer": {
"type": "stemmer",
"language": "czech"
},
"danish_stop": {
"type": "stop",
"stopwords": "_danish_"
},
"danish_keywords": {
"type": "keyword_marker",
"keywords_path": "${fess.dictionary.path}da/protwords.txt"
},
"danish_stemmer": {
"type": "stemmer",
"language": "danish"
},
"dutch_stop": {
"type": "stop",
"stopwords": "_dutch_"
},
"dutch_keywords": {
"type": "keyword_marker",
"keywords_path": "${fess.dictionary.path}nl/protwords.txt"
},
"dutch_stemmer": {
"type": "stemmer",
"language": "dutch"
},
"dutch_override": {
"type": "stemmer_override",
"rules": [
"fiets=>fiets",
"bromfiets=>bromfiets",
"ei=>eier",
"kind=>kinder"
]
},
"english_keywords": {
"type": "keyword_marker",
"keywords": ["hello"]
},
"finnish_stop": {
"type": "stop",
"stopwords": "_finnish_"
},
"finnish_keywords": {
"type": "keyword_marker",
"keywords": ["Hei"]
},
"finnish_stemmer": {
"type": "stemmer",
"language": "finnish"
},
"french_elision": {
"type": "elision",
"articles_case": true,
"articles": [
"l", "m", "t", "qu", "n", "s",
"j", "d", "c", "jusqu", "quoiqu",
"lorsqu", "puisqu"
]
},
"french_stop": {
"type": "stop",
"stopwords": "_french_"
},
"french_keywords": {
"type": "keyword_marker",
"keywords_path": "${fess.dictionary.path}fr/protwords.txt"
},
"french_stemmer": {
"type": "stemmer",
"language": "light_french"
},
"german_stop": {
"type": "stop",
"stopwords": "_german_"
},
"german_keywords": {
"type": "keyword_marker",
"keywords_path": "${fess.dictionary.path}de/protwords.txt"
},
"german_stemmer": {
"type": "stemmer",
"language": "light_german"
},
"greek_stop": {
"type": "stop",
"stopwords": "_greek_"
},
"greek_lowercase": {
"type": "lowercase",
"language": "greek"
},
"greek_keywords": {
"type": "keyword_marker",
"keywords_path": "${fess.dictionary.path}el/protwords.txt"
},
"greek_stemmer": {
"type": "stemmer",
"language": "greek"
},
"hindi_stop": {
"type": "stop",
"stopwords": "_hindi_"
},
"hungarian_stop": {
"type": "stop",
"stopwords": "_hungarian_"
},
"hungarian_keywords": {
"type": "keyword_marker",
"keywords_path": "${fess.dictionary.path}hu/protwords.txt"
},
"hungarian_stemmer": {
"type": "stemmer",
"language": "hungarian"
},
"indonesian_stop": {
"type": "stop",
"stopwords": "_indonesian_"
},
"indonesian_keywords": {
"type": "keyword_marker",
"keywords_path": "${fess.dictionary.path}id/protwords.txt"
},
"indonesian_stemmer": {
"type": "stemmer",
"language": "indonesian"
},
"italian_elision": {
"type": "elision",
"articles": [
"c", "l", "all", "dall", "dell",
"nell", "sull", "coll", "pell",
"gl", "agl", "dagl", "degl", "negl",
"sugl", "un", "m", "t", "s", "v", "d"
]
},
"italian_stop": {
"type": "stop",
"stopwords": "_italian_"
},
"italian_keywords": {
"type": "keyword_marker",
"keywords_path": "${fess.dictionary.path}it/protwords.txt"
},
"italian_stemmer": {
"type": "stemmer",
"language": "light_italian"
},
"latvian_stop": {
"type": "stop",
"stopwords": "_latvian_"
},
"latvian_keywords": {
"type": "keyword_marker",
"keywords_path": "${fess.dictionary.path}lv/protwords.txt"
},
"latvian_stemmer": {
"type": "stemmer",
"language": "latvian"
},
"lithuanian_stop": {
"type": "stop",
"stopwords": "_lithuanian_"
},
"lithuanian_keywords": {
"type": "keyword_marker",
"keywords_path": "${fess.dictionary.path}lt/protwords.txt"
},
"lithuanian_stemmer": {
"type": "stemmer",
"language": "lithuanian"
},
"norwegian_stop": {
"type": "stop",
"stopwords": "_norwegian_"
},
"norwegian_keywords": {
"type": "keyword_marker",
"keywords_path": "${fess.dictionary.path}no/protwords.txt"
},
"norwegian_stemmer": {
"type": "stemmer",
"language": "norwegian"
},
"persian_stop": {
"type": "stop",
"stopwords": "_persian_"
},
"portuguese_stop": {
"type": "stop",
"stopwords": "_portuguese_"
},
"portuguese_keywords": {
"type": "keyword_marker",
"keywords_path": "${fess.dictionary.path}pt/protwords.txt"
},
"portuguese_stemmer": {
"type": "stemmer",
"language": "light_portuguese"
},
"romanian_stop": {
"type": "stop",
"stopwords": "_romanian_"
},
"romanian_keywords": {
"type": "keyword_marker",
"keywords_path": "${fess.dictionary.path}ro/protwords.txt"
},
"romanian_stemmer": {
"type": "stemmer",
"language": "romanian"
},
"russian_stop": {
"type": "stop",
"stopwords": "_russian_"
},
"russian_keywords": {
"type": "keyword_marker",
"keywords_path": "${fess.dictionary.path}ru/protwords.txt"
},
"russian_stemmer": {
"type": "stemmer",
"language": "russian"
},
"spanish_stop": {
"type": "stop",
"stopwords": "_spanish_"
},
"spanish_keywords": {
"type": "keyword_marker",
"keywords_path": "${fess.dictionary.path}es/protwords.txt"
},
"spanish_stemmer": {
"type": "stemmer",
"language": "light_spanish"
},
"swedish_stop": {
"type": "stop",
"stopwords": "_swedish_"
},
"swedish_keywords": {
"type": "keyword_marker",
"keywords_path": "${fess.dictionary.path}sv/protwords.txt"
},
"swedish_stemmer": {
"type": "stemmer",
"language": "swedish"
},
"turkish_stop": {
"type": "stop",
"stopwords": "_turkish_"
},
"turkish_lowercase": {
"type": "lowercase",
"language": "turkish"
},
"turkish_keywords": {
"type": "keyword_marker",
"keywords_path": "${fess.dictionary.path}tr/protwords.txt"
},
"turkish_stemmer": {
"type": "stemmer",
"language": "turkish"
},
"thai_stop": {
"type": "stop",
"stopwords": "_thai_"
}
}
}
}