diff --git a/src/main/java/org/codelibs/fess/helper/SuggestHelper.java b/src/main/java/org/codelibs/fess/helper/SuggestHelper.java index e59726b8e..6adbec7d8 100644 --- a/src/main/java/org/codelibs/fess/helper/SuggestHelper.java +++ b/src/main/java/org/codelibs/fess/helper/SuggestHelper.java @@ -44,6 +44,7 @@ import org.codelibs.fess.suggest.Suggester; import org.codelibs.fess.suggest.constants.FieldNames; import org.codelibs.fess.suggest.entity.SuggestItem; import org.codelibs.fess.suggest.index.contents.document.ESSourceReader; +import org.codelibs.fess.suggest.settings.AnalyzerSettings; import org.codelibs.fess.suggest.settings.SuggestSettings; import org.codelibs.fess.suggest.util.SuggestUtil; import org.codelibs.fess.util.ComponentUtil; diff --git a/src/main/resources/fess-suggest-default-analyzer.json b/src/main/resources/fess-suggest-default-analyzer.json deleted file mode 100644 index 2cf013585..000000000 --- a/src/main/resources/fess-suggest-default-analyzer.json +++ /dev/null @@ -1,187 +0,0 @@ -{ - "analysis" : { - "tokenizer" : { - "fess_japanese_normal" : { - "type" : "fess_japanese_tokenizer", - "mode" : "normal", - "discard_punctuation" : "false" - } - }, - "analyzer" : { - "reading_analyzer" : { - "type" : "custom", - "tokenizer" : "fess_japanese_normal", - "filter" : ["reading_form"] - }, - "reading_term_analyzer" : { - "type" : "custom", - "tokenizer" : "fess_japanese_normal" - }, - "normalize_analyzer" : { - "type" : "custom", - "tokenizer" : "keyword", - "char_filter" : ["mapping_char"], - "filter" : ["lowercase"] - }, - "contents_analyzer" : { - "type" : "custom", - "tokenizer" : "fess_japanese_normal", - "filter" : ["stopword_en_filter", "pos_filter", "content_length_filter", "limit_token_count_filter"] - }, - "reading_analyzer_ja" : { - "type" : "custom", - "tokenizer" : "fess_japanese_normal", - "filter" : ["reading_form"] - }, - "reading_term_analyzer_ja" : { - "type" : "custom", - "tokenizer" : "fess_japanese_normal" - }, - "normalize_analyzer_ja" : { - "type" : "custom", - "tokenizer" : "keyword", - "char_filter" : ["mapping_char"], - "filter" : ["lowercase"] - }, - "contents_analyzer_ja" : { - "type" : "custom", - "tokenizer" : "fess_japanese_normal", - "filter" : ["stopword_en_filter", "pos_filter", "content_length_filter", "limit_token_count_filter"] - }, - "reading_analyzer_en" : { - "type" : "custom", - "tokenizer" : "standard" - }, - "reading_term_analyzer_en" : { - "type" : "custom", - "tokenizer" : "standard" - }, - "normalize_analyzer_en" : { - "type" : "custom", - "tokenizer" : "keyword", - "char_filter" : ["mapping_char"], - "filter" : ["lowercase"] - }, - "contents_analyzer_en" : { - "type" : "custom", - "tokenizer" : "standard", - "filter" : ["stemmer_en_filter", "stopword_en_filter", "content_length_filter", "limit_token_count_filter"] - } - }, - "char_filter" : { - "mapping_char" : { - "type" : "mapping", - "mappings" : ["ガ=>ガ", "ギ=>ギ", "グ=>グ", "ゲ=>ゲ", "ゴ=>ゴ", "ザ=>ザ", "ジ=>ジ", "ズ=>ズ", "ゼ=>ゼ", "ゾ=>ゾ", "ダ=>ダ", "ヂ=>ヂ", "ヅ=>ヅ", - "デ=>デ", "ド=>ド", "バ=>バ", "ビ=>ビ", "ブ=>ブ", "ベ=>ベ", "ボ=>ボ", "。=>。", "「=>「", "」=>」", "、=>、", "・=>・", "ヲ=>ヲ", "ァ=>ァ", "ィ=>ィ", "ゥ=>ゥ", "ェ=>ェ", "ォ=>ォ", "ャ=>ャ", "ュ=>ュ", "ョ=>ョ", "ッ=>ッ", "ア=>ア", - "イ=>イ", "ウ=>ウ", "エ=>エ", "オ=>オ", "カ=>カ", "キ=>キ", "ク=>ク", "ケ=>ケ", "コ=>コ", "サ=>サ", "シ=>シ", "ス=>ス", "セ=>セ", "ソ=>ソ", "タ=>タ", "チ=>チ", "ツ=>ツ", "テ=>テ", "ト=>ト", "ナ=>ナ", "ニ=>ニ", "ヌ=>ヌ", "ネ=>ネ", "ノ=>ノ", "ハ=>ハ", - "ヒ=>ヒ", "フ=>フ", "ヘ=>ヘ", "ホ=>ホ", "マ=>マ", "ミ=>ミ", "ム=>ム", "メ=>メ", "モ=>モ", "ヤ=>ヤ", "ユ=>ユ", "ヨ=>ヨ", "ラ=>ラ", "リ=>リ", "ル=>ル", "レ=>レ", "ロ=>ロ", "ワ=>ワ", "ン=>ン", - "a=>a", "b=>b","c=>c","d=>d","e=>e","f=>f","g=>g","h=>h","i=>i","j=>j","k=>k","l=>l","m=>m","n=>n","o=>o","p=>p","q=>q","r=>r","s=>s", - "t=>t","u=>u","v=>v","w=>w","x=>x", "y=>y", "z=>z", - "A=>A", "B=>B","C=>C","D=>D","E=>E","F=>F","G=>g","H=>H","I=>I","J=>j","K=>k","L=>L","M=>M","N=>N","O=>O","P=>P","Q=>Q","R=>R","S=>S", - "T=>T","U=>U","V=>V","W=>W","X=>X", "Y=>Y", "Z=>Z", - "1=>1", "2=>2", "3=>3", "4=>4", "5=>5", "6=>6", "7=>7", "8=>8", "9=>9", "0=>0" - ] - } - }, - "filter" : { - "reading_form" : { - "type" : "fess_japanese_readingform" - }, - "pos_filter" : { - "type" : "fess_japanese_part_of_speech", - "stoptags" : [ - "その他", - "その他-間投", - "フィラー", - "感動詞", - "記号", - "記号-アルファベット", - "記号-一般", - "記号-括弧開", - "記号-括弧閉", - "記号-句点", - "記号-空白", - "記号-読点", - "形容詞", - "形容詞-接尾", - "形容詞-非自立", - "語断片", - "助詞", - "助詞-格助詞", - "助詞-格助詞-一般", - "助詞-格助詞-引用", - "助詞-格助詞-連語", - "助詞-間投助詞", - "助詞-係助詞", - "助詞-終助詞", - "助詞-接続助詞", - "助詞-特殊", - "助詞-副詞化", - "助詞-副助詞", - "助詞-副助詞/並立助詞/終助詞", - "助詞-並立助詞", - "助詞-連体化", - "助動詞", - "接続詞", - "接頭詞", - "接頭詞-形容詞接続", - "接頭詞-数接続", - "接頭詞-動詞接続", - "接頭詞-名詞接続", - "動詞", - "動詞-自立", - "動詞-接尾", - "動詞-非自立", - "非言語音", - "副詞", - "副詞-一般", - "副詞-助詞類接続", - "名詞-ナイ形容詞語幹", - "名詞-引用文字列", - "名詞-形容動詞語幹", - "名詞-数", - "名詞-接続詞的", - "名詞-接尾", - "名詞-接尾-サ変接続", - "名詞-接尾-一般", - "名詞-接尾-形容動詞語幹", - "名詞-接尾-助数詞", - "名詞-接尾-助動詞語幹", - "名詞-接尾-人名", - "名詞-接尾-地域", - "名詞-接尾-特殊", - "名詞-接尾-副詞可能", - "名詞-代名詞", - "名詞-代名詞-一般", - "名詞-代名詞-縮約", - "名詞-動詞非自立的", - "名詞-特殊", - "名詞-特殊-助動詞語幹", - "名詞-非自立", - "名詞-非自立-一般", - "名詞-非自立-形容動詞語幹", - "名詞-非自立-助動詞語幹", - "名詞-非自立-副詞可能", - "名詞-副詞可能", - "連体詞" - ] - }, - "stopword_en_filter": { - "type": "stop", - "stopwords": "_english_" - }, - "content_length_filter": { - "type": "length", - "max": 30 - }, - "limit_token_count_filter": { - "type": "limit", - "max_token_count": 10000 - }, - "stemmer_en_filter": { - "type": "stemmer", - "name": "english" - } - } - } -} diff --git a/src/main/resources/suggest/fess-suggest-default-analyzer.json b/src/main/resources/suggest/fess-suggest-default-analyzer.json new file mode 100644 index 000000000..bf055ac52 --- /dev/null +++ b/src/main/resources/suggest/fess-suggest-default-analyzer.json @@ -0,0 +1,931 @@ +{ + "analysis" : { + "tokenizer" : { + "fess_japanese_normal" : { + "type" : "fess_japanese_tokenizer", + "mode" : "normal", + "discard_punctuation" : "false" + } + }, + "analyzer" : { + "reading_analyzer" : { + "type" : "custom", + "tokenizer" : "fess_japanese_normal", + "filter" : ["reading_form"] + }, + "reading_term_analyzer" : { + "type" : "custom", + "tokenizer" : "fess_japanese_normal" + }, + "normalize_analyzer" : { + "type" : "custom", + "tokenizer" : "keyword", + "char_filter" : ["mapping_char"], + "filter" : ["lowercase"] + }, + "contents_analyzer" : { + "type" : "custom", + "tokenizer" : "fess_japanese_normal", + "filter" : ["stopword_en_filter", "pos_filter", "content_length_filter", "limit_token_count_filter"] + }, + "reading_analyzer_ja" : { + "type" : "custom", + "tokenizer" : "fess_japanese_normal", + "filter" : ["reading_form"] + }, + "reading_term_analyzer_ja" : { + "type" : "custom", + "tokenizer" : "fess_japanese_normal" + }, + "normalize_analyzer_ja" : { + "type" : "custom", + "tokenizer" : "keyword", + "char_filter" : ["mapping_char"], + "filter" : ["lowercase"] + }, + "contents_analyzer_ja" : { + "type" : "custom", + "tokenizer" : "fess_japanese_normal", + "filter" : ["stopword_en_filter", "pos_filter", "content_length_filter", "limit_token_count_filter"] + }, + "reading_analyzer_en" : { + "type" : "custom", + "tokenizer" : "standard" + }, + "reading_term_analyzer_en" : { + "type" : "custom", + "tokenizer" : "standard" + }, + "normalize_analyzer_en" : { + "type" : "custom", + "tokenizer" : "keyword", + "char_filter" : ["mapping_char"], + "filter" : ["lowercase"] + }, + "contents_analyzer_en" : { + "type" : "custom", + "tokenizer" : "standard", + "filter" : ["stemmer_en_filter", "stopword_en_filter", "content_length_filter", "limit_token_count_filter"] + }, + "reading_analyzer_ar" : { + "type" : "custom", + "tokenizer" : "standard" + }, + "reading_term_analyzer_ar" : { + "type" : "custom", + "tokenizer" : "standard" + }, + "normalize_analyzer_ar" : { + "type" : "custom", + "tokenizer" : "keyword", + "char_filter" : ["mapping_char"], + "filter" : ["lowercase", "stemmer_en_filter", "arabic_normalization", "arabic_stemmer"] + }, + "contents_analyzer_ar" : { + "type" : "custom", + "tokenizer" : "standard", + "filter" : ["lowercase", "stemmer_en_filter", "stopword_en_filter", "content_length_filter", "limit_token_count_filter", "arabic_stop", "arabic_normalization", "arabic_keywords", "arabic_stemmer"] + }, + "reading_analyzer_ca" : { + "type" : "custom", + "tokenizer" : "standard" + }, + "reading_term_analyzer_ca" : { + "type" : "custom", + "tokenizer" : "standard" + }, + "normalize_analyzer_ca" : { + "type" : "custom", + "tokenizer" : "keyword", + "char_filter" : ["mapping_char"], + "filter" : ["lowercase", "stemmer_en_filter", "catalan_elision", "catalan_stemmer"] + }, + "contents_analyzer_ca" : { + "type" : "custom", + "tokenizer" : "standard", + "filter" : ["lowercase", "stemmer_en_filter", "stopword_en_filter", "content_length_filter", "limit_token_count_filter", "catalan_elision", "catalan_stop", "catalan_keywords", "catalan_stemmer"] + }, + "reading_analyzer_cs" : { + "type" : "custom", + "tokenizer" : "standard" + }, + "reading_term_analyzer_cs" : { + "type" : "custom", + "tokenizer" : "standard" + }, + "normalize_analyzer_cs" : { + "type" : "custom", + "tokenizer" : "keyword", + "char_filter" : ["mapping_char"], + "filter" : ["lowercase", "stemmer_en_filter", "czech_stemmer"] + }, + "contents_analyzer_cs" : { + "type" : "custom", + "tokenizer" : "standard", + "filter" : ["lowercase", "stemmer_en_filter", "stopword_en_filter", "content_length_filter", "limit_token_count_filter", "czech_stop", "czech_keywords", "czech_stemmer"] + }, + "reading_analyzer_da" : { + "type" : "custom", + "tokenizer" : "standard" + }, + "reading_term_analyzer_da" : { + "type" : "custom", + "tokenizer" : "standard" + }, + "normalize_analyzer_da" : { + "type" : "custom", + "tokenizer" : "keyword", + "char_filter" : ["mapping_char"], + "filter" : ["lowercase", "stemmer_en_filter", "danish_stemmer"] + }, + "contents_analyzer_da" : { + "type" : "custom", + "tokenizer" : "standard", + "filter" : ["lowercase", "stemmer_en_filter", "stopword_en_filter", "content_length_filter", "limit_token_count_filter", "danish_stop", "danish_keywords", "danish_stemmer"] + }, + "reading_analyzer_nl" : { + "type" : "custom", + "tokenizer" : "standard" + }, + "reading_term_analyzer_nl" : { + "type" : "custom", + "tokenizer" : "standard" + }, + "normalize_analyzer_nl" : { + "type" : "custom", + "tokenizer" : "keyword", + "char_filter" : ["mapping_char"], + "filter" : ["lowercase", "stemmer_en_filter", "dutch_override", "dutch_stemmer"] + }, + "contents_analyzer_nl" : { + "type" : "custom", + "tokenizer" : "standard", + "filter" : ["lowercase", "stemmer_en_filter", "stopword_en_filter", "content_length_filter", "limit_token_count_filter", "dutch_stop", "dutch_keywords", "dutch_override", "dutch_stemmer"] + }, + "reading_analyzer_fi" : { + "type" : "custom", + "tokenizer" : "standard" + }, + "reading_term_analyzer_fi" : { + "type" : "custom", + "tokenizer" : "standard" + }, + "normalize_analyzer_fi" : { + "type" : "custom", + "tokenizer" : "keyword", + "char_filter" : ["mapping_char"], + "filter" : ["lowercase", "stemmer_en_filter", "finnish_stemmer"] + }, + "contents_analyzer_fi" : { + "type" : "custom", + "tokenizer" : "standard", + "filter" : ["lowercase", "stemmer_en_filter", "stopword_en_filter", "content_length_filter", "limit_token_count_filter", "finnish_stop", "finnish_keywords", "finnish_stemmer"] + }, + "reading_analyzer_fr" : { + "type" : "custom", + "tokenizer" : "standard" + }, + "reading_term_analyzer_fr" : { + "type" : "custom", + "tokenizer" : "standard" + }, + "normalize_analyzer_fr" : { + "type" : "custom", + "tokenizer" : "keyword", + "char_filter" : ["mapping_char"], + "filter" : ["lowercase", "stemmer_en_filter", "french_elision", "finnish_stemmer"] + }, + "contents_analyzer_fr" : { + "type" : "custom", + "tokenizer" : "standard", + "filter" : ["lowercase", "stemmer_en_filter", "stopword_en_filter", "content_length_filter", "limit_token_count_filter", "french_elision", "french_stop", "french_keywords", "french_stemmer"] + }, + "reading_analyzer_de" : { + "type" : "custom", + "tokenizer" : "standard" + }, + "reading_term_analyzer_de" : { + "type" : "custom", + "tokenizer" : "standard" + }, + "normalize_analyzer_de" : { + "type" : "custom", + "tokenizer" : "keyword", + "char_filter" : ["mapping_char"], + "filter" : ["lowercase", "stemmer_en_filter", "german_normalization", "german_stemmer"] + }, + "contents_analyzer_de" : { + "type" : "custom", + "tokenizer" : "standard", + "filter" : ["lowercase", "stemmer_en_filter", "stopword_en_filter", "content_length_filter", "limit_token_count_filter", "german_stop", "german_keywords", "german_normalization", "german_stemmer"] + }, + "reading_analyzer_el" : { + "type" : "custom", + "tokenizer" : "standard" + }, + "reading_term_analyzer_el" : { + "type" : "custom", + "tokenizer" : "standard" + }, + "normalize_analyzer_el" : { + "type" : "custom", + "tokenizer" : "keyword", + "char_filter" : ["mapping_char"], + "filter" : ["greek_lowercase", "lowercase", "stemmer_en_filter", "greek_stemmer"] + }, + "contents_analyzer_el" : { + "type" : "custom", + "tokenizer" : "standard", + "filter" : ["lowercase", "stemmer_en_filter", "stopword_en_filter", "content_length_filter", "limit_token_count_filter", "greek_stop", "greek_keywords", "greek_stemmer"] + }, + "reading_analyzer_hu" : { + "type" : "custom", + "tokenizer" : "standard" + }, + "reading_term_analyzer_hu" : { + "type" : "custom", + "tokenizer" : "standard" + }, + "normalize_analyzer_hu" : { + "type" : "custom", + "tokenizer" : "keyword", + "char_filter" : ["mapping_char"], + "filter" : ["lowercase", "stemmer_en_filter", "hungarian_stemmer"] + }, + "contents_analyzer_hu" : { + "type" : "custom", + "tokenizer" : "standard", + "filter" : ["lowercase", "stemmer_en_filter", "stopword_en_filter", "content_length_filter", "limit_token_count_filter", "hungarian_stop", "hungarian_keywords", "hungarian_stemmer"] + }, + "reading_analyzer_id" : { + "type" : "custom", + "tokenizer" : "standard" + }, + "reading_term_analyzer_id" : { + "type" : "custom", + "tokenizer" : "standard" + }, + "normalize_analyzer_id" : { + "type" : "custom", + "tokenizer" : "keyword", + "char_filter" : ["mapping_char"], + "filter" : ["lowercase", "stemmer_en_filter", "indonesian_stemmer"] + }, + "contents_analyzer_id" : { + "type" : "custom", + "tokenizer" : "standard", + "filter" : ["lowercase", "stemmer_en_filter", "stopword_en_filter", "content_length_filter", "limit_token_count_filter", "indonesian_stop", "indonesian_keywords", "indonesian_stemmer"] + }, + "reading_analyzer_it" : { + "type" : "custom", + "tokenizer" : "standard" + }, + "reading_term_analyzer_it" : { + "type" : "custom", + "tokenizer" : "standard" + }, + "normalize_analyzer_it" : { + "type" : "custom", + "tokenizer" : "keyword", + "char_filter" : ["mapping_char"], + "filter" : ["lowercase", "stemmer_en_filter", "italian_elision", "italian_stemmer"] + }, + "contents_analyzer_it" : { + "type" : "custom", + "tokenizer" : "standard", + "filter" : ["lowercase", "stemmer_en_filter", "stopword_en_filter", "content_length_filter", "limit_token_count_filter", "italian_elision", "italian_stop", "italian_keywords", "italian_stemmer"] + }, + "reading_analyzer_lv" : { + "type" : "custom", + "tokenizer" : "standard" + }, + "reading_term_analyzer_lv" : { + "type" : "custom", + "tokenizer" : "standard" + }, + "normalize_analyzer_lv" : { + "type" : "custom", + "tokenizer" : "keyword", + "char_filter" : ["mapping_char"], + "filter" : ["lowercase", "stemmer_en_filter", "latvian_stemmer"] + }, + "contents_analyzer_lv" : { + "type" : "custom", + "tokenizer" : "standard", + "filter" : ["lowercase", "stemmer_en_filter", "stopword_en_filter", "content_length_filter", "limit_token_count_filter", "latvian_stop", "latvian_keywords", "latvian_stemmer"] + }, + "reading_analyzer_lt" : { + "type" : "custom", + "tokenizer" : "standard" + }, + "reading_term_analyzer_lt" : { + "type" : "custom", + "tokenizer" : "standard" + }, + "normalize_analyzer_lt" : { + "type" : "custom", + "tokenizer" : "keyword", + "char_filter" : ["mapping_char"], + "filter" : ["lowercase", "stemmer_en_filter", "lithuanian_stemmer"] + }, + "contents_analyzer_lt" : { + "type" : "custom", + "tokenizer" : "standard", + "filter" : ["lowercase", "stemmer_en_filter", "stopword_en_filter", "content_length_filter", "limit_token_count_filter", "lithuanian_stop", "lithuanian_keywords", "lithuanian_stemmer"] + }, + "reading_analyzer_no" : { + "type" : "custom", + "tokenizer" : "standard" + }, + "reading_term_analyzer_no" : { + "type" : "custom", + "tokenizer" : "standard" + }, + "normalize_analyzer_no" : { + "type" : "custom", + "tokenizer" : "keyword", + "char_filter" : ["mapping_char"], + "filter" : ["lowercase", "stemmer_en_filter", "norwegian_stemmer"] + }, + "contents_analyzer_no" : { + "type" : "custom", + "tokenizer" : "standard", + "filter" : ["lowercase", "stemmer_en_filter", "stopword_en_filter", "content_length_filter", "limit_token_count_filter", "norwegian_stop", "norwegian_keywords", "norwegian_stemmer"] + }, + "reading_analyzer_fa" : { + "type" : "custom", + "tokenizer" : "standard" + }, + "reading_term_analyzer_fa" : { + "type" : "custom", + "tokenizer" : "standard" + }, + "normalize_analyzer_fa" : { + "type" : "custom", + "tokenizer" : "keyword", + "char_filter" : ["mapping_char"], + "filter" : ["lowercase", "stemmer_en_filter", "arabic_normalization", "persian_normalization"] + }, + "contents_analyzer_fa" : { + "type" : "custom", + "tokenizer" : "standard", + "filter" : ["lowercase", "stemmer_en_filter", "stopword_en_filter", "content_length_filter", "limit_token_count_filter", "arabic_normalization", "persian_normalization", "persian_stop"] + }, + "reading_analyzer_pt" : { + "type" : "custom", + "tokenizer" : "standard" + }, + "reading_term_analyzer_pt" : { + "type" : "custom", + "tokenizer" : "standard" + }, + "normalize_analyzer_pt" : { + "type" : "custom", + "tokenizer" : "keyword", + "char_filter" : ["mapping_char"], + "filter" : ["lowercase", "stemmer_en_filter", "portuguese_stemmer"] + }, + "contents_analyzer_pt" : { + "type" : "custom", + "tokenizer" : "standard", + "filter" : ["lowercase", "stemmer_en_filter", "stopword_en_filter", "content_length_filter", "limit_token_count_filter", "portuguese_stop", "portuguese_keywords", "portuguese_stemmer"] + }, + "reading_analyzer_ro" : { + "type" : "custom", + "tokenizer" : "standard" + }, + "reading_term_analyzer_ro" : { + "type" : "custom", + "tokenizer" : "standard" + }, + "normalize_analyzer_ro" : { + "type" : "custom", + "tokenizer" : "keyword", + "char_filter" : ["mapping_char"], + "filter" : ["lowercase", "stemmer_en_filter", "romanian_stemmer"] + }, + "contents_analyzer_ro" : { + "type" : "custom", + "tokenizer" : "standard", + "filter" : ["lowercase", "stemmer_en_filter", "stopword_en_filter", "content_length_filter", "limit_token_count_filter", "romanian_stop", "romanian_keywords", "romanian_stemmer"] + }, + "reading_analyzer_ru" : { + "type" : "custom", + "tokenizer" : "standard" + }, + "reading_term_analyzer_ru" : { + "type" : "custom", + "tokenizer" : "standard" + }, + "normalize_analyzer_ru" : { + "type" : "custom", + "tokenizer" : "keyword", + "char_filter" : ["mapping_char"], + "filter" : ["lowercase", "stemmer_en_filter", "russian_stemmer"] + }, + "contents_analyzer_ru" : { + "type" : "custom", + "tokenizer" : "standard", + "filter" : ["lowercase", "stemmer_en_filter", "stopword_en_filter", "content_length_filter", "limit_token_count_filter", "russian_stop", "russian_keywords", "russian_stemmer"] + }, + "reading_analyzer_es" : { + "type" : "custom", + "tokenizer" : "standard" + }, + "reading_term_analyzer_es" : { + "type" : "custom", + "tokenizer" : "standard" + }, + "normalize_analyzer_es" : { + "type" : "custom", + "tokenizer" : "keyword", + "char_filter" : ["mapping_char"], + "filter" : ["lowercase", "stemmer_en_filter", "spanish_stemmer"] + }, + "contents_analyzer_es" : { + "type" : "custom", + "tokenizer" : "standard", + "filter" : ["lowercase", "stemmer_en_filter", "stopword_en_filter", "content_length_filter", "limit_token_count_filter", "spanish_stop", "spanish_keywords", "spanish_stemmer"] + }, + "reading_analyzer_sv" : { + "type" : "custom", + "tokenizer" : "standard" + }, + "reading_term_analyzer_sv" : { + "type" : "custom", + "tokenizer" : "standard" + }, + "normalize_analyzer_sv" : { + "type" : "custom", + "tokenizer" : "keyword", + "char_filter" : ["mapping_char"], + "filter" : ["lowercase", "stemmer_en_filter", "swedish_stemmer"] + }, + "contents_analyzer_sv" : { + "type" : "custom", + "tokenizer" : "standard", + "filter" : ["lowercase", "stemmer_en_filter", "stopword_en_filter", "content_length_filter", "limit_token_count_filter", "swedish_stop", "swedish_keywords", "swedish_stemmer"] + }, + "reading_analyzer_tr" : { + "type" : "custom", + "tokenizer" : "standard" + }, + "reading_term_analyzer_tr" : { + "type" : "custom", + "tokenizer" : "standard" + }, + "normalize_analyzer_tr" : { + "type" : "custom", + "tokenizer" : "keyword", + "char_filter" : ["mapping_char"], + "filter" : ["lowercase", "stemmer_en_filter", "apostrophe", "turkish_lowercase", "turkish_stemmer"] + }, + "contents_analyzer_tr" : { + "type" : "custom", + "tokenizer" : "standard", + "filter" : ["lowercase", "stemmer_en_filter", "stopword_en_filter", "content_length_filter", "limit_token_count_filter", "apostrophe", "turkish_lowercase", "turkish_stop", "turkish_keywords", "turkish_stemmer"] + }, + "reading_analyzer_th" : { + "type" : "custom", + "tokenizer" : "thai" + }, + "reading_term_analyzer_th" : { + "type" : "custom", + "tokenizer" : "thai" + }, + "normalize_analyzer_th" : { + "type" : "custom", + "tokenizer" : "keyword", + "char_filter" : ["mapping_char"], + "filter" : ["lowercase", "stemmer_en_filter"] + }, + "contents_analyzer_th" : { + "type" : "custom", + "tokenizer" : "thai", + "filter" : ["lowercase", "stemmer_en_filter", "stopword_en_filter", "content_length_filter", "limit_token_count_filter", "thai_stop"] + } + }, + "char_filter" : { + "mapping_char" : { + "type" : "mapping", + "mappings" : ["ガ=>ガ", "ギ=>ギ", "グ=>グ", "ゲ=>ゲ", "ゴ=>ゴ", "ザ=>ザ", "ジ=>ジ", "ズ=>ズ", "ゼ=>ゼ", "ゾ=>ゾ", "ダ=>ダ", "ヂ=>ヂ", "ヅ=>ヅ", + "デ=>デ", "ド=>ド", "バ=>バ", "ビ=>ビ", "ブ=>ブ", "ベ=>ベ", "ボ=>ボ", "。=>。", "「=>「", "」=>」", "、=>、", "・=>・", "ヲ=>ヲ", "ァ=>ァ", "ィ=>ィ", "ゥ=>ゥ", "ェ=>ェ", "ォ=>ォ", "ャ=>ャ", "ュ=>ュ", "ョ=>ョ", "ッ=>ッ", "ア=>ア", + "イ=>イ", "ウ=>ウ", "エ=>エ", "オ=>オ", "カ=>カ", "キ=>キ", "ク=>ク", "ケ=>ケ", "コ=>コ", "サ=>サ", "シ=>シ", "ス=>ス", "セ=>セ", "ソ=>ソ", "タ=>タ", "チ=>チ", "ツ=>ツ", "テ=>テ", "ト=>ト", "ナ=>ナ", "ニ=>ニ", "ヌ=>ヌ", "ネ=>ネ", "ノ=>ノ", "ハ=>ハ", + "ヒ=>ヒ", "フ=>フ", "ヘ=>ヘ", "ホ=>ホ", "マ=>マ", "ミ=>ミ", "ム=>ム", "メ=>メ", "モ=>モ", "ヤ=>ヤ", "ユ=>ユ", "ヨ=>ヨ", "ラ=>ラ", "リ=>リ", "ル=>ル", "レ=>レ", "ロ=>ロ", "ワ=>ワ", "ン=>ン", + "a=>a", "b=>b","c=>c","d=>d","e=>e","f=>f","g=>g","h=>h","i=>i","j=>j","k=>k","l=>l","m=>m","n=>n","o=>o","p=>p","q=>q","r=>r","s=>s", + "t=>t","u=>u","v=>v","w=>w","x=>x", "y=>y", "z=>z", + "A=>A", "B=>B","C=>C","D=>D","E=>E","F=>F","G=>g","H=>H","I=>I","J=>j","K=>k","L=>L","M=>M","N=>N","O=>O","P=>P","Q=>Q","R=>R","S=>S", + "T=>T","U=>U","V=>V","W=>W","X=>X", "Y=>Y", "Z=>Z", + "1=>1", "2=>2", "3=>3", "4=>4", "5=>5", "6=>6", "7=>7", "8=>8", "9=>9", "0=>0" + ] + } + }, + "filter" : { + "reading_form" : { + "type" : "fess_japanese_readingform" + }, + "pos_filter" : { + "type" : "fess_japanese_part_of_speech", + "stoptags" : [ + "その他", + "その他-間投", + "フィラー", + "感動詞", + "記号", + "記号-アルファベット", + "記号-一般", + "記号-括弧開", + "記号-括弧閉", + "記号-句点", + "記号-空白", + "記号-読点", + "形容詞", + "形容詞-接尾", + "形容詞-非自立", + "語断片", + "助詞", + "助詞-格助詞", + "助詞-格助詞-一般", + "助詞-格助詞-引用", + "助詞-格助詞-連語", + "助詞-間投助詞", + "助詞-係助詞", + "助詞-終助詞", + "助詞-接続助詞", + "助詞-特殊", + "助詞-副詞化", + "助詞-副助詞", + "助詞-副助詞/並立助詞/終助詞", + "助詞-並立助詞", + "助詞-連体化", + "助動詞", + "接続詞", + "接頭詞", + "接頭詞-形容詞接続", + "接頭詞-数接続", + "接頭詞-動詞接続", + "接頭詞-名詞接続", + "動詞", + "動詞-自立", + "動詞-接尾", + "動詞-非自立", + "非言語音", + "副詞", + "副詞-一般", + "副詞-助詞類接続", + "名詞-ナイ形容詞語幹", + "名詞-引用文字列", + "名詞-形容動詞語幹", + "名詞-数", + "名詞-接続詞的", + "名詞-接尾", + "名詞-接尾-サ変接続", + "名詞-接尾-一般", + "名詞-接尾-形容動詞語幹", + "名詞-接尾-助数詞", + "名詞-接尾-助動詞語幹", + "名詞-接尾-人名", + "名詞-接尾-地域", + "名詞-接尾-特殊", + "名詞-接尾-副詞可能", + "名詞-代名詞", + "名詞-代名詞-一般", + "名詞-代名詞-縮約", + "名詞-動詞非自立的", + "名詞-特殊", + "名詞-特殊-助動詞語幹", + "名詞-非自立", + "名詞-非自立-一般", + "名詞-非自立-形容動詞語幹", + "名詞-非自立-助動詞語幹", + "名詞-非自立-副詞可能", + "名詞-副詞可能", + "連体詞" + ] + }, + "stopword_en_filter": { + "type": "stop", + "stopwords": "_english_" + }, + "content_length_filter": { + "type": "length", + "max": 30 + }, + "limit_token_count_filter": { + "type": "limit", + "max_token_count": 10000 + }, + "stemmer_en_filter": { + "type": "stemmer", + "name": "english" + }, + "arabic_stop": { + "type": "stop", + "stopwords": "_arabic_" + }, + "arabic_keywords": { + "type": "keyword_marker", + "keywords_path": "${fess.dictionary.path}ar/protwords.txt" + }, + "arabic_stemmer": { + "type": "stemmer", + "language": "arabic" + }, + "catalan_elision": { + "type": "elision", + "articles": [ "d", "l", "m", "n", "s", "t"] + }, + "catalan_stop": { + "type": "stop", + "stopwords": "_catalan_" + }, + "catalan_keywords": { + "type": "keyword_marker", + "keywords_path": "${fess.dictionary.path}ca/protwords.txt" + }, + "catalan_stemmer": { + "type": "stemmer", + "language": "catalan" + }, + "czech_stop": { + "type": "stop", + "stopwords": "_czech_" + }, + "czech_keywords": { + "type": "keyword_marker", + "keywords_path": "${fess.dictionary.path}cs/protwords.txt" + }, + "czech_stemmer": { + "type": "stemmer", + "language": "czech" + }, + "danish_stop": { + "type": "stop", + "stopwords": "_danish_" + }, + "danish_keywords": { + "type": "keyword_marker", + "keywords_path": "${fess.dictionary.path}da/protwords.txt" + }, + "danish_stemmer": { + "type": "stemmer", + "language": "danish" + }, + "dutch_stop": { + "type": "stop", + "stopwords": "_dutch_" + }, + "dutch_keywords": { + "type": "keyword_marker", + "keywords_path": "${fess.dictionary.path}nl/protwords.txt" + }, + "dutch_stemmer": { + "type": "stemmer", + "language": "dutch" + }, + "dutch_override": { + "type": "stemmer_override", + "rules": [ + "fiets=>fiets", + "bromfiets=>bromfiets", + "ei=>eier", + "kind=>kinder" + ] + }, + "english_keywords": { + "type": "keyword_marker", + "keywords": ["hello"] + }, + "finnish_stop": { + "type": "stop", + "stopwords": "_finnish_" + }, + "finnish_keywords": { + "type": "keyword_marker", + "keywords": ["Hei"] + }, + "finnish_stemmer": { + "type": "stemmer", + "language": "finnish" + }, + "french_elision": { + "type": "elision", + "articles_case": true, + "articles": [ + "l", "m", "t", "qu", "n", "s", + "j", "d", "c", "jusqu", "quoiqu", + "lorsqu", "puisqu" + ] + }, + "french_stop": { + "type": "stop", + "stopwords": "_french_" + }, + "french_keywords": { + "type": "keyword_marker", + "keywords_path": "${fess.dictionary.path}fr/protwords.txt" + }, + "french_stemmer": { + "type": "stemmer", + "language": "light_french" + }, + "german_stop": { + "type": "stop", + "stopwords": "_german_" + }, + "german_keywords": { + "type": "keyword_marker", + "keywords_path": "${fess.dictionary.path}de/protwords.txt" + }, + "german_stemmer": { + "type": "stemmer", + "language": "light_german" + }, + "greek_stop": { + "type": "stop", + "stopwords": "_greek_" + }, + "greek_lowercase": { + "type": "lowercase", + "language": "greek" + }, + "greek_keywords": { + "type": "keyword_marker", + "keywords_path": "${fess.dictionary.path}el/protwords.txt" + }, + "greek_stemmer": { + "type": "stemmer", + "language": "greek" + }, + "hindi_stop": { + "type": "stop", + "stopwords": "_hindi_" + }, + "hungarian_stop": { + "type": "stop", + "stopwords": "_hungarian_" + }, + "hungarian_keywords": { + "type": "keyword_marker", + "keywords_path": "${fess.dictionary.path}hu/protwords.txt" + }, + "hungarian_stemmer": { + "type": "stemmer", + "language": "hungarian" + }, + "indonesian_stop": { + "type": "stop", + "stopwords": "_indonesian_" + }, + "indonesian_keywords": { + "type": "keyword_marker", + "keywords_path": "${fess.dictionary.path}id/protwords.txt" + }, + "indonesian_stemmer": { + "type": "stemmer", + "language": "indonesian" + }, + "italian_elision": { + "type": "elision", + "articles": [ + "c", "l", "all", "dall", "dell", + "nell", "sull", "coll", "pell", + "gl", "agl", "dagl", "degl", "negl", + "sugl", "un", "m", "t", "s", "v", "d" + ] + }, + "italian_stop": { + "type": "stop", + "stopwords": "_italian_" + }, + "italian_keywords": { + "type": "keyword_marker", + "keywords_path": "${fess.dictionary.path}it/protwords.txt" + }, + "italian_stemmer": { + "type": "stemmer", + "language": "light_italian" + }, + "latvian_stop": { + "type": "stop", + "stopwords": "_latvian_" + }, + "latvian_keywords": { + "type": "keyword_marker", + "keywords_path": "${fess.dictionary.path}lv/protwords.txt" + }, + "latvian_stemmer": { + "type": "stemmer", + "language": "latvian" + }, + "lithuanian_stop": { + "type": "stop", + "stopwords": "_lithuanian_" + }, + "lithuanian_keywords": { + "type": "keyword_marker", + "keywords_path": "${fess.dictionary.path}lt/protwords.txt" + }, + "lithuanian_stemmer": { + "type": "stemmer", + "language": "lithuanian" + }, + "norwegian_stop": { + "type": "stop", + "stopwords": "_norwegian_" + }, + "norwegian_keywords": { + "type": "keyword_marker", + "keywords_path": "${fess.dictionary.path}no/protwords.txt" + }, + "norwegian_stemmer": { + "type": "stemmer", + "language": "norwegian" + }, + "persian_stop": { + "type": "stop", + "stopwords": "_persian_" + }, + "portuguese_stop": { + "type": "stop", + "stopwords": "_portuguese_" + }, + "portuguese_keywords": { + "type": "keyword_marker", + "keywords_path": "${fess.dictionary.path}pt/protwords.txt" + }, + "portuguese_stemmer": { + "type": "stemmer", + "language": "light_portuguese" + }, + "romanian_stop": { + "type": "stop", + "stopwords": "_romanian_" + }, + "romanian_keywords": { + "type": "keyword_marker", + "keywords_path": "${fess.dictionary.path}ro/protwords.txt" + }, + "romanian_stemmer": { + "type": "stemmer", + "language": "romanian" + }, + "russian_stop": { + "type": "stop", + "stopwords": "_russian_" + }, + "russian_keywords": { + "type": "keyword_marker", + "keywords_path": "${fess.dictionary.path}ru/protwords.txt" + }, + "russian_stemmer": { + "type": "stemmer", + "language": "russian" + }, + "spanish_stop": { + "type": "stop", + "stopwords": "_spanish_" + }, + "spanish_keywords": { + "type": "keyword_marker", + "keywords_path": "${fess.dictionary.path}es/protwords.txt" + }, + "spanish_stemmer": { + "type": "stemmer", + "language": "light_spanish" + }, + "swedish_stop": { + "type": "stop", + "stopwords": "_swedish_" + }, + "swedish_keywords": { + "type": "keyword_marker", + "keywords_path": "${fess.dictionary.path}sv/protwords.txt" + }, + "swedish_stemmer": { + "type": "stemmer", + "language": "swedish" + }, + "turkish_stop": { + "type": "stop", + "stopwords": "_turkish_" + }, + "turkish_lowercase": { + "type": "lowercase", + "language": "turkish" + }, + "turkish_keywords": { + "type": "keyword_marker", + "keywords_path": "${fess.dictionary.path}tr/protwords.txt" + }, + "turkish_stemmer": { + "type": "stemmer", + "language": "turkish" + }, + "thai_stop": { + "type": "stop", + "stopwords": "_thai_" + } + } + } +}