fix #580 replace with analysis-fess plugin

This commit is contained in:
Shinsuke Sugaya 2016-07-15 22:50:40 +09:00
parent 7d1115dd44
commit df0479655e
3 changed files with 161 additions and 32 deletions

View file

@ -11,14 +11,14 @@
<mkdir dir="${target.dir}" />
<delete dir="${plugins.dir}" />
<mkdir dir="${plugins.dir}" />
<!-- analysis-kuromoji-neologd -->
<!-- analysis-fess -->
<antcall target="install.plugin">
<param name="repo.url" value="${maven.release.repo.url}" />
<param name="repo.url" value="${maven.snapshot.repo.url}" />
<param name="plugin.groupId" value="org/codelibs" />
<param name="plugin.name.prefix" value="elasticsearch-" />
<param name="plugin.name" value="analysis-kuromoji-neologd" />
<param name="plugin.version" value="2.3.1" />
<param name="plugin.zip.version" value="2.3.1" />
<param name="plugin.name" value="analysis-fess" />
<param name="plugin.version" value="2.3.0-SNAPSHOT" />
<param name="plugin.zip.version" value="2.3.0-20160714.212731-1" />
</antcall>
<!-- analysis-ja -->
<antcall target="install.plugin">
@ -74,21 +74,6 @@
<param name="plugin.version" value="2.3.0" />
<param name="plugin.zip.version" value="2.3.0" />
</antcall>
<!-- seunjeon -->
<antcall target="install.plugin">
<param name="repo.url" value="${maven.release.repo.url}" />
<param name="plugin.groupId" value="org/bitbucket/eunjeon" />
<param name="plugin.name.prefix" value="elasticsearch-" />
<param name="plugin.name" value="analysis-seunjeon" />
<param name="plugin.version" value="${elasticsearch.version}.0" />
<param name="plugin.zip.version" value="${elasticsearch.version}.0" />
</antcall>
<jar destfile="${plugins.dir}/analysis-seunjeon/elasticsearch-analysis-seunjeon-${elasticsearch.version}.jar">
<zipfileset
src="${plugins.dir}/analysis-seunjeon/elasticsearch-analysis-seunjeon-assembly-${elasticsearch.version}.0.jar"
excludes="org/slf4j/**"/>
</jar>
<delete file="${plugins.dir}/analysis-seunjeon/elasticsearch-analysis-seunjeon-assembly-${elasticsearch.version}.0.jar"/>
<!-- kopf -->
<get dest="${target.dir}">
<url url="http://maven.codelibs.org/archive/elasticsearch/plugin/kopf/elasticsearch-kopf-2.0.1.0.zip" />

View file

@ -0,0 +1,144 @@
{
"analysis" : {
"tokenizer" : {
"fess_japanese_normal" : {
"type" : "fess_japanese_tokenizer",
"mode" : "normal",
"discard_punctuation" : "false"
}
},
"analyzer" : {
"reading_analyzer" : {
"type" : "custom",
"tokenizer" : "fess_japanese_normal",
"filter" : ["reading_form"]
},
"reading_term_analyzer" : {
"type" : "custom",
"tokenizer" : "fess_japanese_normal"
},
"normalize_analyzer" : {
"type" : "custom",
"tokenizer" : "keyword",
"char_filter" : ["mapping_char"],
"filter" : ["lowercase"]
},
"contents_analyzer" : {
"type" : "custom",
"tokenizer" : "fess_japanese_normal",
"filter" : ["stopword_en_filter", "pos_filter", "content_length_filter", "limit_token_count_filter"]
}
},
"char_filter" : {
"mapping_char" : {
"type" : "mapping",
"mappings" : ["ガ=>ガ", "ギ=>ギ", "グ=>グ", "ゲ=>ゲ", "ゴ=>ゴ", "ザ=>ザ", "ジ=>ジ", "ズ=>ズ", "ゼ=>ゼ", "ゾ=>ゾ", "ダ=>ダ", "ヂ=>ヂ", "ヅ=>ヅ",
"デ=>デ", "ド=>ド", "バ=>バ", "ビ=>ビ", "ブ=>ブ", "ベ=>ベ", "ボ=>ボ", "。=>。", "「=>「", "」=>」", "、=>、", "・=>・", "ヲ=>ヲ", "ァ=>ァ", "ィ=>ィ", "ゥ=>ゥ", "ェ=>ェ", "ォ=>ォ", "ャ=>ャ", "ュ=>ュ", "ョ=>ョ", "ッ=>ッ", "ア=>ア",
"イ=>イ", "ウ=>ウ", "エ=>エ", "オ=>オ", "カ=>カ", "キ=>キ", "ク=>ク", "ケ=>ケ", "コ=>コ", "サ=>サ", "シ=>シ", "ス=>ス", "セ=>セ", "ソ=>ソ", "タ=>タ", "チ=>チ", "ツ=>ツ", "テ=>テ", "ト=>ト", "ナ=>ナ", "ニ=>ニ", "ヌ=>ヌ", "ネ=>ネ", "ノ=>", "ハ=>ハ",
"ヒ=>ヒ", "フ=>フ", "ヘ=>ヘ", "ホ=>ホ", "マ=>マ", "ミ=>ミ", "ム=>ム", "メ=>メ", "モ=>モ", "ヤ=>ヤ", "ユ=>ユ", "ヨ=>ヨ", "ラ=>ラ", "リ=>リ", "ル=>ル", "レ=>レ", "ロ=>ロ", "ワ=>ワ", "ン=>ン",
"=>a", "=>b","=>c","=>d","=>e","=>f","=>g","=>h","=>i","=>j","=>k","=>l","=>m","=>n","=>o","=>p","=>q","=>r","=>s",
"=>t","=>u","=>v","=>w","=>x", "=>y", "=>z",
"=>A", "=>B","=>C","=>D","=>E","=>F","=>g","=>H","=>I","=>j","=>k","=>L","=>M","=>N","=>O","=>P","=>Q","=>R","=>S",
"=>T","=>U","=>V","=>W","=>X", "=>Y", "=>Z",
"=>1", "=>2", "=>3", "=>4", "=>5", "=>6", "=>7", "=>8", "=>9", "=>0"
]
}
},
"filter" : {
"reading_form" : {
"type" : "fess_japanese_readingform"
},
"pos_filter" : {
"type" : "fess_japanese_part_of_speech",
"stoptags" : [
"その他",
"その他-間投",
"フィラー",
"感動詞",
"記号",
"記号-アルファベット",
"記号-一般",
"記号-括弧開",
"記号-括弧閉",
"記号-句点",
"記号-空白",
"記号-読点",
"形容詞",
"形容詞-接尾",
"形容詞-非自立",
"語断片",
"助詞",
"助詞-格助詞",
"助詞-格助詞-一般",
"助詞-格助詞-引用",
"助詞-格助詞-連語",
"助詞-間投助詞",
"助詞-係助詞",
"助詞-終助詞",
"助詞-接続助詞",
"助詞-特殊",
"助詞-副詞化",
"助詞-副助詞",
"助詞-副助詞/並立助詞/終助詞",
"助詞-並立助詞",
"助詞-連体化",
"助動詞",
"接続詞",
"接頭詞",
"接頭詞-形容詞接続",
"接頭詞-数接続",
"接頭詞-動詞接続",
"接頭詞-名詞接続",
"動詞",
"動詞-自立",
"動詞-接尾",
"動詞-非自立",
"非言語音",
"副詞",
"副詞-一般",
"副詞-助詞類接続",
"名詞-ナイ形容詞語幹",
"名詞-引用文字列",
"名詞-形容動詞語幹",
"名詞-数",
"名詞-接続詞的",
"名詞-接尾",
"名詞-接尾-サ変接続",
"名詞-接尾-一般",
"名詞-接尾-形容動詞語幹",
"名詞-接尾-助数詞",
"名詞-接尾-助動詞語幹",
"名詞-接尾-人名",
"名詞-接尾-地域",
"名詞-接尾-特殊",
"名詞-接尾-副詞可能",
"名詞-代名詞",
"名詞-代名詞-一般",
"名詞-代名詞-縮約",
"名詞-動詞非自立的",
"名詞-特殊",
"名詞-特殊-助動詞語幹",
"名詞-非自立",
"名詞-非自立-一般",
"名詞-非自立-形容動詞語幹",
"名詞-非自立-助動詞語幹",
"名詞-非自立-副詞可能",
"名詞-副詞可能",
"連体詞"
]
},
"stopword_en_filter": {
"type": "stop",
"stopwords": "_english_"
},
"content_length_filter": {
"type": "length",
"max": 30
},
"limit_token_count_filter": {
"type": "limit",
"max_token_count": 10000
}
}
}
}

View file

@ -34,8 +34,8 @@
"type": "stop",
"stopwords": "_english_"
},
"kuromoji_neologd_pos_filter" : {
"type" : "kuromoji_neologd_part_of_speech",
"japanese_pos_filter" : {
"type" : "fess_japanese_part_of_speech",
"stoptags" : [
"その他",
"その他-間投",
@ -405,15 +405,15 @@
}
},
"tokenizer": {
"kuromoji_neologd_tokenizer": {
"type": "reloadable_kuromoji_neologd_tokenizer",
"japanese_tokenizer": {
"type": "fess_japanese_reloadable_tokenizer",
"mode": "normal",
"user_dictionary": "${fess.dictionary.path}ja/kuromoji.txt",
"discard_punctuation": false,
"reload_interval":"1m"
},
"seunjeon_default_tokenizer": {
"type": "seunjeon_tokenizer",
"korean_tokenizer": {
"type": "fess_korean_tokenizer",
"user_dict_path": "${fess.dictionary.path}ko/seunjeon.txt"
},
"unigram_synonym_tokenizer": {
@ -436,14 +436,14 @@
"type": "custom",
"char_filter": [
"mapping_ja_filter",
"kuromoji_neologd_iteration_mark"
"fess_japanese_iteration_mark"
],
"tokenizer": "kuromoji_neologd_tokenizer",
"tokenizer": "japanese_tokenizer",
"filter": [
"truncate10_filter",
"kuromoji_neologd_baseform",
"kuromoji_neologd_stemmer",
"kuromoji_neologd_pos_filter",
"fess_japanese_baseform",
"fess_japanese_stemmer",
"japanese_pos_filter",
"lowercase"
]
},
@ -458,7 +458,7 @@
},
"korean_analyzer": {
"type": "custom",
"tokenizer":"seunjeon_default_tokenizer"
"tokenizer":"korean_tokenizer"
},
"german_analyzer": {
"type": "custom",