parent
f0d3c6f40e
commit
85d8395299
6 changed files with 161 additions and 3 deletions
|
@ -20,7 +20,7 @@
|
|||
<param name="plugin.name.prefix" value="elasticsearch-" />
|
||||
<param name="plugin.name" value="analysis-fess" />
|
||||
<param name="plugin.version" value="5.4.2-SNAPSHOT" />
|
||||
<param name="plugin.zip.version" value="5.4.2-20170616.220752-1" />
|
||||
<param name="plugin.zip.version" value="5.4.2-20170617.065236-3" />
|
||||
</antcall>
|
||||
<!-- analysis-ja -->
|
||||
<antcall target="install.plugin">
|
||||
|
|
|
@ -148,6 +148,14 @@
|
|||
<arg>"fess"</arg>
|
||||
<arg>"tr/protwords.txt"</arg>
|
||||
</postConstruct>
|
||||
<postConstruct name="addConfigFile">
|
||||
<arg>"fess"</arg>
|
||||
<arg>"zh-cn/stopwords.txt"</arg>
|
||||
</postConstruct>
|
||||
<postConstruct name="addConfigFile">
|
||||
<arg>"fess"</arg>
|
||||
<arg>"zh-tw/stopwords.txt"</arg>
|
||||
</postConstruct>
|
||||
<!-- fess index -->
|
||||
<postConstruct name="addIndexConfig">
|
||||
<arg>"fess/doc"</arg>
|
||||
|
|
|
@ -15,6 +15,10 @@
|
|||
"type": "mapping",
|
||||
"mappings_path": "${fess.dictionary.path}ja/mapping.txt"
|
||||
},
|
||||
"traditional_chinese_convert": {
|
||||
"type": "fess_traditional_chinese_convert",
|
||||
"convert_type": "t2s"
|
||||
},
|
||||
"zero_width_spaces": {
|
||||
"type": "mapping",
|
||||
"mappings": [ "\\u200C=> "]
|
||||
|
@ -431,6 +435,10 @@
|
|||
"type": "stemmer",
|
||||
"language": "russian"
|
||||
},
|
||||
"simplified_chinese_stop": {
|
||||
"type": "stop",
|
||||
"stopwords_path": "${fess.dictionary.path}zh-cn/stopwords.txt"
|
||||
},
|
||||
"sorani_stop": {
|
||||
"type": "stop",
|
||||
"stopwords": "_sorani_"
|
||||
|
@ -471,6 +479,10 @@
|
|||
"type": "stop",
|
||||
"stopwords": "_thai_"
|
||||
},
|
||||
"traditional_chinese_stop": {
|
||||
"type": "stop",
|
||||
"stopwords_path": "${fess.dictionary.path}zh-tw/stopwords.txt"
|
||||
},
|
||||
"turkish_stop": {
|
||||
"type": "stop",
|
||||
"stopwords": "_turkish_"
|
||||
|
@ -524,6 +536,9 @@
|
|||
"pos_tagging": false,
|
||||
"user_dict_path": "${fess.dictionary.path}ko/seunjeon.txt"
|
||||
},
|
||||
"simplified_chinese_tokenizer": {
|
||||
"type": "fess_simplified_chinese_tokenizer"
|
||||
},
|
||||
"vietnamese_tokenizer": {
|
||||
"type": "fess_vietnamese_tokenizer",
|
||||
"sentence_detector": false,
|
||||
|
@ -865,6 +880,14 @@
|
|||
"russian_stemmer"
|
||||
]
|
||||
},
|
||||
"simplified_chinese_analyzer": {
|
||||
"tokenizer": "simplified_chinese_tokenizer",
|
||||
"filter": [
|
||||
"truncate10_filter",
|
||||
"lowercase",
|
||||
"simplified_chinese_stop"
|
||||
]
|
||||
},
|
||||
"sorani_analyzer": {
|
||||
"tokenizer": "standard",
|
||||
"filter": [
|
||||
|
@ -907,6 +930,15 @@
|
|||
"thai_stop"
|
||||
]
|
||||
},
|
||||
"traditional_chinese_analyzer": {
|
||||
"char_filter": [ "traditional_chinese_convert" ],
|
||||
"tokenizer": "simplified_chinese_tokenizer",
|
||||
"filter": [
|
||||
"truncate10_filter",
|
||||
"lowercase",
|
||||
"traditional_chinese_stop"
|
||||
]
|
||||
},
|
||||
"turkish_analyzer": {
|
||||
"type": "custom",
|
||||
"tokenizer": "standard",
|
||||
|
|
|
@ -471,7 +471,7 @@
|
|||
"match": "*_zh-cn",
|
||||
"mapping": {
|
||||
"type": "text",
|
||||
"analyzer": "empty_analyzer"
|
||||
"analyzer": "simplified_chinese_analyzer"
|
||||
}
|
||||
}
|
||||
},
|
||||
|
@ -480,7 +480,7 @@
|
|||
"match": "*_zh-tw",
|
||||
"mapping": {
|
||||
"type": "text",
|
||||
"analyzer": "empty_analyzer"
|
||||
"analyzer": "traditional_chinese_analyzer"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
59
src/main/resources/fess_indices/fess/zh-cn/stopwords.txt
Normal file
59
src/main/resources/fess_indices/fess/zh-cn/stopwords.txt
Normal file
|
@ -0,0 +1,59 @@
|
|||
# Punctuation tokens to remove
|
||||
,
|
||||
.
|
||||
`
|
||||
-
|
||||
_
|
||||
=
|
||||
?
|
||||
'
|
||||
|
|
||||
"
|
||||
(
|
||||
)
|
||||
{
|
||||
}
|
||||
[
|
||||
]
|
||||
<
|
||||
>
|
||||
*
|
||||
#
|
||||
&
|
||||
^
|
||||
$
|
||||
@
|
||||
!
|
||||
~
|
||||
:
|
||||
;
|
||||
+
|
||||
/
|
||||
\
|
||||
《
|
||||
》
|
||||
—
|
||||
-
|
||||
,
|
||||
。
|
||||
、
|
||||
:
|
||||
;
|
||||
!
|
||||
·
|
||||
?
|
||||
“
|
||||
”
|
||||
)
|
||||
(
|
||||
【
|
||||
】
|
||||
[
|
||||
]
|
||||
●
|
||||
# the line below contains an IDEOGRAPHIC SPACE character (Used as a space in Chinese)
|
||||
|
||||
|
||||
# English Stop Words
|
||||
|
||||
# Chinese Stop Words
|
59
src/main/resources/fess_indices/fess/zh-tw/stopwords.txt
Normal file
59
src/main/resources/fess_indices/fess/zh-tw/stopwords.txt
Normal file
|
@ -0,0 +1,59 @@
|
|||
# Punctuation tokens to remove
|
||||
,
|
||||
.
|
||||
`
|
||||
-
|
||||
_
|
||||
=
|
||||
?
|
||||
'
|
||||
|
|
||||
"
|
||||
(
|
||||
)
|
||||
{
|
||||
}
|
||||
[
|
||||
]
|
||||
<
|
||||
>
|
||||
*
|
||||
#
|
||||
&
|
||||
^
|
||||
$
|
||||
@
|
||||
!
|
||||
~
|
||||
:
|
||||
;
|
||||
+
|
||||
/
|
||||
\
|
||||
《
|
||||
》
|
||||
—
|
||||
-
|
||||
,
|
||||
。
|
||||
、
|
||||
:
|
||||
;
|
||||
!
|
||||
·
|
||||
?
|
||||
“
|
||||
”
|
||||
)
|
||||
(
|
||||
【
|
||||
】
|
||||
[
|
||||
]
|
||||
●
|
||||
# the line below contains an IDEOGRAPHIC SPACE character (Used as a space in Chinese)
|
||||
|
||||
|
||||
# English Stop Words
|
||||
|
||||
# Chinese Stop Words
|
Loading…
Add table
Reference in a new issue