Merge pull request #1770 from morishima-k/dictionary_file

add stemmer_override and stopwords
This commit is contained in:
Shinsuke Sugaya 2018-07-20 22:04:43 +09:00 committed by GitHub
commit 957176519c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
63 changed files with 5938 additions and 34 deletions

View file

@ -164,10 +164,146 @@
<arg>"fess"</arg>
<arg>"tr/protwords.txt"</arg>
</postConstruct>
<postConstruct name="addConfigFile">
<arg>"fess"</arg>
<arg>"ar/stopwords.txt"</arg>
</postConstruct>
<postConstruct name="addConfigFile">
<arg>"fess"</arg>
<arg>"bg/stopwords.txt"</arg>
</postConstruct>
<postConstruct name="addConfigFile">
<arg>"fess"</arg>
<arg>"ca/stopwords.txt"</arg>
</postConstruct>
<postConstruct name="addConfigFile">
<arg>"fess"</arg>
<arg>"ckb-iq/stopwords.txt"</arg>
</postConstruct>
<postConstruct name="addConfigFile">
<arg>"fess"</arg>
<arg>"cs/stopwords.txt"</arg>
</postConstruct>
<postConstruct name="addConfigFile">
<arg>"fess"</arg>
<arg>"da/stopwords.txt"</arg>
</postConstruct>
<postConstruct name="addConfigFile">
<arg>"fess"</arg>
<arg>"de/stopwords.txt"</arg>
</postConstruct>
<postConstruct name="addConfigFile">
<arg>"fess"</arg>
<arg>"el/stopwords.txt"</arg>
</postConstruct>
<postConstruct name="addConfigFile">
<arg>"fess"</arg>
<arg>"en/stopwords.txt"</arg>
</postConstruct>
<postConstruct name="addConfigFile">
<arg>"fess"</arg>
<arg>"en-ie/stopwords.txt"</arg>
</postConstruct>
<postConstruct name="addConfigFile">
<arg>"fess"</arg>
<arg>"es/stopwords.txt"</arg>
</postConstruct>
<postConstruct name="addConfigFile">
<arg>"fess"</arg>
<arg>"eu/stopwords.txt"</arg>
</postConstruct>
<postConstruct name="addConfigFile">
<arg>"fess"</arg>
<arg>"fa/stopwords.txt"</arg>
</postConstruct>
<postConstruct name="addConfigFile">
<arg>"fess"</arg>
<arg>"fi/stopwords.txt"</arg>
</postConstruct>
<postConstruct name="addConfigFile">
<arg>"fess"</arg>
<arg>"fr/stopwords.txt"</arg>
</postConstruct>
<postConstruct name="addConfigFile">
<arg>"fess"</arg>
<arg>"gl/stopwords.txt"</arg>
</postConstruct>
<postConstruct name="addConfigFile">
<arg>"fess"</arg>
<arg>"hi/stopwords.txt"</arg>
</postConstruct>
<postConstruct name="addConfigFile">
<arg>"fess"</arg>
<arg>"hu/stopwords.txt"</arg>
</postConstruct>
<postConstruct name="addConfigFile">
<arg>"fess"</arg>
<arg>"hy/stopwords.txt"</arg>
</postConstruct>
<postConstruct name="addConfigFile">
<arg>"fess"</arg>
<arg>"id/stopwords.txt"</arg>
</postConstruct>
<postConstruct name="addConfigFile">
<arg>"fess"</arg>
<arg>"it/stopwords.txt"</arg>
</postConstruct>
<postConstruct name="addConfigFile">
<arg>"fess"</arg>
<arg>"ja/stopwords.txt"</arg>
</postConstruct>
<postConstruct name="addConfigFile">
<arg>"fess"</arg>
<arg>"ko/stopwords.txt"</arg>
</postConstruct>
<postConstruct name="addConfigFile">
<arg>"fess"</arg>
<arg>"lt/stopwords.txt"</arg>
</postConstruct>
<postConstruct name="addConfigFile">
<arg>"fess"</arg>
<arg>"lv/stopwords.txt"</arg>
</postConstruct>
<postConstruct name="addConfigFile">
<arg>"fess"</arg>
<arg>"nl/stopwords.txt"</arg>
</postConstruct>
<postConstruct name="addConfigFile">
<arg>"fess"</arg>
<arg>"no/stopwords.txt"</arg>
</postConstruct>
<postConstruct name="addConfigFile">
<arg>"fess"</arg>
<arg>"pt-br/stopwords.txt"</arg>
</postConstruct>
<postConstruct name="addConfigFile">
<arg>"fess"</arg>
<arg>"pt/stopwords.txt"</arg>
</postConstruct>
<postConstruct name="addConfigFile">
<arg>"fess"</arg>
<arg>"ro/stopwords.txt"</arg>
</postConstruct>
<postConstruct name="addConfigFile">
<arg>"fess"</arg>
<arg>"ru/stopwords.txt"</arg>
</postConstruct>
<postConstruct name="addConfigFile">
<arg>"fess"</arg>
<arg>"sv/stopwords.txt"</arg>
</postConstruct>
<postConstruct name="addConfigFile">
<arg>"fess"</arg>
<arg>"th/stopwords.txt"</arg>
</postConstruct>
<postConstruct name="addConfigFile">
<arg>"fess"</arg>
<arg>"tr/stopwords.txt"</arg>
</postConstruct>
<postConstruct name="addConfigFile">
<arg>"fess"</arg>
<arg>"vi/stopwords.txt"</arg>
</postConstruct>
<postConstruct name="addConfigFile">
<arg>"fess"</arg>
<arg>"zh-cn/stopwords.txt"</arg>
@ -176,14 +312,154 @@
<arg>"fess"</arg>
<arg>"zh-tw/stopwords.txt"</arg>
</postConstruct>
<postConstruct name="addConfigFile">
<arg>"fess"</arg>
<arg>"ar/stemmer_override.txt"</arg>
</postConstruct>
<postConstruct name="addConfigFile">
<arg>"fess"</arg>
<arg>"bg/stemmer_override.txt"</arg>
</postConstruct>
<postConstruct name="addConfigFile">
<arg>"fess"</arg>
<arg>"ca/stemmer_override.txt"</arg>
</postConstruct>
<postConstruct name="addConfigFile">
<arg>"fess"</arg>
<arg>"ckb-iq/stemmer_override.txt"</arg>
</postConstruct>
<postConstruct name="addConfigFile">
<arg>"fess"</arg>
<arg>"cs/stemmer_override.txt"</arg>
</postConstruct>
<postConstruct name="addConfigFile">
<arg>"fess"</arg>
<arg>"da/stemmer_override.txt"</arg>
</postConstruct>
<postConstruct name="addConfigFile">
<arg>"fess"</arg>
<arg>"de/stemmer_override.txt"</arg>
</postConstruct>
<postConstruct name="addConfigFile">
<arg>"fess"</arg>
<arg>"el/stemmer_override.txt"</arg>
</postConstruct>
<postConstruct name="addConfigFile">
<arg>"fess"</arg>
<arg>"en/stemmer_override.txt"</arg>
</postConstruct>
<postConstruct name="addConfigFile">
<arg>"fess"</arg>
<arg>"en-ie/stemmer_override.txt"</arg>
</postConstruct>
<postConstruct name="addConfigFile">
<arg>"fess"</arg>
<arg>"es/stemmer_override.txt"</arg>
</postConstruct>
<postConstruct name="addConfigFile">
<arg>"fess"</arg>
<arg>"eu/stemmer_override.txt"</arg>
</postConstruct>
<postConstruct name="addConfigFile">
<arg>"fess"</arg>
<arg>"fa/stemmer_override.txt"</arg>
</postConstruct>
<postConstruct name="addConfigFile">
<arg>"fess"</arg>
<arg>"fi/stemmer_override.txt"</arg>
</postConstruct>
<postConstruct name="addConfigFile">
<arg>"fess"</arg>
<arg>"fr/stemmer_override.txt"</arg>
</postConstruct>
<postConstruct name="addConfigFile">
<arg>"fess"</arg>
<arg>"gl/stemmer_override.txt"</arg>
</postConstruct>
<postConstruct name="addConfigFile">
<arg>"fess"</arg>
<arg>"hi/stemmer_override.txt"</arg>
</postConstruct>
<postConstruct name="addConfigFile">
<arg>"fess"</arg>
<arg>"hu/stemmer_override.txt"</arg>
</postConstruct>
<postConstruct name="addConfigFile">
<arg>"fess"</arg>
<arg>"hy/stemmer_override.txt"</arg>
</postConstruct>
<postConstruct name="addConfigFile">
<arg>"fess"</arg>
<arg>"id/stemmer_override.txt"</arg>
</postConstruct>
<postConstruct name="addConfigFile">
<arg>"fess"</arg>
<arg>"it/stemmer_override.txt"</arg>
</postConstruct>
<postConstruct name="addConfigFile">
<arg>"fess"</arg>
<arg>"ja/stemmer_override.txt"</arg>
</postConstruct>
<postConstruct name="addConfigFile">
<arg>"fess"</arg>
<arg>"ko/stemmer_override.txt"</arg>
</postConstruct>
<postConstruct name="addConfigFile">
<arg>"fess"</arg>
<arg>"lt/stemmer_override.txt"</arg>
</postConstruct>
<postConstruct name="addConfigFile">
<arg>"fess"</arg>
<arg>"lv/stemmer_override.txt"</arg>
</postConstruct>
<postConstruct name="addConfigFile">
<arg>"fess"</arg>
<arg>"nl/stemmer_override.txt"</arg>
</postConstruct>
<postConstruct name="addConfigFile">
<arg>"fess"</arg>
<arg>"no/stemmer_override.txt"</arg>
</postConstruct>
<postConstruct name="addConfigFile">
<arg>"fess"</arg>
<arg>"pt-br/stemmer_override.txt"</arg>
</postConstruct>
<postConstruct name="addConfigFile">
<arg>"fess"</arg>
<arg>"pt/stemmer_override.txt"</arg>
</postConstruct>
<postConstruct name="addConfigFile">
<arg>"fess"</arg>
<arg>"ro/stemmer_override.txt"</arg>
</postConstruct>
<postConstruct name="addConfigFile">
<arg>"fess"</arg>
<arg>"ru/stemmer_override.txt"</arg>
</postConstruct>
<postConstruct name="addConfigFile">
<arg>"fess"</arg>
<arg>"sv/stemmer_override.txt"</arg>
</postConstruct>
<postConstruct name="addConfigFile">
<arg>"fess"</arg>
<arg>"th/stemmer_override.txt"</arg>
</postConstruct>
<postConstruct name="addConfigFile">
<arg>"fess"</arg>
<arg>"tr/stemmer_override.txt"</arg>
</postConstruct>
<postConstruct name="addConfigFile">
<arg>"fess"</arg>
<arg>"vi/stemmer_override.txt"</arg>
</postConstruct>
<postConstruct name="addConfigFile">
<arg>"fess"</arg>
<arg>"zh-cn/stemmer_override.txt"</arg>
</postConstruct>
<postConstruct name="addConfigFile">
<arg>"fess"</arg>
<arg>"zh-tw/stemmer_override.txt"</arg>
</postConstruct>
<!-- fess index -->
<postConstruct name="addIndexConfig">
<arg>"fess/doc"</arg>

View file

@ -54,7 +54,7 @@
},
"arabic_stop": {
"type": "stop",
"stopwords": "_arabic_"
"stopwords_path": "${fess.dictionary.path}ar/stopwords.txt"
},
"arabic_keywords": {
"type": "keyword_marker",
@ -64,9 +64,13 @@
"type": "stemmer",
"language": "arabic"
},
"arabic_override": {
"type": "stemmer_override",
"rules_path": "${fess.dictionary.path}ar/stemmer_override.txt"
},
"armenian_stop": {
"type": "stop",
"stopwords": "_armenian_"
"stopwords_path": "${fess.dictionary.path}hy/stopwords.txt"
},
"armenian_keywords": {
"type": "keyword_marker",
@ -76,9 +80,13 @@
"type": "stemmer",
"language": "armenian"
},
"armenian_override": {
"type": "stemmer_override",
"rules_path": "${fess.dictionary.path}hy/stemmer_override.txt"
},
"basque_stop": {
"type": "stop",
"stopwords": "_basque_"
"stopwords_path": "${fess.dictionary.path}eu/stopwords.txt"
},
"basque_keywords": {
"type": "keyword_marker",
@ -88,9 +96,13 @@
"type": "stemmer",
"language": "basque"
},
"basque_override": {
"type": "stemmer_override",
"rules_path": "${fess.dictionary.path}eu/stemmer_override.txt"
},
"brazilian_stop": {
"type": "stop",
"stopwords": "_brazilian_"
"stopwords_path": "${fess.dictionary.path}pt-br/stopwords.txt"
},
"brazilian_keywords": {
"type": "keyword_marker",
@ -100,9 +112,13 @@
"type": "stemmer",
"language": "brazilian"
},
"brazilian_override": {
"type": "stemmer_override",
"rules_path": "${fess.dictionary.path}pt-br/stemmer_override.txt"
},
"bulgarian_stop": {
"type": "stop",
"stopwords": "_bulgarian_"
"stopwords_path": "${fess.dictionary.path}bg/stopwords.txt"
},
"bulgarian_keywords": {
"type": "keyword_marker",
@ -112,13 +128,17 @@
"type": "stemmer",
"language": "bulgarian"
},
"bulgarian_override": {
"type": "stemmer_override",
"rules_path": "${fess.dictionary.path}bg/stemmer_override.txt"
},
"catalan_elision": {
"type": "elision",
"type": "elision",
"articles": [ "d", "l", "m", "n", "s", "t"]
},
"catalan_stop": {
"type": "stop",
"stopwords": "_catalan_"
"stopwords_path": "${fess.dictionary.path}ca/stopwords.txt"
},
"catalan_keywords": {
"type": "keyword_marker",
@ -128,9 +148,13 @@
"type": "stemmer",
"language": "catalan"
},
"catalan_override": {
"type": "stemmer_override",
"rules_path": "${fess.dictionary.path}ca/stemmer_override.txt"
},
"czech_stop": {
"type": "stop",
"stopwords": "_czech_"
"stopwords_path": "${fess.dictionary.path}cs/stopwords.txt"
},
"czech_keywords": {
"type": "keyword_marker",
@ -140,9 +164,13 @@
"type": "stemmer",
"language": "czech"
},
"czech_override": {
"type": "stemmer_override",
"rules_path": "${fess.dictionary.path}cs/stemmer_override.txt"
},
"danish_stop": {
"type": "stop",
"stopwords": "_danish_"
"stopwords_path": "${fess.dictionary.path}da/stopwords.txt"
},
"danish_keywords": {
"type": "keyword_marker",
@ -152,9 +180,13 @@
"type": "stemmer",
"language": "danish"
},
"danish_override": {
"type": "stemmer_override",
"rules_path": "${fess.dictionary.path}da/stemmer_override.txt"
},
"dutch_stop": {
"type": "stop",
"stopwords": "_dutch_"
"stopwords_path": "${fess.dictionary.path}nl/stopwords.txt"
},
"dutch_keywords": {
"type": "keyword_marker",
@ -174,7 +206,7 @@
},
"finnish_stop": {
"type": "stop",
"stopwords": "_finnish_"
"stopwords_path": "${fess.dictionary.path}fi/stopwords.txt"
},
"finnish_keywords": {
"type": "keyword_marker",
@ -184,6 +216,10 @@
"type": "stemmer",
"language": "finnish"
},
"finnish_override": {
"type": "stemmer_override",
"rules_path": "${fess.dictionary.path}fi/stemmer_override.txt"
},
"french_elision": {
"type": "elision",
"articles_case": true,
@ -195,7 +231,7 @@
},
"french_stop": {
"type": "stop",
"stopwords": "_french_"
"stopwords_path": "${fess.dictionary.path}fr/stopwords.txt"
},
"french_keywords": {
"type": "keyword_marker",
@ -205,9 +241,13 @@
"type": "stemmer",
"language": "light_french"
},
"french_override": {
"type": "stemmer_override",
"rules_path": "${fess.dictionary.path}fr/stemmer_override.txt"
},
"galician_stop": {
"type": "stop",
"stopwords": "_galician_"
"stopwords_path": "${fess.dictionary.path}gl/stopwords.txt"
},
"galician_keywords": {
"type": "keyword_marker",
@ -217,9 +257,13 @@
"type": "stemmer",
"language": "galician"
},
"galician_override": {
"type": "stemmer_override",
"rules_path": "${fess.dictionary.path}gl/stemmer_override.txt"
},
"german_stop": {
"type": "stop",
"stopwords": "_german_"
"stopwords_path": "${fess.dictionary.path}de/stopwords.txt"
},
"german_keywords": {
"type": "keyword_marker",
@ -229,9 +273,13 @@
"type": "stemmer",
"language": "light_german"
},
"german_override": {
"type": "stemmer_override",
"rules_path": "${fess.dictionary.path}de/stemmer_override.txt"
},
"greek_stop": {
"type": "stop",
"stopwords": "_greek_"
"stopwords_path": "${fess.dictionary.path}el/stopwords.txt"
},
"greek_lowercase": {
"type": "lowercase",
@ -245,9 +293,13 @@
"type": "stemmer",
"language": "greek"
},
"greek_override": {
"type": "stemmer_override",
"rules_path": "${fess.dictionary.path}el/stemmer_override.txt"
},
"hindi_stop": {
"type": "stop",
"stopwords": "_hindi_"
"stopwords_path": "${fess.dictionary.path}hi/stopwords.txt"
},
"hindi_keywords": {
"type": "keyword_marker",
@ -257,9 +309,13 @@
"type": "stemmer",
"language": "hindi"
},
"hindi_override": {
"type": "stemmer_override",
"rules_path": "${fess.dictionary.path}hi/stemmer_override.txt"
},
"hungarian_stop": {
"type": "stop",
"stopwords": "_hungarian_"
"stopwords_path": "${fess.dictionary.path}hu/stopwords.txt"
},
"hungarian_keywords": {
"type": "keyword_marker",
@ -269,9 +325,13 @@
"type": "stemmer",
"language": "hungarian"
},
"hungarian_override": {
"type": "stemmer_override",
"rules_path": "${fess.dictionary.path}hu/stemmer_override.txt"
},
"indonesian_stop": {
"type": "stop",
"stopwords": "_indonesian_"
"stopwords_path": "${fess.dictionary.path}id/stopwords.txt"
},
"indonesian_keywords": {
"type": "keyword_marker",
@ -281,13 +341,17 @@
"type": "stemmer",
"language": "indonesian"
},
"indonesian_override": {
"type": "stemmer_override",
"rules_path": "${fess.dictionary.path}id/stemmer_override.txt"
},
"irish_elision": {
"type": "elision",
"articles": [ "h", "n", "t" ]
},
"irish_stop": {
"type": "stop",
"stopwords": "_irish_"
"stopwords_path": "${fess.dictionary.path}en-ie/stopwords.txt"
},
"irish_lowercase": {
"type": "lowercase",
@ -301,6 +365,10 @@
"type": "stemmer",
"language": "irish"
},
"irish_override": {
"type": "stemmer_override",
"rules_path": "${fess.dictionary.path}en-ie/stemmer_override.txt"
},
"italian_elision": {
"type": "elision",
"articles": [
@ -312,7 +380,7 @@
},
"italian_stop": {
"type": "stop",
"stopwords": "_italian_"
"stopwords_path": "${fess.dictionary.path}it/stopwords.txt"
},
"italian_keywords": {
"type": "keyword_marker",
@ -322,6 +390,10 @@
"type": "stemmer",
"language": "light_italian"
},
"italian_override": {
"type": "stemmer_override",
"rules_path": "${fess.dictionary.path}it/stemmer_override.txt"
},
"japanese_pos_filter" : {
"type" : "fess_japanese_part_of_speech",
"stoptags" : [
@ -370,7 +442,7 @@
},
"latvian_stop": {
"type": "stop",
"stopwords": "_latvian_"
"stopwords_path": "${fess.dictionary.path}lv/stopwords.txt"
},
"latvian_keywords": {
"type": "keyword_marker",
@ -380,9 +452,13 @@
"type": "stemmer",
"language": "latvian"
},
"latvian_override": {
"type": "stemmer_override",
"rules_path": "${fess.dictionary.path}lv/stemmer_override.txt"
},
"lithuanian_stop": {
"type": "stop",
"stopwords": "_lithuanian_"
"stopwords_path": "${fess.dictionary.path}lt/stopwords.txt"
},
"lithuanian_keywords": {
"type": "keyword_marker",
@ -392,9 +468,13 @@
"type": "stemmer",
"language": "lithuanian"
},
"lithuanian_override": {
"type": "stemmer_override",
"rules_path": "${fess.dictionary.path}lt/stemmer_override.txt"
},
"norwegian_stop": {
"type": "stop",
"stopwords": "_norwegian_"
"stopwords_path": "${fess.dictionary.path}no/stopwords.txt"
},
"norwegian_keywords": {
"type": "keyword_marker",
@ -404,13 +484,17 @@
"type": "stemmer",
"language": "norwegian"
},
"norwegian_override": {
"type": "stemmer_override",
"rules_path": "${fess.dictionary.path}no/stemmer_override.txt"
},
"persian_stop": {
"type": "stop",
"stopwords": "_persian_"
"stopwords_path": "${fess.dictionary.path}fa/stopwords.txt"
},
"portuguese_stop": {
"type": "stop",
"stopwords": "_portuguese_"
"stopwords_path": "${fess.dictionary.path}pt/stopwords.txt"
},
"portuguese_keywords": {
"type": "keyword_marker",
@ -420,9 +504,13 @@
"type": "stemmer",
"language": "light_portuguese"
},
"portuguese_override": {
"type": "stemmer_override",
"rules_path": "${fess.dictionary.path}pt/stemmer_override.txt"
},
"romanian_stop": {
"type": "stop",
"stopwords": "_romanian_"
"stopwords_path": "${fess.dictionary.path}ro/stopwords.txt"
},
"romanian_keywords": {
"type": "keyword_marker",
@ -432,9 +520,13 @@
"type": "stemmer",
"language": "romanian"
},
"romanian_override": {
"type": "stemmer_override",
"rules_path": "${fess.dictionary.path}ro/stemmer_override.txt"
},
"russian_stop": {
"type": "stop",
"stopwords": "_russian_"
"stopwords_path": "${fess.dictionary.path}ru/stopwords.txt"
},
"russian_keywords": {
"type": "keyword_marker",
@ -444,13 +536,17 @@
"type": "stemmer",
"language": "russian"
},
"russian_override": {
"type": "stemmer_override",
"rules_path": "${fess.dictionary.path}ru/stemmer_override.txt"
},
"simplified_chinese_stop": {
"type": "stop",
"stopwords_path": "${fess.dictionary.path}zh-cn/stopwords.txt"
},
"sorani_stop": {
"type": "stop",
"stopwords": "_sorani_"
"stopwords_path": "${fess.dictionary.path}ckb-iq/stopwords.txt"
},
"sorani_keywords": {
"type": "keyword_marker",
@ -460,9 +556,13 @@
"type": "stemmer",
"language": "sorani"
},
"sorani_override": {
"type": "stemmer_override",
"rules_path": "${fess.dictionary.path}ckb-iq/stemmer_override.txt"
},
"spanish_stop": {
"type": "stop",
"stopwords": "_spanish_"
"stopwords_path": "${fess.dictionary.path}es/stopwords.txt"
},
"spanish_keywords": {
"type": "keyword_marker",
@ -472,9 +572,13 @@
"type": "stemmer",
"language": "light_spanish"
},
"spanish_override": {
"type": "stemmer_override",
"rules_path": "${fess.dictionary.path}es/stemmer_override.txt"
},
"swedish_stop": {
"type": "stop",
"stopwords": "_swedish_"
"stopwords_path": "${fess.dictionary.path}sv/stopwords.txt"
},
"swedish_keywords": {
"type": "keyword_marker",
@ -484,9 +588,13 @@
"type": "stemmer",
"language": "swedish"
},
"swedish_override": {
"type": "stemmer_override",
"rules_path": "${fess.dictionary.path}sv/stemmer_override.txt"
},
"thai_stop": {
"type": "stop",
"stopwords": "_thai_"
"stopwords_path": "${fess.dictionary.path}th/stopwords.txt"
},
"traditional_chinese_stop": {
"type": "stop",
@ -494,7 +602,7 @@
},
"turkish_stop": {
"type": "stop",
"stopwords": "_turkish_"
"stopwords_path": "${fess.dictionary.path}tr/stopwords.txt"
},
"turkish_lowercase": {
"type": "lowercase",
@ -508,9 +616,13 @@
"type": "stemmer",
"language": "turkish"
},
"turkish_override": {
"type": "stemmer_override",
"rules_path": "${fess.dictionary.path}tr/stemmer_override.txt"
},
"vietnamese_stop": {
"type": "stop",
"stopwords": ["bị", "bởi", "cả", "các", "cái", "cần", "càng", "chỉ", "chiếc", "cho", "chứ", "chưa", "chuyện", "có", "có thể", "cứ", "của", "cùng", "cũng", "đã", "đang", "đây", "để", "đến nỗi", "đều", "điều", "do", "đó", "được", "dưới", "gì", "khi", "không", "là", "lại", "lên", "lúc", "mà", "mỗi", "một cách", "này", "nên", "nếu", "ngay", "nhiều", "như", "nhưng", "những", "nơi", "nữa", "phải", "qua", "ra", "rằng", "rằng", "rất", "rất", "rồi", "sau", "sẽ", "so", "sự", "tại", "theo", "thì", "trên", "trước", "từ", "từng", "và", "vẫn", "vào", "vậy", "vì", "việc", "với", "vừa"]
"stopwords_path": "${fess.dictionary.path}vi/stopwords.txt"
},
"truncate10_filter" : {
"type" : "truncate",
@ -578,6 +690,7 @@
"arabic_stop",
"arabic_normalization",
"arabic_keywords",
"arabic_override",
"arabic_stemmer"
]
},
@ -588,6 +701,7 @@
"lowercase",
"armenian_stop",
"armenian_keywords",
"armenian_override",
"armenian_stemmer"
]
},
@ -598,6 +712,7 @@
"lowercase",
"basque_stop",
"basque_keywords",
"basque_override",
"basque_stemmer"
]
},
@ -608,6 +723,7 @@
"lowercase",
"brazilian_stop",
"brazilian_keywords",
"brazilian_override",
"brazilian_stemmer"
]
},
@ -618,6 +734,7 @@
"lowercase",
"bulgarian_stop",
"bulgarian_keywords",
"bulgarian_override",
"bulgarian_stemmer"
]
},
@ -630,6 +747,7 @@
"lowercase",
"catalan_stop",
"catalan_keywords",
"catalan_override",
"catalan_stemmer"
]
},
@ -641,6 +759,7 @@
"lowercase",
"czech_stop",
"czech_keywords",
"czech_override",
"czech_stemmer"
]
},
@ -652,6 +771,7 @@
"lowercase",
"danish_stop",
"danish_keywords",
"danish_override",
"danish_stemmer"
]
},
@ -686,6 +806,7 @@
"lowercase",
"finnish_stop",
"finnish_keywords",
"finnish_override",
"finnish_stemmer"
]
},
@ -698,6 +819,7 @@
"lowercase",
"french_stop",
"french_keywords",
"french_override",
"french_stemmer"
]
},
@ -708,6 +830,7 @@
"lowercase",
"galician_stop",
"galician_keywords",
"galician_override",
"galician_stemmer"
]
},
@ -719,6 +842,7 @@
"lowercase",
"german_stop",
"german_normalization",
"german_override",
"german_stemmer"
]
},
@ -730,6 +854,7 @@
"greek_lowercase",
"greek_stop",
"greek_keywords",
"greek_override",
"greek_stemmer"
]
},
@ -743,6 +868,7 @@
"hindi_normalization",
"hindi_stop",
"hindi_keywords",
"hindi_override",
"hindi_stemmer"
]
},
@ -754,6 +880,7 @@
"lowercase",
"hungarian_stop",
"hungarian_keywords",
"hungarian_override",
"hungarian_stemmer"
]
},
@ -765,6 +892,7 @@
"lowercase",
"indonesian_stop",
"indonesian_keywords",
"indonesian_override",
"indonesian_stemmer"
]
},
@ -776,6 +904,7 @@
"irish_elision",
"irish_lowercase",
"irish_keywords",
"irish_override",
"irish_stemmer"
]
},
@ -788,6 +917,7 @@
"lowercase",
"italian_stop",
"italian_keywords",
"italian_override",
"italian_stemmer"
]
},
@ -822,6 +952,7 @@
"lowercase",
"latvian_stop",
"latvian_keywords",
"latvian_override",
"latvian_stemmer"
]
},
@ -833,6 +964,7 @@
"lowercase",
"lithuanian_stop",
"lithuanian_keywords",
"lithuanian_override",
"lithuanian_stemmer"
]
},
@ -844,6 +976,7 @@
"lowercase",
"norwegian_stop",
"norwegian_keywords",
"norwegian_override",
"norwegian_stemmer"
]
},
@ -867,6 +1000,7 @@
"lowercase",
"portuguese_stop",
"portuguese_keywords",
"portuguese_override",
"portuguese_stemmer"
]
},
@ -877,6 +1011,7 @@
"truncate20_filter",
"lowercase",
"romanian_stop",
"romanian_override",
"romanian_stemmer"
]
},
@ -888,6 +1023,7 @@
"lowercase",
"russian_stop",
"russian_keywords",
"russian_override",
"russian_stemmer"
]
},
@ -907,6 +1043,7 @@
"lowercase",
"sorani_stop",
"sorani_keywords",
"sorani_override",
"sorani_stemmer"
]
},
@ -918,6 +1055,7 @@
"lowercase",
"spanish_stop",
"spanish_keywords",
"spanish_override",
"spanish_stemmer"
]
},
@ -929,6 +1067,7 @@
"lowercase",
"swedish_stop",
"swedish_keywords",
"swedish_override",
"swedish_stemmer"
]
},
@ -959,6 +1098,7 @@
"turkish_lowercase",
"turkish_stop",
"turkish_keywords",
"turkish_override",
"turkish_stemmer"
]
},

View file

@ -0,0 +1,120 @@
# https://github.com/apache/lucene-solr/blob/master/lucene/analysis/common/src/resources/org/apache/lucene/analysis/ar/stopwords.txt
من
ومن
منها
منه
في
وفي
فيها
فيه
و
ف
ثم
او
أو
ب
بها
به
ا
أ
اى
اي
أي
أى
لا
ولا
الا
ألا
إلا
لكن
ما
وما
كما
فما
عن
مع
اذا
إذا
ان
أن
إن
انها
أنها
إنها
انه
أنه
إنه
بان
بأن
فان
فأن
وان
وأن
وإن
التى
التي
الذى
الذي
الذين
الى
الي
إلى
إلي
على
عليها
عليه
اما
أما
إما
ايضا
أيضا
كل
وكل
لم
ولم
لن
ولن
هى
هي
هو
وهى
وهي
وهو
فهى
فهي
فهو
انت
أنت
لك
لها
له
هذه
هذا
تلك
ذلك
هناك
كانت
كان
يكون
تكون
وكانت
وكان
غير
بعض
قد
نحو
بين
بينما
منذ
ضمن
حيث
الان
الآن
خلال
بعد
قبل
حتى
عند
عندما
لدى
جميع

View file

@ -0,0 +1,191 @@
# https://github.com/apache/lucene-solr/blob/master/lucene/analysis/common/src/resources/org/apache/lucene/analysis/bg/stopwords.txt
а
аз
ако
ала
бе
без
беше
би
бил
била
били
било
близо
бъдат
бъде
бяха
в
вас
ваш
ваша
вероятно
вече
взема
ви
вие
винаги
все
всеки
всички
всичко
всяка
във
въпреки
върху
г
ги
главно
го
д
да
дали
до
докато
докога
дори
досега
доста
е
едва
един
ето
за
зад
заедно
заради
засега
затова
защо
защото
и
из
или
им
има
имат
иска
й
каза
как
каква
какво
както
какъв
като
кога
когато
което
които
кой
който
колко
която
къде
където
към
ли
м
ме
между
мен
ми
мнозина
мога
могат
може
моля
момента
му
н
на
над
назад
най
направи
напред
например
нас
не
него
нея
ни
ние
никой
нито
но
някои
някой
няма
обаче
около
освен
особено
от
отгоре
отново
още
пак
по
повече
повечето
под
поне
поради
после
почти
прави
пред
преди
през
при
пък
първо
с
са
само
се
сега
си
скоро
след
сме
според
сред
срещу
сте
съм
със
също
т
тази
така
такива
такъв
там
твой
те
тези
ти
тн
то
това
тогава
този
той
толкова
точно
трябва
тук
тъй
тя
тях
у
харесва
ч
че
често
чрез
ще
щом
я

View file

@ -0,0 +1,220 @@
# https://github.com/apache/lucene-solr/blob/master/lucene/analysis/common/src/resources/org/apache/lucene/analysis/ca/stopwords.txt
a
abans
ací
ah
així
això
al
als
aleshores
algun
alguna
algunes
alguns
alhora
allà
allí
allò
altra
altre
altres
amb
ambdós
ambdues
apa
aquell
aquella
aquelles
aquells
aquest
aquesta
aquestes
aquests
aquí
baix
cada
cadascú
cadascuna
cadascunes
cadascuns
com
contra
d'un
d'una
d'unes
d'uns
dalt
de
del
dels
des
després
dins
dintre
donat
doncs
durant
e
eh
el
els
em
en
encara
ens
entre
érem
eren
éreu
es
és
esta
està
estàvem
estaven
estàveu
esteu
et
etc
ets
fins
fora
gairebé
ha
han
has
havia
he
hem
heu
hi
ho
i
igual
iguals
ja
l'hi
la
les
li
li'n
llavors
m'he
ma
mal
malgrat
mateix
mateixa
mateixes
mateixos
me
mentre
més
meu
meus
meva
meves
molt
molta
moltes
molts
mon
mons
n'he
n'hi
ne
ni
no
nogensmenys
només
nosaltres
nostra
nostre
nostres
o
oh
oi
on
pas
pel
pels
per
però
perquè
poc
poca
pocs
poques
potser
propi
qual
quals
quan
quant
que
què
quelcom
qui
quin
quina
quines
quins
s'ha
s'han
sa
semblant
semblants
ses
seu
seus
seva
seva
seves
si
sobre
sobretot
sóc
solament
sols
son
són
sons
sota
sou
t'ha
t'han
t'he
ta
tal
també
tampoc
tan
tant
tanta
tantes
teu
teus
teva
teves
ton
tons
tot
tota
totes
tots
un
una
unes
uns
us
va
vaig
vam
van
vas
veu
vosaltres
vostra
vostre
vostres

View file

@ -0,0 +1,64 @@
# https://github.com/apache/lucene-solr/blob/master/lucene/analysis/common/src/resources/org/apache/lucene/analysis/ckb/stopwords.txt
و
کە
ی
کرد
ئەوەی
سەر
دوو
هەروەها
لەو
دەکات
چەند
هەر
ئەو
ئەم
من
ئێمە
تۆ
ئێوە
ئەو
ئەوان
بە
پێ
بەبێ
بەدەم
بەلای
بەپێی
بەرلە
بەرەوی
بەرەوە
بەردەم
بێ
بێجگە
بۆ
دە
تێ
دەگەڵ
دوای
جگە
لە
لێ
لەبەر
لەبەینی
لەبابەت
لەبارەی
لەباتی
لەبن
لەبرێتی
لەدەم
لەگەڵ
لەلایەن
لەناو
لەنێو
لەپێناوی
لەرەوی
لەرێ
لەرێگا
لەسەر
لەژێر
ناو
نێوان
پاش
پێش
وەک

View file

@ -0,0 +1,173 @@
# https://github.com/apache/lucene-solr/blob/master/lucene/analysis/common/src/resources/org/apache/lucene/analysis/cz/stopwords.txt
a
s
k
o
i
u
v
z
dnes
cz
tímto
budeš
budem
byli
jseš
můj
svým
ta
tomto
tohle
tuto
tyto
jej
zda
proč
máte
tato
kam
tohoto
kdo
kteří
mi
nám
tom
tomuto
mít
nic
proto
kterou
byla
toho
protože
asi
ho
naši
napište
re
což
tím
takže
svých
její
svými
jste
aj
tu
tedy
teto
bylo
kde
ke
pravé
ji
nad
nejsou
či
pod
téma
mezi
přes
ty
pak
vám
ani
když
však
neg
jsem
tento
článku
články
aby
jsme
před
pta
jejich
byl
ještě
bez
také
pouze
první
vaše
která
nás
nový
tipy
pokud
může
strana
jeho
své
jiné
zprávy
nové
není
vás
jen
podle
zde
být
více
bude
již
než
který
by
které
co
nebo
ten
tak
při
od
po
jsou
jak
další
ale
si
se
ve
to
jako
za
zpět
ze
do
pro
je
na
atd
atp
jakmile
přičemž
on
ona
ono
oni
ony
my
vy
ji
mne
jemu
tomu
těm
těmu
němu
němuž
jehož
jíž
jelikož
jež
jakož
načež

View file

@ -0,0 +1,95 @@
# https://github.com/apache/lucene-solr/blob/master/lucene/analysis/common/src/resources/org/apache/lucene/analysis/snowball/danish_stop.txt
og
i
jeg
det
at
en
den
til
er
som
på
de
med
han
af
for
ikke
der
var
mig
sig
men
et
har
om
vi
min
havde
ham
hun
nu
over
da
fra
du
ud
sin
dem
os
op
man
hans
hvor
eller
hvad
skal
selv
her
alle
vil
blev
kunne
ind
når
være
dog
noget
ville
jo
deres
efter
ned
skulle
denne
end
dette
mit
også
under
have
dig
anden
hende
mine
alt
meget
sit
sine
vor
mod
disse
hvis
din
nogle
hos
blive
mange
ad
bliver
hendes
været
thi
jer
sådan

View file

@ -0,0 +1,232 @@
# https://github.com/apache/lucene-solr/blob/master/lucene/analysis/common/src/resources/org/apache/lucene/analysis/snowball/german_stop.txt
aber
alle
allem
allen
aller
alles
als
also
am
an
ander
andere
anderem
anderen
anderer
anderes
anderm
andern
anderr
anders
auch
auf
aus
bei
bin
bis
bist
da
damit
dann
der
den
des
dem
die
das
daß
derselbe
derselben
denselben
desselben
demselben
dieselbe
dieselben
dasselbe
dazu
dein
deine
deinem
deinen
deiner
deines
denn
derer
dessen
dich
dir
du
dies
diese
diesem
diesen
dieser
dieses
doch
dort
durch
ein
eine
einem
einen
einer
eines
einig
einige
einigem
einigen
einiger
einiges
einmal
er
ihn
ihm
es
etwas
euer
eure
eurem
euren
eurer
eures
für
gegen
gewesen
hab
habe
haben
hat
hatte
hatten
hier
hin
hinter
ich
mich
mir
ihr
ihre
ihrem
ihren
ihrer
ihres
euch
im
in
indem
ins
ist
jede
jedem
jeden
jeder
jedes
jene
jenem
jenen
jener
jenes
jetzt
kann
kein
keine
keinem
keinen
keiner
keines
können
könnte
machen
man
manche
manchem
manchen
mancher
manches
mein
meine
meinem
meinen
meiner
meines
mit
muss
musste
nach
nicht
nichts
noch
nun
nur
ob
oder
ohne
sehr
sein
seine
seinem
seinen
seiner
seines
selbst
sich
sie
ihnen
sind
so
solche
solchem
solchen
solcher
solches
soll
sollte
sondern
sonst
über
um
und
uns
unse
unsem
unsen
unser
unses
unter
viel
vom
von
vor
während
war
waren
warst
was
weg
weil
weiter
welche
welchem
welchen
welcher
welches
wenn
werde
werden
wie
wieder
will
wir
wird
wirst
wo
wollen
wollte
würde
würden
zu
zum
zur
zwar
zwischen

View file

@ -0,0 +1,76 @@
# https://github.com/apache/lucene-solr/blob/master/lucene/analysis/common/src/resources/org/apache/lucene/analysis/el/stopwords.txt
ο
η
το
οι
τα
του
τησ
των
τον
την
και
κι
κ
ειμαι
εισαι
ειναι
ειμαστε
ειστε
στο
στον
στη
στην
μα
αλλα
απο
για
προσ
με
σε
ωσ
παρα
αντι
κατα
μετα
θα
να
δε
δεν
μη
μην
επι
ενω
εαν
αν
τοτε
που
πωσ
ποιοσ
ποια
ποιο
ποιοι
ποιεσ
ποιων
ποιουσ
αυτοσ
αυτη
αυτο
αυτοι
αυτων
αυτουσ
αυτεσ
αυτα
εκεινοσ
εκεινη
εκεινο
εκεινοι
εκεινεσ
εκεινα
εκεινων
εκεινουσ
οπωσ
ομωσ
ισωσ
οσο
οτι

View file

@ -0,0 +1,110 @@
# https://github.com/apache/lucene-solr/blob/master/lucene/analysis/common/src/resources/org/apache/lucene/analysis/ga/stopwords.txt
a
ach
ag
agus
an
aon
ar
arna
as
b'
ba
beirt
bhúr
caoga
ceathair
ceathrar
chomh
chtó
chuig
chun
cois
céad
cúig
cúigear
d'
daichead
dar
de
deich
deichniúr
den
dhá
do
don
dtí
dár
faoi
faoin
faoina
faoinár
fara
fiche
gach
gan
go
gur
haon
hocht
i
iad
idir
in
ina
ins
inár
is
le
leis
lena
lenár
m'
mar
mo
na
nach
naoi
naonúr
níor
nócha
ocht
ochtar
os
roimh
sa
seacht
seachtar
seachtó
seasca
seisear
siad
sibh
sinn
sna
tar
thar
thú
triúr
trí
trína
trínár
tríocha
um
ár
é
éis
í
ó
ón
óna
ónár

View file

@ -25,7 +25,7 @@ the
their
then
there
these,
these
they
this
to

View file

@ -0,0 +1,309 @@
# https://github.com/apache/lucene-solr/blob/master/lucene/analysis/common/src/resources/org/apache/lucene/analysis/snowball/spanish_stop.txt
de
la
que
el
en
y
a
los
del
se
las
por
un
para
con
no
una
su
al
lo
como
mas
pero
sus
le
ya
o
este
si
porque
esta
entre
cuando
muy
sin
sobre
tambien
me
hasta
hay
donde
quien
desde
todo
nos
durante
todos
uno
les
ni
contra
otros
ese
eso
ante
ellos
e
esto
mi
antes
algunos
que
unos
yo
otro
otras
otra
el
tanto
esa
estos
mucho
quienes
nada
muchos
cual
poco
ella
estar
estas
algunas
algo
nosotros
mi
mis
tu
te
ti
tu
tus
ellas
nosotras
vosotros
vosotras
os
mio
mia
mios
mias
tuyo
tuya
tuyos
tuyas
suyo
suya
suyos
suyas
nuestro
nuestra
nuestros
nuestras
vuestro
vuestra
vuestros
vuestras
esos
esas
estoy
estas
esta
estamos
estais
estan
este
estes
estemos
esteis
esten
estare
estaras
estara
estaremos
estareis
estaran
estaria
estarias
estariamos
estariais
estarian
estaba
estabas
estabamos
estabais
estaban
estuve
estuviste
estuvo
estuvimos
estuvisteis
estuvieron
estuviera
estuvieras
estuvieramos
estuvierais
estuvieran
estuviese
estuvieses
estuviesemos
estuvieseis
estuviesen
estando
estado
estada
estados
estadas
estad
he
has
ha
hemos
habeis
han
haya
hayas
hayamos
hayais
hayan
habre
habras
habra
habremos
habreis
habran
habria
habrias
habriamos
habriais
habrian
habia
habias
habiamos
habiais
habian
hube
hubiste
hubo
hubimos
hubisteis
hubieron
hubiera
hubieras
hubieramos
hubierais
hubieran
hubiese
hubieses
hubiesemos
hubieseis
hubiesen
habiendo
habido
habida
habidos
habidas
soy
eres
es
somos
sois
son
sea
seas
seamos
seais
sean
sere
seras
sera
seremos
sereis
seran
seria
serias
seriamos
seriais
serian
era
eras
eramos
erais
eran
fui
fuiste
fue
fuimos
fuisteis
fueron
fuera
fueras
fueramos
fuerais
fueran
fuese
fueses
fuesemos
fueseis
fuesen
siendo
sido
tengo
tienes
tiene
tenemos
teneis
tienen
tenga
tengas
tengamos
tengais
tengan
tendre
tendras
tendra
tendremos
tendreis
tendran
tendria
tendrias
tendriamos
tendriais
tendrian
tenia
tenias
teniamos
teniais
tenian
tuve
tuviste
tuvo
tuvimos
tuvisteis
tuvieron
tuviera
tuvieras
tuvieramos
tuvierais
tuvieran
tuviese
tuvieses
tuviesemos
tuvieseis
tuviesen
teniendo
tenido
tenida
tenidos
tenidas
tened

View file

@ -0,0 +1,99 @@
# https://github.com/apache/lucene-solr/blob/master/lucene/analysis/common/src/resources/org/apache/lucene/analysis/eu/stopwords.txt
al
anitz
arabera
asko
baina
bat
batean
batek
bati
batzuei
batzuek
batzuetan
batzuk
bera
beraiek
berau
berauek
bere
berori
beroriek
beste
bezala
da
dago
dira
ditu
du
dute
edo
egin
ere
eta
eurak
ez
gainera
gu
gutxi
guzti
haiei
haiek
haietan
hainbeste
hala
han
handik
hango
hara
hari
hark
hartan
hau
hauei
hauek
hauetan
hemen
hemendik
hemengo
hi
hona
honek
honela
honetan
honi
hor
hori
horiei
horiek
horietan
horko
horra
horrek
horrela
horretan
horri
hortik
hura
izan
ni
noiz
nola
non
nondik
nongo
nor
nora
ze
zein
zen
zenbait
zenbat
zer
zergatik
ziren
zituen
zu
zuek
zuen
zuten

View file

@ -0,0 +1,309 @@
# https://github.com/apache/lucene-solr/blob/master/lucene/analysis/common/src/resources/org/apache/lucene/analysis/fa/stopwords.txt
انان
نداشته
سراسر
خياه
ايشان
وي
تاكنون
بيشتري
دوم
پس
ناشي
وگو
يا
داشتند
سپس
هنگام
هرگز
پنج
نشان
امسال
ديگر
گروهي
شدند
چطور
ده
و
دو
نخستين
ولي
چرا
چه
وسط
ه
كدام
قابل
يك
رفت
هفت
همچنين
در
هزار
بله
بلي
شايد
اما
شناسي
گرفته
دهد
داشته
دانست
داشتن
خواهيم
ميليارد
وقتيكه
امد
خواهد
جز
اورده
شده
بلكه
خدمات
شدن
برخي
نبود
بسياري
جلوگيري
حق
كردند
نوعي
بعري
نكرده
نظير
نبايد
بوده
بودن
داد
اورد
هست
جايي
شود
دنبال
داده
بايد
سابق
هيچ
همان
انجا
كمتر
كجاست
گردد
كسي
تر
مردم
تان
دادن
بودند
سري
جدا
ندارند
مگر
يكديگر
دارد
دهند
بنابراين
هنگامي
سمت
جا
انچه
خود
دادند
زياد
دارند
اثر
بدون
بهترين
بيشتر
البته
به
براساس
بيرون
كرد
بعضي
گرفت
توي
اي
ميليون
او
جريان
تول
بر
مانند
برابر
باشيم
مدتي
گويند
اكنون
تا
تنها
جديد
چند
بي
نشده
كردن
كردم
گويد
كرده
كنيم
نمي
نزد
روي
قصد
فقط
بالاي
ديگران
اين
ديروز
توسط
سوم
ايم
دانند
سوي
استفاده
شما
كنار
داريم
ساخته
طور
امده
رفته
نخست
بيست
نزديك
طي
كنيد
از
انها
تمامي
داشت
يكي
طريق
اش
چيست
روب
نمايد
گفت
چندين
چيزي
تواند
ام
ايا
با
ان
ايد
ترين
اينكه
ديگري
راه
هايي
بروز
همچنان
پاعين
كس
حدود
مختلف
مقابل
چيز
گيرد
ندارد
ضد
همچون
سازي
شان
مورد
باره
مرسي
خويش
برخوردار
چون
خارج
شش
هنوز
تحت
ضمن
هستيم
گفته
فكر
بسيار
پيش
براي
روزهاي
انكه
نخواهد
بالا
كل
وقتي
كي
چنين
كه
گيري
نيست
است
كجا
كند
نيز
يابد
بندي
حتي
توانند
عقب
خواست
كنند
بين
تمام
همه
ما
باشند
مثل
شد
اري
باشد
اره
طبق
بعد
اگر
صورت
غير
جاي
بيش
ريزي
اند
زيرا
چگونه
بار
لطفا
مي
درباره
من
ديده
همين
گذاري
برداري
علت
گذاشته
هم
فوق
نه
ها
شوند
اباد
همواره
هر
اول
خواهند
چهار
نام
امروز
مان
هاي
قبل
كنم
سعي
تازه
را
هستند
زير
جلوي
عنوان
بود

View file

@ -0,0 +1,236 @@
# https://github.com/apache/lucene-solr/blob/master/lucene/analysis/common/src/resources/org/apache/lucene/analysis/snowball/finnish_stop.txt
olla
olen
olet
on
olemme
olette
ovat
ole
oli
olisi
olisit
olisin
olisimme
olisitte
olisivat
olit
olin
olimme
olitte
olivat
ollut
olleet
en
et
ei
emme
ette
eivat
mina
minun
minut
minua
minussa
minusta
minuun
minulla
minulta
minulle
sina
sinun
sinut
sinua
sinussa
sinusta
sinuun
sinulla
sinulta
sinulle
han
hanen
hanet
hanta
hanessa
hanesta
haneen
hanella
hanelta
hanelle
me
meidan
meidat
meita
meissa
meista
meihin
meilla
meilta
meille
te
teidan
teidat
teita
teissa
teista
teihin
teilla
teilta
teille
he
heidan
heidat
heita
heissa
heista
heihin
heilla
heilta
heille
tama
taman
tata
tassa
tasta
tahan
talla
talta
talle
tana
taksi
tuo
tuon
tuota
tuossa
tuosta
tuohon
tuolla
tuolta
tuolle
tuona
tuoksi
se
sen
sita
siina
siita
siihen
silla
silta
sille
sina
siksi
nama
naiden
naita
naissa
naista
naihin
nailla
nailta
naille
naina
naiksi
nuo
noiden
noita
noissa
noista
noihin
noilla
noilta
noille
noina
noiksi
ne
niiden
niita
niissa
niista
niihin
niilla
niilta
niille
niina
niiksi
kuka
kenen
kenet
keta
kenessa
kenesta
keneen
kenella
kenelta
kenelle
kenena
keneksi
ketka
keiden
ketka
keita
keissa
keista
keihin
keilla
keilta
keille
keina
keiksi
mika
minka
minka
mita
missa
mista
mihin
milla
milta
mille
mina
miksi
mitka
joka
jonka
jota
jossa
josta
johon
jolla
jolta
jolle
jona
joksi
jotka
joiden
joita
joissa
joista
joihin
joilla
joilta
joille
joina
joiksi
etta
ja
jos
koska
kuin
mutta
niin
seka
silla
tai
vaan
vai
vaikka
kanssa
mukaan
noin
poikki
yli
kun
niin
nyt
itse

View file

@ -0,0 +1,165 @@
# https://github.com/apache/lucene-solr/blob/master/lucene/analysis/common/src/resources/org/apache/lucene/analysis/snowball/french_stop.txt
au
aux
avec
ce
ces
dans
de
des
du
elle
en
et
eux
il
je
la
le
leur
lui
ma
mais
me
meme
mes
moi
mon
ne
nos
notre
nous
on
ou
par
pas
pour
qu
que
qui
sa
se
ses
son
sur
ta
te
tes
toi
ton
tu
un
une
vos
votre
vous
c
d
j
l
a
m
n
s
t
y
ete
etee
etees
etes
etant
suis
es
est
sommes
etes
sont
serai
seras
sera
serons
serez
seront
serais
serait
serions
seriez
seraient
etais
etait
etions
etiez
etaient
fus
fut
fumes
futes
furent
sois
soit
soyons
soyez
soient
fusse
fusses
fut
fussions
fussiez
fussent
ayant
eu
eue
eues
eus
ai
as
avons
avez
ont
aurai
auras
aura
aurons
aurez
auront
aurais
aurait
aurions
auriez
auraient
avais
avait
avions
aviez
avaient
eut
eumes
eutes
eurent
aie
aies
ait
ayons
ayez
aient
eusse
eusses
eut
eussions
eussiez
eussent
ceci
cela
cela
cet
cette
ici
ils
les
leurs
quel
quels
quelle
quelles
sans
soi

View file

@ -0,0 +1,161 @@
# https://github.com/apache/lucene-solr/blob/master/lucene/analysis/common/src/resources/org/apache/lucene/analysis/gl/stopwords.txt
a
aínda
alí
aquel
aquela
aquelas
aqueles
aquilo
aquí
ao
aos
as
así
á
ben
cando
che
co
coa
comigo
con
connosco
contigo
convosco
coas
cos
cun
cuns
cunha
cunhas
da
dalgunha
dalgunhas
dalgún
dalgúns
das
de
del
dela
delas
deles
desde
deste
do
dos
dun
duns
dunha
dunhas
e
el
ela
elas
eles
en
era
eran
esa
esas
ese
eses
esta
estar
estaba
está
están
este
estes
estiven
estou
eu
é
facer
foi
foron
fun
había
hai
iso
isto
la
las
lle
lles
lo
los
mais
me
meu
meus
min
miña
miñas
moi
na
nas
neste
nin
no
non
nos
nosa
nosas
noso
nosos
nós
nun
nunha
nuns
nunhas
o
os
ou
ó
ós
para
pero
pode
pois
pola
polas
polo
polos
por
que
se
senón
ser
seu
seus
sexa
sido
sobre
súa
súas
tamén
tan
te
ten
teñen
teño
ter
teu
teus
ti
tido
tiña
tiven
túa
túas
un
unha
unhas
uns
vos
vosa
vosas
voso
vosos
vós

View file

@ -0,0 +1,229 @@
# https://github.com/apache/lucene-solr/blob/master/lucene/analysis/common/src/resources/org/apache/lucene/analysis/hi/stopwords.txt
अंदर
अत
अपना
अपनी
अपने
अभी
आदि
आप
इत्यादि
इन
इनका
इन्हीं
इन्हें
इन्हों
इस
इसका
इसकी
इसके
इसमें
इसी
इसे
उन
उनका
उनकी
उनके
उनको
उन्हीं
उन्हें
उन्हों
उस
उसके
उसी
उसे
एक
एवं
एस
ऐसे
और
कई
कर
करता
करते
करना
करने
करें
कहते
कहा
का
काफ़ी
कि
कितना
किन्हें
किन्हों
किया
किर
किस
किसी
किसे
की
कुछ
कुल
के
को
कोई
कौन
कौनसा
गया
घर
जब
जहाँ
जा
जितना
जिन
जिन्हें
जिन्हों
जिस
जिसे
जीधर
जैसा
जैसे
जो
तक
तब
तरह
तिन
तिन्हें
तिन्हों
तिस
तिसे
तो
था
थी
थे
दबारा
दिया
दुसरा
दूसरे
दो
द्वारा
नहीं
ना
निहायत
नीचे
ने
पर
पर
पहले
पूरा
पे
फिर
बनी
बही
बहुत
बाद
बाला
बिलकुल
भी
भीतर
मगर
मानो
मे
में
यदि
यह
यहाँ
यही
या
यिह
ये
रखें
रहा
रहे
ऱ्वासा
लिए
लिये
लेकिन
वर्ग
वह
वह
वहाँ
वहीं
वाले
वुह
वे
वग़ैरह
संग
सकता
सकते
सबसे
सभी
साथ
साबुत
साभ
सारा
से
सो
ही
हुआ
हुई
हुए
है
हैं
हो
होता
होती
होते
होना
होने
# additional normalized forms of the above
अपनि
जेसे
होति
सभि
तिंहों
इंहों
दवारा
इसि
किंहें
थि
उंहों
ओर
जिंहें
वहिं
अभि
बनि
हि
उंहिं
उंहें
हें
वगेरह
एसे
रवासा
कोन
निचे
काफि
उसि
पुरा
भितर
हे
बहि
वहां
कोइ
यहां
जिंहों
तिंहें
किसि
कइ
यहि
इंहिं
जिधर
इंहें
अदि
इतयादि
हुइ
कोनसा
इसकि
दुसरे
जहां
अप
किंहों
उनकि
भि
वरग
हुअ
जेसा
नहिं

View file

@ -0,0 +1,200 @@
# https://github.com/apache/lucene-solr/blob/master/lucene/analysis/common/src/resources/org/apache/lucene/analysis/snowball/hungarian_stop.txt
a
ahogy
ahol
aki
akik
akkor
alatt
által
általában
amely
amelyek
amelyekben
amelyeket
amelyet
amelynek
ami
amit
amolyan
amíg
amikor
át
abban
ahhoz
annak
arra
arról
az
azok
azon
azt
azzal
azért
aztán
azután
azonban
bár
be
belül
benne
cikk
cikkek
cikkeket
csak
de
e
eddig
egész
egy
egyes
egyetlen
egyéb
egyik
egyre
ekkor
el
elég
ellen
elő
először
előtt
első
én
éppen
ebben
ehhez
emilyen
ennek
erre
ez
ezt
ezek
ezen
ezzel
ezért
és
fel
felé
hanem
hiszen
hogy
hogyan
igen
így
illetve
ill.
ill
ilyen
ilyenkor
ison
ismét
itt
jól
jobban
kell
kellett
keresztül
keressünk
ki
kívül
között
közül
legalább
lehet
lehetett
legyen
lenne
lenni
lesz
lett
maga
magát
majd
majd
már
más
másik
meg
még
mellett
mert
mely
melyek
mi
mit
míg
miért
milyen
mikor
minden
mindent
mindenki
mindig
mint
mintha
mivel
most
nagy
nagyobb
nagyon
ne
néha
nekem
neki
nem
néhány
nélkül
nincs
olyan
ott
össze
ő
ők
őket
pedig
persze
s
saját
sem
semmi
sok
sokat
sokkal
számára
szemben
szerint
szinte
talán
tehát
teljes
tovább
továbbá
több
úgy
ugyanis
új
újabb
újra
után
utána
utolsó
vagy
vagyis
valaki
valami
valamint
való
vagyok
van
vannak
volt
voltam
voltak
voltunk
vissza
vele
viszont
volna

View file

@ -0,0 +1,46 @@
# https://github.com/apache/lucene-solr/blob/master/lucene/analysis/common/src/resources/org/apache/lucene/analysis/hy/stopwords.txt
այդ
այլ
այն
այս
դու
դուք
եմ
են
ենք
ես
եք
է
էի
էին
էինք
էիր
էիք
էր
ըստ
թ
ի
ին
իսկ
իր
կամ
համար
հետ
հետո
մենք
մեջ
մի
ն
նա
նաև
նրա
նրանք
որ
որը
որոնք
որպես
ու
ում
պիտի
վրա
և

View file

@ -0,0 +1,358 @@
# https://github.com/apache/lucene-solr/blob/master/lucene/analysis/common/src/resources/org/apache/lucene/analysis/id/stopwords.txt
ada
adanya
adalah
adapun
agak
agaknya
agar
akan
akankah
akhirnya
aku
akulah
amat
amatlah
anda
andalah
antar
diantaranya
antara
antaranya
diantara
apa
apaan
mengapa
apabila
apakah
apalagi
apatah
atau
ataukah
ataupun
bagai
bagaikan
sebagai
sebagainya
bagaimana
bagaimanapun
sebagaimana
bagaimanakah
bagi
bahkan
bahwa
bahwasanya
sebaliknya
banyak
sebanyak
beberapa
seberapa
begini
beginian
beginikah
beginilah
sebegini
begitu
begitukah
begitulah
begitupun
sebegitu
belum
belumlah
sebelum
sebelumnya
sebenarnya
berapa
berapakah
berapalah
berapapun
betulkah
sebetulnya
biasa
biasanya
bila
bilakah
bisa
bisakah
sebisanya
boleh
bolehkah
bolehlah
buat
bukan
bukankah
bukanlah
bukannya
cuma
percuma
dahulu
dalam
dan
dapat
dari
daripada
dekat
demi
demikian
demikianlah
sedemikian
dengan
depan
di
dia
dialah
dini
diri
dirinya
terdiri
dong
dulu
enggak
enggaknya
entah
entahlah
terhadap
terhadapnya
hal
hampir
hanya
hanyalah
harus
haruslah
harusnya
seharusnya
hendak
hendaklah
hendaknya
hingga
sehingga
ia
ialah
ibarat
ingin
inginkah
inginkan
ini
inikah
inilah
itu
itukah
itulah
jangan
jangankan
janganlah
jika
jikalau
juga
justru
kala
kalau
kalaulah
kalaupun
kalian
kami
kamilah
kamu
kamulah
kan
kapan
kapankah
kapanpun
dikarenakan
karena
karenanya
ke
kecil
kemudian
kenapa
kepada
kepadanya
ketika
seketika
khususnya
kini
kinilah
kiranya
sekiranya
kita
kitalah
kok
lagi
lagian
selagi
lah
lain
lainnya
melainkan
selaku
lalu
melalui
terlalu
lama
lamanya
selama
selama
selamanya
lebih
terlebih
bermacam
macam
semacam
maka
makanya
makin
malah
malahan
mampu
mampukah
mana
manakala
manalagi
masih
masihkah
semasih
masing
mau
maupun
semaunya
memang
mereka
merekalah
meski
meskipun
semula
mungkin
mungkinkah
nah
namun
nanti
nantinya
nyaris
oleh
olehnya
seorang
seseorang
pada
padanya
padahal
paling
sepanjang
pantas
sepantasnya
sepantasnyalah
para
pasti
pastilah
per
pernah
pula
pun
merupakan
rupanya
serupa
saat
saatnya
sesaat
saja
sajalah
saling
bersama
sama
sesama
sambil
sampai
sana
sangat
sangatlah
saya
sayalah
se
sebab
sebabnya
sebuah
tersebut
tersebutlah
sedang
sedangkan
sedikit
sedikitnya
segala
segalanya
segera
sesegera
sejak
sejenak
sekali
sekalian
sekalipun
sesekali
sekaligus
sekarang
sekarang
sekitar
sekitarnya
sela
selain
selalu
seluruh
seluruhnya
semakin
sementara
sempat
semua
semuanya
sendiri
sendirinya
seolah
seperti
sepertinya
sering
seringnya
serta
siapa
siapakah
siapapun
disini
disinilah
sini
sinilah
sesuatu
sesuatunya
suatu
sesudah
sesudahnya
sudah
sudahkah
sudahlah
supaya
tadi
tadinya
tak
tanpa
setelah
telah
tentang
tentu
tentulah
tentunya
tertentu
seterusnya
tapi
tetapi
setiap
tiap
setidaknya
tidak
tidakkah
tidaklah
toh
waduh
wah
wahai
sewaktu
walau
walaupun
wong
yaitu
yakni
yang

View file

@ -0,0 +1,280 @@
# https://github.com/apache/lucene-solr/blob/master/lucene/analysis/common/src/resources/org/apache/lucene/analysis/snowball/italian_stop.txt
ad
al
allo
ai
agli
all
agl
alla
alle
con
col
coi
da
dal
dallo
dai
dagli
dall
dagl
dalla
dalle
di
del
dello
dei
degli
dell
degl
della
delle
in
nel
nello
nei
negli
nell
negl
nella
nelle
su
sul
sullo
sui
sugli
sull
sugl
sulla
sulle
per
tra
contro
io
tu
lui
lei
noi
voi
loro
mio
mia
miei
mie
tuo
tua
tuoi
tue
suo
sua
suoi
sue
nostro
nostra
nostri
nostre
vostro
vostra
vostri
vostre
mi
ti
ci
vi
lo
la
li
le
gli
ne
il
un
uno
una
ma
ed
se
perche
anche
come
dov
dove
che
chi
cui
non
piu
quale
quanto
quanti
quanta
quante
quello
quelli
quella
quelle
questo
questi
questa
queste
si
tutto
tutti
a
c
e
i
l
o
ho
hai
ha
abbiamo
avete
hanno
abbia
abbiate
abbiano
avro
avrai
avra
avremo
avrete
avranno
avrei
avresti
avrebbe
avremmo
avreste
avrebbero
avevo
avevi
aveva
avevamo
avevate
avevano
ebbi
avesti
ebbe
avemmo
aveste
ebbero
avessi
avesse
avessimo
avessero
avendo
avuto
avuta
avuti
avute
sono
sei
e
siamo
siete
sia
siate
siano
saro
sarai
sara
saremo
sarete
saranno
sarei
saresti
sarebbe
saremmo
sareste
sarebbero
ero
eri
era
eravamo
eravate
erano
fui
fosti
fu
fummo
foste
furono
fossi
fosse
fossimo
fossero
essendo
faccio
fai
facciamo
fanno
faccia
facciate
facciano
faro
farai
fara
faremo
farete
faranno
farei
faresti
farebbe
faremmo
fareste
farebbero
facevo
facevi
faceva
facevamo
facevate
facevano
feci
facesti
fece
facemmo
faceste
fecero
facessi
facesse
facessimo
facessero
facendo
sto
stai
sta
stiamo
stanno
stia
stiate
stiano
staro
starai
stara
staremo
starete
staranno
starei
staresti
starebbe
staremmo
stareste
starebbero
stavo
stavi
stava
stavamo
stavate
stavano
stetti
stesti
stette
stemmo
steste
stettero
stessi
stesse
stessimo
stessero
stando

View file

@ -0,0 +1,126 @@
# https://github.com/apache/lucene-solr/blob/master/lucene/analysis/common/src/resources/org/apache/lucene/analysis/lt/stopwords.txt
ant
apie
ar
arba
be
bei
bet
bus
būti
būtų
buvo
dėl
gali
į
iki
ir
ja
jai
jais
jam
jame
jas
jei
ji
jie
jiedu
jiedvi
jiedviem
jiedviese
jiems
jis
jo
jodviem
jog
joje
jomis
joms
jos
jose
judu
judvi
judviejų
jųdviejų
judviem
judviese
jumis
jums
jumyse
juo
juodu
juodviese
juos
juose
jus
jūs
jūsų
kad
kai
kaip
kas
kiek
kol
kur
kurie
kuris
man
mane
manęs
manimi
mano
manyje
mes
metu
mudu
mudvi
mudviejų
mudviem
mudviese
mumis
mums
mumyse
mus
mūsų
nei
nes
net
nors
nuo
o
pat
per
po
prie
prieš
sau
save
savęs
savimi
savo
savyje
su
tačiau
tada
tai
taip
tas
tau
tave
tavęs
tavimi
tavyje
ten
to
todėl
tu
tuo
visi
yra

View file

@ -0,0 +1,168 @@
# https://github.com/apache/lucene-solr/blob/master/lucene/analysis/common/src/resources/org/apache/lucene/analysis/lv/stopwords.txt
aiz
ap
ar
apakš
ārpus
augšpus
bez
caur
dēļ
gar
iekš
iz
kopš
labad
lejpus
līdz
no
otrpus
pa
par
pār
pēc
pie
pirms
pret
priekš
starp
šaipus
uz
viņpus
virs
virspus
zem
apakšpus
# Conjunctions
un
bet
jo
ja
ka
lai
tomēr
tikko
turpretī
arī
kaut
gan
tādēļ
ne
tikvien
vien
ir
te
vai
kamēr
# Particles
ar
diezin
droši
diemžēl
nebūt
ik
it
taču
nu
pat
tiklab
iekšpus
nedz
tik
nevis
turpretim
jeb
iekam
iekām
iekāms
kolīdz
līdzko
tiklīdz
jebšu
tālab
tāpēc
nekā
itin
jau
jel
nezin
tad
tikai
vis
tak
iekams
vien
# modal verbs
būt
biju
biji
bija
bijām
bijāt
esmu
esi
esam
esat
būšu
būsi
būs
būsim
būsiet
tikt
tiku
tiki
tika
tikām
tikāt
tieku
tiec
tiek
tiekam
tiekat
tikšu
tiks
tiksim
tiksiet
tapt
tapi
tapāt
topat
tapšu
tapsi
taps
tapsim
tapsiet
kļūt
kļuvu
kļuvi
kļuva
kļuvām
kļuvāt
kļūstu
kļūsti
kļūst
kļūstam
kļūstat
kļūšu
kļūsi
kļūs
kļūsim
kļūsiet
# verbs
varēt
varēju
varējām
varēšu
varēsim
var
varēji
varējāt
varēsi
varēsiet
varat
varēja
varēs

View file

@ -0,0 +1,102 @@
# https://github.com/apache/lucene-solr/blob/master/lucene/analysis/common/src/resources/org/apache/lucene/analysis/snowball/dutch_stop.txt
de
en
van
ik
te
dat
die
in
een
hij
het
niet
zijn
is
was
op
aan
met
als
voor
had
er
maar
om
hem
dan
zou
of
wat
mijn
men
dit
zo
door
over
ze
zich
bij
ook
tot
je
mij
uit
der
daar
haar
naar
heb
hoe
heeft
hebben
deze
u
want
nog
zal
me
zij
nu
ge
geen
omdat
iets
worden
toch
al
waren
veel
meer
doen
toen
moet
ben
zonder
kan
hun
dus
alles
onder
ja
eens
hier
wie
werd
altijd
doch
wordt
wezen
kunnen
ons
zelf
tegen
na
reeds
wil
kon
niets
uw
iemand
geweest
andere

View file

@ -0,0 +1,177 @@
# https://github.com/apache/lucene-solr/blob/master/lucene/analysis/common/src/resources/org/apache/lucene/analysis/snowball/norwegian_stop.txt
og
i
jeg
det
at
en
et
den
til
er
som
pa
de
med
han
av
ikke
ikkje
der
sa
var
meg
seg
men
ett
har
om
vi
min
mitt
ha
hadde
hun
na
over
da
ved
fra
du
ut
sin
dem
oss
opp
man
kan
hans
hvor
eller
hva
skal
selv
sjol
her
alle
vil
bli
ble
blei
blitt
kunne
inn
nar
vare
kom
noen
noe
ville
dere
som
deres
kun
ja
etter
ned
skulle
denne
for
deg
si
sine
sitt
mot
a
meget
hvorfor
dette
disse
uten
hvordan
ingen
din
ditt
blir
samme
hvilken
hvilke
sann
inni
mellom
var
hver
hvem
vors
hvis
bade
bare
enn
fordi
for
mange
ogsa
slik
vart
vare
bae
begge
siden
dykk
dykkar
dei
deira
deires
deim
di
da
eg
ein
eit
eitt
elles
honom
hja
ho
hoe
henne
hennar
hennes
hoss
hossen
ikkje
ingi
inkje
korleis
korso
kva
kvar
kvarhelst
kven
kvi
kvifor
me
medan
mi
mine
mykje
no
nokon
noka
nokor
noko
nokre
si
sia
sidan
so
somt
somme
um
upp
vere
vore
verte
vort
varte
vart

View file

@ -0,0 +1,129 @@
# https://github.com/apache/lucene-solr/blob/master/lucene/analysis/common/src/resources/org/apache/lucene/analysis/br/stopwords.txt
a
ainda
alem
ambas
ambos
antes
ao
aonde
aos
apos
aquele
aqueles
as
assim
com
como
contra
contudo
cuja
cujas
cujo
cujos
da
das
de
dela
dele
deles
demais
depois
desde
desta
deste
dispoe
dispoem
diversa
diversas
diversos
do
dos
durante
e
ela
elas
ele
eles
em
entao
entre
essa
essas
esse
esses
esta
estas
este
estes
ha
isso
isto
logo
mais
mas
mediante
menos
mesma
mesmas
mesmo
mesmos
na
nas
nao
nas
nem
nesse
neste
nos
o
os
ou
outra
outras
outro
outros
pelas
pelas
pelo
pelos
perante
pois
por
porque
portanto
proprio
propios
quais
qual
qualquer
quando
quanto
que
quem
quer
se
seja
sem
sendo
seu
seus
sob
sobre
sua
suas
tal
tambem
teu
teus
toda
todas
todo
todos
tua
tuas
tudo
um
uma
umas
uns

View file

@ -0,0 +1,204 @@
# https://github.com/apache/lucene-solr/blob/master/lucene/analysis/common/src/resources/org/apache/lucene/analysis/snowball/portuguese_stop.txt
de
a
o
que
e
do
da
em
um
para
com
não
uma
os
no
se
na
por
mais
as
dos
como
mas
ao
ele
das
à
seu
sua
ou
quando
muito
nos
eu
também
pelo
pela
até
isso
ela
entre
depois
sem
mesmo
aos
seus
quem
nas
me
esse
eles
você
essa
num
nem
suas
meu
às
minha
numa
pelos
elas
qual
nós
lhe
deles
essas
esses
pelas
este
dele
tu
te
vocês
vos
lhes
meus
minhas
teu
tua
teus
tuas
nosso
nossa
nossos
nossas
dela
delas
esta
estes
estas
aquele
aquela
aqueles
aquelas
isto
aquilo
estou
está
estamos
estão
estive
esteve
estivemos
estiveram
estava
estávamos
estavam
estivera
estivéramos
esteja
estejamos
estejam
estivesse
estivéssemos
estivessem
estiver
estivermos
estiverem
hei
havemos
hão
houve
houvemos
houveram
houvera
houvéramos
haja
hajamos
hajam
houvesse
houvéssemos
houvessem
houver
houvermos
houverem
houverei
houverá
houveremos
houverão
houveria
houveríamos
houveriam
sou
somos
são
era
éramos
eram
fui
foi
fomos
foram
fora
fôramos
seja
sejamos
sejam
fosse
fôssemos
fossem
for
formos
forem
serei
será
seremos
serão
seria
seríamos
seriam
tenho
tem
temos
tém
tinha
tínhamos
tinham
tive
teve
tivemos
tiveram
tivera
tivéramos
tenha
tenhamos
tenham
tivesse
tivéssemos
tivessem
tiver
tivermos
tiverem
terei
terá
teremos
terão
teria
teríamos
teriam

View file

@ -0,0 +1,231 @@
# https://github.com/apache/lucene-solr/blob/master/lucene/analysis/common/src/resources/org/apache/lucene/analysis/ro/stopwords.txt
acea
aceasta
această
aceea
acei
aceia
acel
acela
acele
acelea
acest
acesta
aceste
acestea
aceşti
aceştia
acolo
acum
ai
aia
aibă
aici
al
ăla
ale
alea
ălea
altceva
altcineva
am
ar
are
aşadar
asemenea
asta
ăsta
astăzi
astea
ăstea
ăştia
asupra
aţi
au
avea
avem
aveţi
azi
bine
bucur
bună
ca
căci
când
care
cărei
căror
cărui
cât
câte
câţi
către
câtva
ce
cel
ceva
chiar
cînd
cine
cineva
cît
cîte
cîţi
cîtva
contra
cu
cum
cumva
curând
curînd
da
dacă
dar
datorită
de
deci
deja
deoarece
departe
deşi
din
dinaintea
dintr
dintre
drept
după
ea
ei
el
ele
eram
este
eşti
eu
face
fără
fi
fie
fiecare
fii
fim
fiţi
iar
ieri
îi
îl
îmi
împotriva
în
înainte
înaintea
încât
încît
încotro
între
întrucât
întrucît
îţi
la
lângă
le
li
lîngă
lor
lui
mâine
mea
mei
mele
mereu
meu
mi
mine
mult
multă
mulţi
ne
nicăieri
nici
nimeni
nişte
noastră
noastre
noi
noştri
nostru
nu
ori
oricând
oricare
oricât
orice
oricînd
oricine
oricît
oricum
oriunde
până
pe
pentru
peste
pînă
poate
pot
prea
prima
primul
prin
printr
sa
săi
sale
sau
său
se
şi
sînt
sîntem
sînteţi
spre
sub
sunt
suntem
sunteţi
ta
tăi
tale
tău
te
ţi
ţie
tine
toată
toate
tot
toţi
totuşi
tu
un
una
unde
undeva
unei
unele
uneori
unor
vi
voastră
voastre
voi
voştri
vostru
vouă
vreo
vreun

View file

@ -0,0 +1,160 @@
# https://github.com/apache/lucene-solr/blob/master/lucene/analysis/common/src/resources/org/apache/lucene/analysis/snowball/russian_stop.txt
и
в
во
не
что
он
на
я
с
со
как
а
то
все
она
так
его
но
да
ты
к
у
же
вы
за
бы
по
только
ее
мне
было
вот
от
меня
еще
нет
о
из
ему
теперь
когда
даже
ну
вдруг
ли
если
уже
или
ни
быть
был
него
до
вас
нибудь
опять
уж
вам
сказал
ведь
там
потом
себя
ничего
ей
может
они
тут
где
есть
надо
ней
для
мы
тебя
их
чем
была
сам
чтоб
без
будто
человек
чего
раз
тоже
себе
под
жизнь
будет
ж
тогда
кто
этот
говорил
того
потому
этого
какой
совсем
ним
здесь
этом
один
почти
мой
тем
чтобы
нее
кажется
сейчас
были
куда
зачем
сказать
всех
никогда
сегодня
можно
при
наконец
два
об
другой
хоть
после
над
больше
тот
через
эти
нас
про
всего
них
какая
много
разве
сказала
три
эту
моя
впрочем
хорошо
свою
этой
перед
иногда
лучше
чуть
том
нельзя
такой
им
более
всегда
конечно
всю
между

View file

@ -0,0 +1,115 @@
# https://github.com/apache/lucene-solr/blob/master/lucene/analysis/common/src/resources/org/apache/lucene/analysis/snowball/swedish_stop.txt
och
det
att
i
en
jag
hon
som
han
den
med
var
sig
för
till
är
men
ett
om
hade
de
av
icke
mig
du
henne
sin
nu
har
inte
hans
honom
skulle
hennes
där
min
man
ej
vid
kunde
något
från
ut
när
efter
upp
vi
dem
vara
vad
över
än
dig
kan
sina
här
ha
mot
alla
under
någon
eller
allt
mycket
sedan
ju
denna
själv
detta
åt
utan
varit
hur
ingen
mitt
ni
bli
blev
oss
din
dessa
några
deras
blir
mina
samma
vilken
er
sådan
vår
blivit
dess
inom
mellan
sådant
varför
varje
vilka
ditt
vem
vilket
sitta
sådana
vart
dina
vars
vårt
våra
ert
era
vilkas

View file

@ -0,0 +1,116 @@
# https://github.com/apache/lucene-solr/blob/master/lucene/analysis/common/src/resources/org/apache/lucene/analysis/th/stopwords.txt
ไว้
ไม่
ไป
ได้
ให้
ใน
โดย
แห่ง
แล้ว
และ
แรก
แบบ
แต่
เอง
เห็น
เลย
เริ่ม
เรา
เมื่อ
เพื่อ
เพราะ
เป็นการ
เป็น
เปิดเผย
เปิด
เนื่องจาก
เดียวกัน
เดียว
เช่น
เฉพาะ
เคย
เข้า
เขา
อีก
อาจ
อะไร
ออก
อย่าง
อยู่
อยาก
หาก
หลาย
หลังจาก
หลัง
หรือ
หนึ่ง
ส่วน
ส่ง
สุด
สําหรับ
ว่า
วัน
ลง
ร่วม
ราย
รับ
ระหว่าง
รวม
ยัง
มี
มาก
มา
พร้อม
พบ
ผ่าน
ผล
บาง
น่า
นี้
นํา
นั้น
นัก
นอกจาก
ทุก
ที่สุด
ที่
ทําให้
ทํา
ทาง
ทั้งนี้
ทั้ง
ถ้า
ถูก
ถึง
ต้อง
ต่างๆ
ต่าง
ต่อ
ตาม
ตั้งแต่
ตั้ง
ด้าน
ด้วย
ดัง
ซึ่ง
ช่วง
จึง
จาก
จัด
จะ
คือ
ความ
ครั้ง
คง
ขึ้น
ของ
ขอ
ขณะ
ก่อน
ก็
การ
กับ
กัน
กว่า
กล่าว

View file

@ -0,0 +1,210 @@
# https://github.com/apache/lucene-solr/blob/master/lucene/analysis/common/src/resources/org/apache/lucene/analysis/tr/stopwords.txt
acaba
altmış
altı
ama
ancak
arada
aslında
ayrıca
bana
bazı
belki
ben
benden
beni
benim
beri
beş
bile
bin
bir
birçok
biri
birkaç
birkez
birşey
birşeyi
biz
bize
bizden
bizi
bizim
böyle
böylece
bu
buna
bunda
bundan
bunlar
bunları
bunların
bunu
bunun
burada
çok
çünkü
da
daha
dahi
de
defa
değil
diğer
diye
doksan
dokuz
dolayı
dolayısıyla
dört
edecek
eden
ederek
edilecek
ediliyor
edilmesi
ediyor
eğer
elli
en
etmesi
etti
ettiği
ettiğini
gibi
göre
halen
hangi
hatta
hem
henüz
hep
hepsi
her
herhangi
herkesin
hiç
hiçbir
için
iki
ile
ilgili
ise
işte
itibaren
itibariyle
kadar
karşın
katrilyon
kendi
kendilerine
kendini
kendisi
kendisine
kendisini
kez
ki
kim
kimden
kime
kimi
kimse
kırk
milyar
milyon
mu
mı
nasıl
ne
neden
nedenle
nerde
nerede
nereye
niye
niçin
o
olan
olarak
oldu
olduğu
olduğunu
olduklarını
olmadı
olmadığı
olmak
olması
olmayan
olmaz
olsa
olsun
olup
olur
olursa
oluyor
on
ona
ondan
onlar
onlardan
onları
onların
onu
onun
otuz
oysa
öyle
pek
rağmen
sadece
sanki
sekiz
seksen
sen
senden
seni
senin
siz
sizden
sizi
sizin
şey
şeyden
şeyi
şeyler
şöyle
şu
şuna
şunda
şundan
şunları
şunu
tarafından
trilyon
tüm
üç
üzere
var
vardı
ve
veya
ya
yani
yapacak
yapılan
yapılması
yapıyor
yapmak
yaptı
yaptığı
yaptığını
yaptıkları
yedi
yerine
yetmiş
yine
yirmi
yoksa
yüz
zaten

View file

@ -0,0 +1,77 @@
bị
bởi
cả
các
cái
cần
càng
chỉ
chiếc
cho
chứ
chưa
chuyện
có thể
cứ
của
cùng
cũng
đã
đang
đây
để
đến nỗi
đều
điều
do
đó
được
dưới
khi
không
lại
lên
lúc
mỗi
một cách
này
nên
nếu
ngay
nhiều
như
nhưng
những
nơi
nữa
phải
qua
ra
rằng
rằng
rất
rất
rồi
sau
sẽ
so
sự
tại
theo
thì
trên
trước
từ
từng
vẫn
vào
vậy
việc
với
vừa