modify index mapping

This commit is contained in:
Shinsuke Sugaya 2015-09-23 08:41:06 +09:00
parent 915b1addcc
commit f7e43cf005
8 changed files with 620 additions and 112 deletions

18
pom.xml
View file

@@ -62,7 +62,7 @@
<pdfbox.version>1.8.7</pdfbox.version>
<!-- Elasticsearch -->
<elasticsearch.version>1.7.1</elasticsearch.version>
<elasticsearch.version>1.7.2</elasticsearch.version>
<cluster.runner.version>1.7.0.0</cluster.runner.version>
<!-- Tomcat -->
@@ -232,16 +232,24 @@
<mkdir dir="${basedir}/target/plugins" />
<get dest="${basedir}/target/plugins">
<url
url="${maven.snapshot.repo.url}/org/codelibs/elasticsearch-configsync/1.6.0-SNAPSHOT/elasticsearch-configsync-1.6.0-20150716.075532-2.zip" />
url="${maven.release.repo.url}/org/codelibs/elasticsearch-analysis-kuromoji-neologd/1.7.1/elasticsearch-analysis-kuromoji-neologd-1.7.1.zip" />
<url
url="${maven.release.repo.url}/org/codelibs/elasticsearch-analysis-kuromoji-neologd/1.6.0/elasticsearch-analysis-kuromoji-neologd-1.6.0.zip" />
url="${maven.release.repo.url}/org/codelibs/elasticsearch-analysis-synonym/1.5.0/elasticsearch-analysis-synonym-1.5.0.zip" />
<url
url="${maven.snapshot.repo.url}/org/codelibs/elasticsearch-configsync/1.6.0-SNAPSHOT/elasticsearch-configsync-1.6.0-20150820.025903-5.zip" />
<url
url="${maven.snapshot.repo.url}/org/codelibs/elasticsearch-langfield/1.7.0-SNAPSHOT/elasticsearch-langfield-1.7.0-20150922.221718-3.zip" />
</get>
<delete dir="${basedir}/plugins" />
<mkdir dir="${basedir}/plugins" />
<unzip dest="${basedir}/plugins/analysis-kuromoji-neologd"
src="${basedir}/target/plugins/elasticsearch-analysis-kuromoji-neologd-1.6.0.zip" />
src="${basedir}/target/plugins/elasticsearch-analysis-kuromoji-neologd-1.7.1.zip" />
<unzip dest="${basedir}/plugins/analysis-synonym"
src="${basedir}/target/plugins/elasticsearch-analysis-synonym-1.5.0.zip" />
<unzip dest="${basedir}/plugins/configsync"
src="${basedir}/target/plugins/elasticsearch-configsync-1.6.0-20150716.075532-2.zip" />
src="${basedir}/target/plugins/elasticsearch-configsync-1.6.0-20150820.025903-5.zip" />
<unzip dest="${basedir}/plugins/langfield"
src="${basedir}/target/plugins/elasticsearch-langfield-1.7.0-20150922.221718-3.zip" />
</tasks>
</configuration>
<goals>

View file

@@ -314,6 +314,15 @@ public class FessEsClient implements Client {
logger.warn("Failed to register " + filePath, e);
}
});
try (CurlResponse response = Curl.post(runner.node(), "_configsync/flush").execute()) {
if (response.getHttpStatusCode() == 200) {
logger.info("Flushed config files.");
} else {
logger.warn("Failed to flush config files.");
}
} catch (final Exception e) {
logger.warn("Failed to flush config files.", e);
}
}
try {
@@ -719,10 +728,10 @@ public class FessEsClient implements Client {
}
}
// highlighting
if (ComponentUtil.getQueryHelper().getHighlightingFields() != null
&& ComponentUtil.getQueryHelper().getHighlightingFields().length != 0) {
for (final String hf : ComponentUtil.getQueryHelper().getHighlightingFields()) {
searchRequestBuilder.addHighlightedField(hf, ComponentUtil.getQueryHelper().getHighlightSnippetSize());
if (ComponentUtil.getQueryHelper().getHighlightedFields() != null
&& ComponentUtil.getQueryHelper().getHighlightedFields().length != 0) {
for (final String hf : ComponentUtil.getQueryHelper().getHighlightedFields()) {
searchRequestBuilder.addHighlightedField(hf, ComponentUtil.getQueryHelper().getHighlightFragmentSize());
}
}

View file

@@ -89,7 +89,7 @@ public class QueryHelper implements Serializable {
protected String[] responseDocValuesFields;
protected String[] highlightingFields;
protected String[] highlightedFields;
protected String[] searchFields;
@@ -103,7 +103,7 @@ public class QueryHelper implements Serializable {
protected String[] supportedAnalysisFields;
protected int highlightSnippetSize = 5;
protected int highlightFragmentSize = 100;
protected boolean useBigram = true;
@@ -121,7 +121,7 @@ public class QueryHelper implements Serializable {
protected List<SortField> defaultSortFieldList = new ArrayList<SortField>();
protected String highlightingPrefix = "hl_";
protected String highlightPrefix = "hl_";
protected String minimumShouldMatch = "100%";
@@ -158,8 +158,8 @@ public class QueryHelper implements Serializable {
if (responseDocValuesFields == null) {
responseDocValuesFields = new String[] { fieldHelper.clickCountField, fieldHelper.favoriteCountField };
}
if (highlightingFields == null) {
highlightingFields = new String[] { fieldHelper.contentField };
if (highlightedFields == null) {
highlightedFields = new String[] { fieldHelper.contentField };
}
if (searchFields == null) {
searchFields =
@@ -1112,17 +1112,17 @@ public class QueryHelper implements Serializable {
}
/**
* @return the highlightingFields
* @return the highlightedFields
*/
public String[] getHighlightingFields() {
return highlightingFields;
public String[] getHighlightedFields() {
return highlightedFields;
}
/**
* @param highlightingFields the highlightingFields to set
* @param highlightedFields the highlightedFields to set
*/
public void setHighlightingFields(final String[] highlightingFields) {
this.highlightingFields = highlightingFields;
public void setHighlightedFields(final String[] highlightedFields) {
this.highlightedFields = highlightedFields;
}
/**
@@ -1186,17 +1186,17 @@ public class QueryHelper implements Serializable {
}
/**
* @return the highlightSnippetSize
* @return the highlightFragmentSize
*/
public int getHighlightSnippetSize() {
return highlightSnippetSize;
public int getHighlightFragmentSize() {
return highlightFragmentSize;
}
/**
* @param highlightSnippetSize the highlightSnippetSize to set
* @param highlightFragmentSize the highlightFragmentSize to set
*/
public void setHighlightSnippetSize(final int highlightSnippetSize) {
this.highlightSnippetSize = highlightSnippetSize;
public void setHighlightFragmentSize(final int highlightFragmentSize) {
this.highlightFragmentSize = highlightFragmentSize;
}
/**
@@ -1290,12 +1290,12 @@ public class QueryHelper implements Serializable {
return defaultSortFieldList.toArray(new SortField[defaultSortFieldList.size()]);
}
public void setHighlightingPrefix(final String highlightingPrefix) {
this.highlightingPrefix = highlightingPrefix;
public void setHighlightPrefix(final String highlightPrefix) {
this.highlightPrefix = highlightPrefix;
}
public String getHighlightingPrefix() {
return highlightingPrefix;
public String getHighlightPrefix() {
return highlightPrefix;
}
public String[] getSupportedMltFields() {

View file

@@ -97,7 +97,7 @@ public class QueryResponseList implements List<Map<String, Object>> {
// build highlighting fields
final QueryHelper queryHelper = ComponentUtil.getQueryHelper();
ComponentUtil.getFieldHelper();
final String hlPrefix = queryHelper.getHighlightingPrefix();
final String hlPrefix = queryHelper.getHighlightPrefix();
for (final SearchHit searchHit : searchHits.getHits()) {
final Map<String, Object> docMap = new HashMap<String, Object>();
if (searchHit.getSource() == null) {

View file

@@ -9,11 +9,11 @@
<!-- Dictionaries -->
<postConstruct name="addConfigFile">
<arg>"fess"</arg>
<arg>"ja/mapping.txt"</arg>
<arg>"synonym.txt"</arg>
</postConstruct>
<postConstruct name="addConfigFile">
<arg>"fess"</arg>
<arg>"ja/synonym.txt"</arg>
<arg>"ja/mapping.txt"</arg>
</postConstruct>
<postConstruct name="addConfigFile">
<arg>"fess"</arg>

View file

@@ -4,6 +4,65 @@
"refresh_interval": "1s",
"number_of_shards": 5,
"number_of_replicas": 0
},
"analysis": {
"char_filter": {
"mapping_ja_cfilter": {
"type": "mapping",
"mappings_path": "ja/mapping.txt"
}
},
"filter": {
"stemmer_en_tfilter": {
"type": "stemmer",
"name": "english"
}
},
"tokenizer": {
"kuromoji_neologd_tokenizer": {
"type": "reloadable_kuromoji_neologd_tokenizer",
"mode": "normal",
"user_dictionary": "ja/kuromoji.txt",
"discard_punctuation": false,
"reload_interval":"1m"
},
"2gram_synonym_tokenizer": {
"type": "ngram_synonym",
"n": "2",
"synonyms_path": "synonym.txt",
"dynamic_reload":true,
"reload_interval":"1m"
}
},
"analyzer": {
"japanese_analyzer": {
"type": "custom",
"char_filter": [
"mapping_ja_cfilter",
"kuromoji_neologd_iteration_mark"
],
"tokenizer": "kuromoji_neologd_tokenizer",
"filter": [
"kuromoji_neologd_baseform",
"kuromoji_neologd_stemmer",
"kuromoji_neologd_part_of_speech"
]
},
"english_analyzer": {
"type": "custom",
"tokenizer": "standard",
"filter": [
"stemmer_en_tfilter"
]
},
"standard_analyzer": {
"type": "custom",
"char_filter": [
"mapping_ja_cfilter"
],
"tokenizer": "2gram_synonym_tokenizer"
}
}
}
}
}

View file

@@ -1,84 +1,516 @@
{
"doc" : {
"_all" : {
"enabled" : false
},
"_id" : {
"path" : "id"
},
"properties" : {
"anchor" : {
"type" : "string",
"index" : "not_analyzed"
},
"boost" : {
"type" : "float"
},
"click_count" : {
"type" : "long"
},
"config_id" : {
"type" : "string",
"index" : "not_analyzed"
},
"content" : {
"type" : "string"
},
"content_length" : {
"type" : "long"
},
"created" : {
"type" : "long"
},
"digest" : {
"type" : "string"
},
"doc_id" : {
"type" : "string",
"index" : "not_analyzed"
},
"favorite_count" : {
"type" : "long"
},
"filetype" : {
"type" : "string",
"index" : "not_analyzed"
},
"host" : {
"type" : "string",
"index" : "not_analyzed"
},
"id" : {
"type" : "string",
"index" : "not_analyzed"
},
"lang" : {
"type" : "string",
"index" : "not_analyzed"
},
"mimetype" : {
"type" : "string",
"index" : "not_analyzed"
},
"parent_id" : {
"type" : "string",
"index" : "not_analyzed"
},
"segment" : {
"type" : "string",
"index" : "not_analyzed"
},
"site" : {
"type" : "string",
"index" : "not_analyzed"
},
"title" : {
"type" : "string"
},
"url" : {
"type" : "string",
"index" : "not_analyzed"
"doc": {
"_all": {
"enabled": false
},
"_source": {
"enabled": true
},
"_id": {
"path": "id"
},
"dynamic_templates": [
{
"lang_ar": {
"match": "*_ar",
"mapping": {
"type": "string",
"analyzer": "standard_analyzer"
}
}
},
{
"lang_bg": {
"match": "*_bg",
"mapping": {
"type": "string",
"analyzer": "standard_analyzer"
}
}
},
{
"lang_bn": {
"match": "*_bn",
"mapping": {
"type": "string",
"analyzer": "standard_analyzer"
}
}
},
{
"lang_ca": {
"match": "*_ca",
"mapping": {
"type": "string",
"analyzer": "standard_analyzer"
}
}
},
{
"lang_cs": {
"match": "*_cs",
"mapping": {
"type": "string",
"analyzer": "standard_analyzer"
}
}
},
{
"lang_da": {
"match": "*_da",
"mapping": {
"type": "string",
"analyzer": "standard_analyzer"
}
}
},
{
"lang_de": {
"match": "*_de",
"mapping": {
"type": "string",
"analyzer": "standard_analyzer"
}
}
},
{
"lang_el": {
"match": "*_el",
"mapping": {
"type": "string",
"analyzer": "standard_analyzer"
}
}
},
{
"lang_en": {
"match": "*_en",
"mapping": {
"type": "string",
"analyzer": "english_analyzer"
}
}
},
{
"lang_es": {
"match": "*_es",
"mapping": {
"type": "string",
"analyzer": "standard_analyzer"
}
}
},
{
"lang_et": {
"match": "*_et",
"mapping": {
"type": "string",
"analyzer": "standard_analyzer"
}
}
},
{
"lang_fa": {
"match": "*_fa",
"mapping": {
"type": "string",
"analyzer": "standard_analyzer"
}
}
},
{
"lang_fi": {
"match": "*_fi",
"mapping": {
"type": "string",
"analyzer": "standard_analyzer"
}
}
},
{
"lang_fr": {
"match": "*_fr",
"mapping": {
"type": "string",
"analyzer": "standard_analyzer"
}
}
},
{
"lang_gu": {
"match": "*_gu",
"mapping": {
"type": "string",
"analyzer": "standard_analyzer"
}
}
},
{
"lang_he": {
"match": "*_he",
"mapping": {
"type": "string",
"analyzer": "standard_analyzer"
}
}
},
{
"lang_hi": {
"match": "*_hi",
"mapping": {
"type": "string",
"analyzer": "standard_analyzer"
}
}
},
{
"lang_hr": {
"match": "*_hr",
"mapping": {
"type": "string",
"analyzer": "standard_analyzer"
}
}
},
{
"lang_hu": {
"match": "*_hu",
"mapping": {
"type": "string",
"analyzer": "standard_analyzer"
}
}
},
{
"lang_id": {
"match": "*_id",
"mapping": {
"type": "string",
"analyzer": "standard_analyzer"
}
}
},
{
"lang_it": {
"match": "*_it",
"mapping": {
"type": "string",
"analyzer": "standard_analyzer"
}
}
},
{
"lang_ja": {
"match": "*_ja",
"mapping": {
"type": "string",
"analyzer": "japanese_analyzer"
}
}
},
{
"lang_ko": {
"match": "*_ko",
"mapping": {
"type": "string",
"analyzer": "standard_analyzer"
}
}
},
{
"lang_lt": {
"match": "*_lt",
"mapping": {
"type": "string",
"analyzer": "standard_analyzer"
}
}
},
{
"lang_lv": {
"match": "*_lv",
"mapping": {
"type": "string",
"analyzer": "standard_analyzer"
}
}
},
{
"lang_mk": {
"match": "*_mk",
"mapping": {
"type": "string",
"analyzer": "standard_analyzer"
}
}
},
{
"lang_ml": {
"match": "*_ml",
"mapping": {
"type": "string",
"analyzer": "standard_analyzer"
}
}
},
{
"lang_nl": {
"match": "*_nl",
"mapping": {
"type": "string",
"analyzer": "standard_analyzer"
}
}
},
{
"lang_no": {
"match": "*_no",
"mapping": {
"type": "string",
"analyzer": "standard_analyzer"
}
}
},
{
"lang_pa": {
"match": "*_pa",
"mapping": {
"type": "string",
"analyzer": "standard_analyzer"
}
}
},
{
"lang_pl": {
"match": "*_pl",
"mapping": {
"type": "string",
"analyzer": "standard_analyzer"
}
}
},
{
"lang_pt": {
"match": "*_pt",
"mapping": {
"type": "string",
"analyzer": "standard_analyzer"
}
}
},
{
"lang_ro": {
"match": "*_ro",
"mapping": {
"type": "string",
"analyzer": "standard_analyzer"
}
}
},
{
"lang_ru": {
"match": "*_ru",
"mapping": {
"type": "string",
"analyzer": "standard_analyzer"
}
}
},
{
"lang_si": {
"match": "*_si",
"mapping": {
"type": "string",
"analyzer": "standard_analyzer"
}
}
},
{
"lang_sq": {
"match": "*_sq",
"mapping": {
"type": "string",
"analyzer": "standard_analyzer"
}
}
},
{
"lang_sv": {
"match": "*_sv",
"mapping": {
"type": "string",
"analyzer": "standard_analyzer"
}
}
},
{
"lang_ta": {
"match": "*_ta",
"mapping": {
"type": "string",
"analyzer": "standard_analyzer"
}
}
},
{
"lang_te": {
"match": "*_te",
"mapping": {
"type": "string",
"analyzer": "standard_analyzer"
}
}
},
{
"lang_th": {
"match": "*_th",
"mapping": {
"type": "string",
"analyzer": "standard_analyzer"
}
}
},
{
"lang_tl": {
"match": "*_tl",
"mapping": {
"type": "string",
"analyzer": "standard_analyzer"
}
}
},
{
"lang_tr": {
"match": "*_tr",
"mapping": {
"type": "string",
"analyzer": "standard_analyzer"
}
}
},
{
"lang_uk": {
"match": "*_uk",
"mapping": {
"type": "string",
"analyzer": "standard_analyzer"
}
}
},
{
"lang_ur": {
"match": "*_ur",
"mapping": {
"type": "string",
"analyzer": "standard_analyzer"
}
}
},
{
"lang_vi": {
"match": "*_vi",
"mapping": {
"type": "string",
"analyzer": "standard_analyzer"
}
}
},
{
"lang_zh-cn": {
"match": "*_zh-cn",
"mapping": {
"type": "string",
"analyzer": "standard_analyzer"
}
}
},
{
"lang_zh-tw": {
"match": "*_zh-tw",
"mapping": {
"type": "string",
"analyzer": "standard_analyzer"
}
}
}
],
"properties": {
"anchor": {
"type": "string",
"index": "not_analyzed"
},
"boost": {
"type": "float"
},
"click_count": {
"type": "long"
},
"config_id": {
"type": "string",
"index": "not_analyzed"
},
"content": {
"type": "langstring",
"analyzer": "standard_analyzer",
"term_vector": "with_positions_offsets"
},
"content_length": {
"type": "long"
},
"created": {
"type": "long"
},
"digest": {
"type": "string"
},
"doc_id": {
"type": "string",
"index": "not_analyzed"
},
"favorite_count": {
"type": "long"
},
"filetype": {
"type": "string",
"index": "not_analyzed"
},
"host": {
"type": "string",
"index": "not_analyzed"
},
"id": {
"type": "string",
"index": "not_analyzed"
},
"lang": {
"type": "string",
"index": "not_analyzed"
},
"mimetype": {
"type": "string",
"index": "not_analyzed"
},
"parent_id": {
"type": "string",
"index": "not_analyzed"
},
"segment": {
"type": "string",
"index": "not_analyzed"
},
"site": {
"type": "string",
"index": "not_analyzed"
},
"title": {
"type": "langstring",
"analyzer": "standard_analyzer",
"term_vector": "with_positions_offsets"
},
"url": {
"type": "string",
"index": "not_analyzed"
}
}
}
}