fix #2277 add index.filetype

This commit is contained in:
Shinsuke Sugaya 2019-10-19 21:53:33 +09:00
parent afcf896695
commit 6bcefd9b63
9 changed files with 265 additions and 252 deletions

View file

@ -15,12 +15,21 @@
*/
package org.codelibs.fess.entity;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import javax.annotation.PostConstruct;
import org.codelibs.core.lang.StringUtil;
import org.codelibs.fess.util.ComponentUtil;
import org.elasticsearch.search.aggregations.BucketOrder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class FacetInfo {
private static final Logger logger = LoggerFactory.getLogger(FacetInfo.class);
public String[] field;
public String[] query;
@ -33,6 +42,25 @@ public class FacetInfo {
public String missing;
/**
 * Extends {@link #query} with one facet query per file type known to the file type helper.
 *
 * Each added entry has the form {@code <filetype field>:<file type>}, where the field name is
 * taken from the Fess configuration. Existing entries are kept in their original order and
 * entries that are already present are not added a second time. Nothing is changed when the
 * helper reports no file types.
 */
@PostConstruct
public void init() {
    final String[] fileTypes = ComponentUtil.getFileTypeHelper().getTypes();
    if (fileTypes.length == 0) {
        return;
    }

    // query is a public field filled in by configuration; it may never have been assigned.
    final List<String> queryList = new ArrayList<>();
    if (query != null) {
        queryList.addAll(Arrays.asList(query));
    }

    final String field = ComponentUtil.getFessConfig().getIndexFieldFiletype();
    for (final String fileType : fileTypes) {
        final String facetQuery = field + ":" + fileType;
        // Avoid issuing the same facet query twice when it is also listed explicitly.
        if (!queryList.contains(facetQuery)) {
            queryList.add(facetQuery);
        }
    }

    query = queryList.toArray(new String[0]);
    if (logger.isDebugEnabled()) {
        logger.debug("loaded facet query: {}", queryList);
    }
}
public BucketOrder getBucketOrder() {
if (StringUtil.isNotBlank(sort)) {
final String[] values = sort.split("\\.");

View file

@ -15,14 +15,44 @@
*/
package org.codelibs.fess.entity;
import java.util.Collection;
import java.util.LinkedHashMap;
import java.util.Locale;
import java.util.Map;
import javax.annotation.PostConstruct;
import org.codelibs.fess.util.ComponentUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class FacetQueryView {
private static final Logger logger = LoggerFactory.getLogger(FacetQueryView.class);
protected String title;
protected Map<String, String> queryMap = new LinkedHashMap<>();
/**
 * Completes the query map with the file types known to the file type helper.
 *
 * Runs only when at least one existing query value starts with the configured filetype field
 * name. For every file type whose query is not yet a value of the map, an entry keyed by the
 * upper-cased file type name is added. The "others" entry is removed and re-inserted afterwards.
 */
@PostConstruct
public void init() {
    final String filetypeField = ComponentUtil.getFessConfig().getIndexFieldFiletype();
    final boolean hasFiletypeQuery = queryMap.values().stream().anyMatch(s -> s.startsWith(filetypeField));
    if (!hasFiletypeQuery) {
        return;
    }

    for (final String fileType : ComponentUtil.getFileTypeHelper().getTypes()) {
        final String value = filetypeField + ":" + fileType;
        if (!queryMap.containsValue(value)) {
            queryMap.put(fileType.toUpperCase(Locale.ROOT), value);
        }
    }

    // Remove-then-put: with the LinkedHashMap this field is initialised to, the entry ends up last.
    queryMap.remove("labels.facet_filetype_others");
    queryMap.put("labels.facet_filetype_others", "filetype:others");

    if (logger.isDebugEnabled()) {
        logger.debug("updated query map: {}", queryMap);
    }
}
public String getTitle() {
return title;
}

View file

@ -18,14 +18,36 @@ package org.codelibs.fess.helper;
import java.util.HashMap;
import java.util.Map;
import javax.annotation.PostConstruct;
import org.apache.commons.lang3.StringUtils;
import org.codelibs.core.lang.StringUtil;
import org.codelibs.core.stream.StreamUtil;
import org.codelibs.fess.util.ComponentUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class FileTypeHelper {
private static final Logger logger = LoggerFactory.getLogger(FileTypeHelper.class);
protected String defaultValue = "others";
protected Map<String, String> mimetypeMap = new HashMap<>();
/**
 * Loads the MIME type to file type mappings from the {@code index.filetype} configuration value.
 *
 * The value is split on newlines; blank lines are ignored. Each remaining line is expected to be
 * {@code <mime type>=<file type>}. Both sides are trimmed so that stray spaces or a trailing
 * carriage return do not end up in the map. Lines that do not yield a non-empty key and value
 * are skipped with a warning.
 */
@PostConstruct
public void init() {
    StreamUtil.split(ComponentUtil.getFessConfig().getIndexFiletype(), "\n").of(
            stream -> stream.filter(StringUtil::isNotBlank).forEach(line -> {
                final String[] values = StringUtils.split(line, "=", 2);
                if (values.length == 2) {
                    final String mimetype = values[0].trim();
                    final String filetype = values[1].trim();
                    if (!mimetype.isEmpty() && !filetype.isEmpty()) {
                        mimetypeMap.put(mimetype, filetype);
                        return;
                    }
                }
                logger.warn("ignored invalid filetype mapping: {}", line);
            }));
    if (logger.isDebugEnabled()) {
        logger.debug("loaded filetype: {}", mimetypeMap);
    }
}
/**
 * Registers a mapping from a MIME type to a file type name.
 * An existing mapping for the same MIME type is replaced.
 *
 * @param mimetype the MIME type used as the map key
 * @param filetype the file type name stored for that MIME type
 */
public void add(final String mimetype, final String filetype) {
mimetypeMap.put(mimetype, filetype);
}
@ -45,4 +67,8 @@ public class FileTypeHelper {
/**
 * Sets the default file type value held by this helper (initially "others").
 *
 * @param defaultValue the new default value
 */
public void setDefaultValue(final String defaultValue) {
this.defaultValue = defaultValue;
}
/**
 * Returns the distinct file type names currently registered as mapping values.
 * The order follows the iteration order of the underlying map.
 *
 * @return a new array of distinct file type names; empty when no mapping is registered
 */
public String[] getTypes() {
    return mimetypeMap.values().stream().distinct().toArray(String[]::new);
}
}

View file

@ -636,6 +636,60 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
/** The key of the configuration. e.g. 1m */
String INDEX_INDICES_TIMEOUT = "index.indices.timeout";
/** The key of the configuration. e.g. text/html=html
application/msword=word
application/vnd.openxmlformats-officedocument.wordprocessingml.document=word
application/vnd.ms-excel=excel
application/vnd.ms-excel.sheet.2=excel
application/vnd.ms-excel.sheet.3=excel
application/vnd.ms-excel.sheet.4=excel
application/vnd.ms-excel.workspace.3=excel
application/vnd.ms-excel.workspace.4=excel
application/vnd.openxmlformats-officedocument.spreadsheetml.sheet=excel
application/vnd.ms-powerpoint=powerpoint
application/vnd.openxmlformats-officedocument.presentationml.presentation=powerpoint
application/vnd.oasis.opendocument.text=odt
application/vnd.oasis.opendocument.spreadsheet=ods
application/vnd.oasis.opendocument.presentation=odp
application/pdf=pdf
application/x-fictionbook+xml=fb2
application/e-pub+zip=epub
application/x-ibooks+zip=ibooks
text/plain=txt
application/rtf=rtf
application/vnd.ms-htmlhelp=chm
application/zip=zip
application/x-7z-comressed=7z
application/x-bzip=bz
application/x-bzip2=bz2
application/x-tar=tar
application/x-rar-compressed=rar
video/3gp=3gp
video/3g2=3g2
video/x-msvideo=avi
video/x-flv=flv
video/mpeg=mpeg
video/mp4=mp4
video/ogv=ogv
video/quicktime=qt
video/x-m4v=m4v
audio/x-aif=aif
audio/midi=midi
audio/mpga=mpga
audio/mp4=mp4a
audio/ogg=oga
audio/x-wav=wav
image/webp=webp
image/bmp=bmp
image/x-icon=ico
image/x-icon=ico
image/png=png
image/svg+xml=svg
image/tiff=tiff
image/jpeg=jpg
*/
String INDEX_FILETYPE = "index.filetype";
/** The key of the configuration. e.g. 1000 */
String QUERY_MAX_LENGTH = "query.max.length";
@ -3311,6 +3365,65 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
*/
String getIndexIndicesTimeout();
/**
* Get the value for the key 'index.filetype'. <br>
* The value is, e.g. text/html=html
application/msword=word
application/vnd.openxmlformats-officedocument.wordprocessingml.document=word
application/vnd.ms-excel=excel
application/vnd.ms-excel.sheet.2=excel
application/vnd.ms-excel.sheet.3=excel
application/vnd.ms-excel.sheet.4=excel
application/vnd.ms-excel.workspace.3=excel
application/vnd.ms-excel.workspace.4=excel
application/vnd.openxmlformats-officedocument.spreadsheetml.sheet=excel
application/vnd.ms-powerpoint=powerpoint
application/vnd.openxmlformats-officedocument.presentationml.presentation=powerpoint
application/vnd.oasis.opendocument.text=odt
application/vnd.oasis.opendocument.spreadsheet=ods
application/vnd.oasis.opendocument.presentation=odp
application/pdf=pdf
application/x-fictionbook+xml=fb2
application/e-pub+zip=epub
application/x-ibooks+zip=ibooks
text/plain=txt
application/rtf=rtf
application/vnd.ms-htmlhelp=chm
application/zip=zip
application/x-7z-comressed=7z
application/x-bzip=bz
application/x-bzip2=bz2
application/x-tar=tar
application/x-rar-compressed=rar
video/3gp=3gp
video/3g2=3g2
video/x-msvideo=avi
video/x-flv=flv
video/mpeg=mpeg
video/mp4=mp4
video/ogv=ogv
video/quicktime=qt
video/x-m4v=m4v
audio/x-aif=aif
audio/midi=midi
audio/mpga=mpga
audio/mp4=mp4a
audio/ogg=oga
audio/x-wav=wav
image/webp=webp
image/bmp=bmp
image/x-icon=ico
image/x-icon=ico
image/png=png
image/svg+xml=svg
image/tiff=tiff
image/jpeg=jpg
<br>
* comment: filetype
* @return The value of found property. (NotNull: if not found, exception but basically no way)
*/
String getIndexFiletype();
/**
* Get the value for the key 'query.max.length'. <br>
* The value is, e.g. 1000 <br>
@ -7086,6 +7199,10 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
return get(FessConfig.INDEX_INDICES_TIMEOUT);
}
/** Returns the configured value for the key 'index.filetype' (newline-separated mimetype=filetype entries). */
public String getIndexFiletype() {
return get(FessConfig.INDEX_FILETYPE);
}
/** Returns the configured value for the key 'query.max.length' as a string. */
public String getQueryMaxLength() {
return get(FessConfig.QUERY_MAX_LENGTH);
}
@ -8775,6 +8892,9 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
defaultMap.put(FessConfig.INDEX_DELETE_TIMEOUT, "3m");
defaultMap.put(FessConfig.INDEX_HEALTH_TIMEOUT, "10m");
defaultMap.put(FessConfig.INDEX_INDICES_TIMEOUT, "1m");
defaultMap
.put(FessConfig.INDEX_FILETYPE,
"text/html=html\napplication/msword=word\napplication/vnd.openxmlformats-officedocument.wordprocessingml.document=word\napplication/vnd.ms-excel=excel\napplication/vnd.ms-excel.sheet.2=excel\napplication/vnd.ms-excel.sheet.3=excel\napplication/vnd.ms-excel.sheet.4=excel\napplication/vnd.ms-excel.workspace.3=excel\napplication/vnd.ms-excel.workspace.4=excel\napplication/vnd.openxmlformats-officedocument.spreadsheetml.sheet=excel\napplication/vnd.ms-powerpoint=powerpoint\napplication/vnd.openxmlformats-officedocument.presentationml.presentation=powerpoint\napplication/vnd.oasis.opendocument.text=odt\napplication/vnd.oasis.opendocument.spreadsheet=ods\napplication/vnd.oasis.opendocument.presentation=odp\napplication/pdf=pdf\napplication/x-fictionbook+xml=fb2\napplication/e-pub+zip=epub\napplication/x-ibooks+zip=ibooks\ntext/plain=txt\napplication/rtf=rtf\napplication/vnd.ms-htmlhelp=chm\napplication/zip=zip\napplication/x-7z-comressed=7z\napplication/x-bzip=bz\napplication/x-bzip2=bz2\napplication/x-tar=tar\napplication/x-rar-compressed=rar\nvideo/3gp=3gp\nvideo/3g2=3g2\nvideo/x-msvideo=avi\nvideo/x-flv=flv\nvideo/mpeg=mpeg\nvideo/mp4=mp4\nvideo/ogv=ogv\nvideo/quicktime=qt\nvideo/x-m4v=m4v\naudio/x-aif=aif\naudio/midi=midi\naudio/mpga=mpga\naudio/mp4=mp4a\naudio/ogg=oga\naudio/x-wav=wav\nimage/webp=webp\nimage/bmp=bmp\nimage/x-icon=ico\nimage/x-icon=ico\nimage/png=png\nimage/svg+xml=svg\nimage/tiff=tiff\nimage/jpeg=jpg\n");
defaultMap.put(FessConfig.QUERY_MAX_LENGTH, "1000");
defaultMap.put(FessConfig.QUERY_TIMEOUT, "10000");
defaultMap.put(FessConfig.QUERY_TIMEOUT_LOGGING, "true");

View file

@ -61,49 +61,6 @@
"content_length:[100000 TO 499999]",
"content_length:[500000 TO 999999]",
"content_length:[1000000 TO *]",
"filetype:html",
"filetype:word",
"filetype:excel",
"filetype:powerpoint",
"filetype:odt",
"filetype:ods",
"filetype:odp",
"filetype:pdf",
"filetype:fb2",
"filetype:epub",
"filetype:ibooks",
"filetype:txt",
"filetype:rtf",
"filetype:chm",
"filetype:zip",
"filetype:7z",
"filetype:bz",
"filetype:bz2",
"filetype:tar",
"filetype:rar",
"filetype:3gp",
"filetype:3g2",
"filetype:avi",
"filetype:flv",
"filetype:mpeg",
"filetype:mp4",
"filetype:ogv",
"filetype:qt",
"filetype:m4v",
"filetype:aif",
"filetype:mid",
"filetype:mpga",
"filetype:mp4a",
"filetype:oga",
"filetype:ogg",
"filetype:wav",
"filetype:webp",
"filetype:bmp",
"filetype:ico",
"filetype:png",
"filetype:svg",
"filetype:tiff",
"filetype:jpg",
"filetype:others"
]
</property>
@ -300,7 +257,7 @@
</postConstruct>
<postConstruct name="addQuery">
<arg>"labels.facet_filetype_mid"</arg>
<arg>"filetype:mid"</arg>
<arg>"filetype:midi"</arg>
</postConstruct>
<postConstruct name="addQuery">
<arg>"labels.facet_filetype_mpga"</arg>

View file

@ -27,6 +27,8 @@
</component>
<component name="documentHelper" class="org.codelibs.fess.helper.DocumentHelper">
</component>
<component name="fileTypeHelper" class="org.codelibs.fess.helper.FileTypeHelper">
</component>
<component name="indexingHelper" class="org.codelibs.fess.helper.IndexingHelper">
</component>
<component name="pathMappingHelper" class="org.codelibs.fess.helper.PathMappingHelper">

View file

@ -326,6 +326,61 @@ index.delete.timeout=3m
index.health.timeout=10m
index.indices.timeout=1m
# filetype: newline-separated "<mime type>=<file type>" entries
index.filetype=\
text/html=html\n\
application/msword=word\n\
application/vnd.openxmlformats-officedocument.wordprocessingml.document=word\n\
application/vnd.ms-excel=excel\n\
application/vnd.ms-excel.sheet.2=excel\n\
application/vnd.ms-excel.sheet.3=excel\n\
application/vnd.ms-excel.sheet.4=excel\n\
application/vnd.ms-excel.workspace.3=excel\n\
application/vnd.ms-excel.workspace.4=excel\n\
application/vnd.openxmlformats-officedocument.spreadsheetml.sheet=excel\n\
application/vnd.ms-powerpoint=powerpoint\n\
application/vnd.openxmlformats-officedocument.presentationml.presentation=powerpoint\n\
application/vnd.oasis.opendocument.text=odt\n\
application/vnd.oasis.opendocument.spreadsheet=ods\n\
application/vnd.oasis.opendocument.presentation=odp\n\
application/pdf=pdf\n\
application/x-fictionbook+xml=fb2\n\
application/e-pub+zip=epub\n\
application/x-ibooks+zip=ibooks\n\
text/plain=txt\n\
application/rtf=rtf\n\
application/vnd.ms-htmlhelp=chm\n\
application/zip=zip\n\
application/x-7z-compressed=7z\n\
application/x-bzip=bz\n\
application/x-bzip2=bz2\n\
application/x-tar=tar\n\
application/x-rar-compressed=rar\n\
video/3gp=3gp\n\
video/3g2=3g2\n\
video/x-msvideo=avi\n\
video/x-flv=flv\n\
video/mpeg=mpeg\n\
video/mp4=mp4\n\
video/ogv=ogv\n\
video/quicktime=qt\n\
video/x-m4v=m4v\n\
audio/x-aif=aif\n\
audio/midi=midi\n\
audio/mpga=mpga\n\
audio/mp4=mp4a\n\
audio/ogg=oga\n\
audio/x-wav=wav\n\
image/webp=webp\n\
image/bmp=bmp\n\
image/x-icon=ico\n\
image/png=png\n\
image/svg+xml=svg\n\
image/tiff=tiff\n\
image/jpeg=jpg
# query
query.max.length=1000
query.timeout=10000

View file

@ -43,212 +43,6 @@
</postConstruct>
-->
</component>
<component name="fileTypeHelper" class="org.codelibs.fess.helper.FileTypeHelper">
<postConstruct name="add">
<arg>"text/html"</arg>
<arg>"html"</arg>
</postConstruct>
<postConstruct name="add">
<arg>"application/msword"</arg>
<arg>"word"</arg>
</postConstruct>
<postConstruct name="add">
<arg>"application/vnd.openxmlformats-officedocument.wordprocessingml.document"</arg>
<arg>"word"</arg>
</postConstruct>
<postConstruct name="add">
<arg>"application/vnd.ms-excel"</arg>
<arg>"excel"</arg>
</postConstruct>
<postConstruct name="add">
<arg>"application/vnd.ms-excel.sheet.2"</arg>
<arg>"excel"</arg>
</postConstruct>
<postConstruct name="add">
<arg>"application/vnd.ms-excel.sheet.3"</arg>
<arg>"excel"</arg>
</postConstruct>
<postConstruct name="add">
<arg>"application/vnd.ms-excel.sheet.4"</arg>
<arg>"excel"</arg>
</postConstruct>
<postConstruct name="add">
<arg>"application/vnd.ms-excel.workspace.3"</arg>
<arg>"excel"</arg>
</postConstruct>
<postConstruct name="add">
<arg>"application/vnd.ms-excel.workspace.4"</arg>
<arg>"excel"</arg>
</postConstruct>
<postConstruct name="add">
<arg>"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"</arg>
<arg>"excel"</arg>
</postConstruct>
<postConstruct name="add">
<arg>"application/vnd.ms-powerpoint"</arg>
<arg>"powerpoint"</arg>
</postConstruct>
<postConstruct name="add">
<arg>"application/vnd.openxmlformats-officedocument.presentationml.presentation"</arg>
<arg>"powerpoint"</arg>
</postConstruct>
<postConstruct name="add">
<arg>"application/vnd.oasis.opendocument.text"</arg>
<arg>"odt"</arg>
</postConstruct>
<postConstruct name="add">
<arg>"application/vnd.oasis.opendocument.spreadsheet"</arg>
<arg>"ods"</arg>
</postConstruct>
<postConstruct name="add">
<arg>"application/vnd.oasis.opendocument.presentation"</arg>
<arg>"odp"</arg>
</postConstruct>
<postConstruct name="add">
<arg>"application/pdf"</arg>
<arg>"pdf"</arg>
</postConstruct>
<postConstruct name="add">
<arg>"application/x-fictionbook+xml"</arg>
<arg>"fb2"</arg>
</postConstruct>
<postConstruct name="add">
<arg>"application/e-pub+zip"</arg>
<arg>"epub"</arg>
</postConstruct>
<postConstruct name="add">
<arg>"application/x-ibooks+zip"</arg>
<arg>"ibooks"</arg>
</postConstruct>
<postConstruct name="add">
<arg>"text/plain"</arg>
<arg>"txt"</arg>
</postConstruct>
<postConstruct name="add">
<arg>"application/rtf"</arg>
<arg>"rtf"</arg>
</postConstruct>
<postConstruct name="add">
<arg>"application/vnd.ms-htmlhelp"</arg>
<arg>"chm"</arg>
</postConstruct>
<postConstruct name="add">
<arg>"application/zip"</arg>
<arg>"zip"</arg>
</postConstruct>
<postConstruct name="add">
<arg>"application/x-7z-comressed"</arg>
<arg>"7z"</arg>
</postConstruct>
<postConstruct name="add">
<arg>"application/x-bzip"</arg>
<arg>"bz"</arg>
</postConstruct>
<postConstruct name="add">
<arg>"application/x-bzip2"</arg>
<arg>"bz2"</arg>
</postConstruct>
<postConstruct name="add">
<arg>"application/x-tar"</arg>
<arg>"tar"</arg>
</postConstruct>
<postConstruct name="add">
<arg>"application/x-rar-compressed"</arg>
<arg>"rar"</arg>
</postConstruct>
<postConstruct name="add">
<arg>"video/3gp"</arg>
<arg>"3gp"</arg>
</postConstruct>
<postConstruct name="add">
<arg>"video/3g2"</arg>
<arg>"3g2"</arg>
</postConstruct>
<postConstruct name="add">
<arg>"video/x-msvideo"</arg>
<arg>"avi"</arg>
</postConstruct>
<postConstruct name="add">
<arg>"video/x-flv"</arg>
<arg>"flv"</arg>
</postConstruct>
<postConstruct name="add">
<arg>"video/mpeg"</arg>
<arg>"mpeg"</arg>
</postConstruct>
<postConstruct name="add">
<arg>"video/mp4"</arg>
<arg>"mp4"</arg>
</postConstruct>
<postConstruct name="add">
<arg>"video/ogv"</arg>
<arg>"ogv"</arg>
</postConstruct>
<postConstruct name="add">
<arg>"video/quicktime"</arg>
<arg>"qt"</arg>
</postConstruct>
<postConstruct name="add">
<arg>"video/x-m4v"</arg>
<arg>"m4v"</arg>
</postConstruct>
<postConstruct name="add">
<arg>"audio/x-aif"</arg>
<arg>"aif"</arg>
</postConstruct>
<postConstruct name="add">
<arg>"audio/midi"</arg>
<arg>"midi"</arg>
</postConstruct>
<postConstruct name="add">
<arg>"audio/mpga"</arg>
<arg>"mpga"</arg>
</postConstruct>
<postConstruct name="add">
<arg>"audio/mp4"</arg>
<arg>"mp4a"</arg>
</postConstruct>
<postConstruct name="add">
<arg>"audio/ogg"</arg>
<arg>"oga"</arg>
</postConstruct>
<postConstruct name="add">
<arg>"audio/x-wav"</arg>
<arg>"wav"</arg>
</postConstruct>
<postConstruct name="add">
<arg>"image/webp"</arg>
<arg>"webp"</arg>
</postConstruct>
<postConstruct name="add">
<arg>"image/bmp"</arg>
<arg>"bmp"</arg>
</postConstruct>
<postConstruct name="add">
<arg>"image/x-icon"</arg>
<arg>"ico"</arg>
</postConstruct>
<postConstruct name="add">
<arg>"image/x-icon"</arg>
<arg>"ico"</arg>
</postConstruct>
<postConstruct name="add">
<arg>"image/png"</arg>
<arg>"png"</arg>
</postConstruct>
<postConstruct name="add">
<arg>"image/svg+xml"</arg>
<arg>"svg"</arg>
</postConstruct>
<postConstruct name="add">
<arg>"image/tiff"</arg>
<arg>"tiff"</arg>
</postConstruct>
<postConstruct name="add">
<arg>"image/jpeg"</arg>
<arg>"jpg"</arg>
</postConstruct>
</component>
<component name="fessCrawler" class="org.codelibs.fess.exec.Crawler"
instance="prototype">
</component>

View file

@ -158,10 +158,11 @@
key="${facetQueryView.title}" /></li>
<c:set var="facetFound" value="F"/>
<c:forEach var="queryEntry" items="${facetQueryView.queryMap}">
<c:if test="${facetResponse.queryCountMap[queryEntry.value] != 0}">
<c:if test="${facetResponse.queryCountMap[queryEntry.value] > 0}">
<li class="list-group-item"><la:link
href="/search?q=${f:u(q)}&ex_q=${f:u(queryEntry.value)}&sdh=${f:u(fe:sdh(sdh))}${fe:pagingQuery(queryEntry.value)}${fe:facetQuery()}${fe:geoQuery()}">
<la:message key="${queryEntry.key}" />
<c:if test="${fn:startsWith(queryEntry.key, 'labels.')}"><la:message key="${queryEntry.key}" /></c:if>
<c:if test="${not fn:startsWith(queryEntry.key, 'labels.')}">${f:h(queryEntry.key)}</c:if>
<span class="badge badge-secondary badge-pill float-right">${f:h(facetResponse.queryCountMap[queryEntry.value])}</span>
</la:link></li>
<c:set var="facetFound" value="T"/>