fix #2563 text fragment support

This commit is contained in:
Shinsuke Sugaya 2021-04-22 14:16:49 +09:00
parent fa9e27a1c3
commit 109c012f16
5 changed files with 268 additions and 44 deletions

View file

@ -471,4 +471,11 @@ public class Constants extends CoreLibConstants {
public static final String EXECUTE_TYPE_SUGGEST = "suggest";
public static final String DEFAULT_SCRIPT = "groovy";
/** Key of the document map entry that holds the text fragments computed for a search hit. */
public static final String TEXT_FRAGMENTS = "text_fragments";
/** Value of query.highlight.text.fragment.type that builds text fragments from the highlight queries of the request. */
public static final String TEXT_FRAGMENT_TYPE_QUERY = "query";
/** Value of query.highlight.text.fragment.type that builds text fragments from the highlight fields of each hit. */
public static final String TEXT_FRAGMENT_TYPE_HIGHLIGHT = "highlight";
}

View file

@ -57,6 +57,8 @@ import org.codelibs.core.io.CloseableUtil;
import org.codelibs.core.lang.StringUtil;
import org.codelibs.core.misc.DynamicProperties;
import org.codelibs.core.stream.StreamUtil;
import org.codelibs.fesen.common.text.Text;
import org.codelibs.fesen.search.fetch.subphase.highlight.HighlightField;
import org.codelibs.fess.Constants;
import org.codelibs.fess.app.web.base.SearchForm;
import org.codelibs.fess.app.web.base.login.FessLoginAssist;
@ -114,6 +116,8 @@ public class ViewHelper {
protected static final Pattern SHARED_FOLDER_PATTERN = Pattern.compile("^file:/+[^/]\\.");
protected static final String ELLIPSIS = "...";
protected boolean encodeUrlLink = false;
protected String urlLinkEncoding = Constants.UTF_8;
@ -154,6 +158,12 @@ public class ViewHelper {
protected long facetCacheDuration = 60 * 10L; // 10min
/** Number of leading characters of a highlight fragment used as the start text of a text fragment (query.highlight.text.fragment.prefix.length). */
protected int textFragmentPrefixLength;
/** Number of trailing characters of a highlight fragment used as the end text of a text fragment (query.highlight.text.fragment.suffix.length). */
protected int textFragmentSuffixLength;
/** Maximum number of text directives appended to a single URL (query.highlight.text.fragment.size). */
protected int textFragmentSize;
@PostConstruct
public void init() {
if (logger.isDebugEnabled()) {
@ -197,6 +207,10 @@ public class ViewHelper {
}));
facetCache = CacheBuilder.newBuilder().maximumSize(1000).expireAfterWrite(facetCacheDuration, TimeUnit.SECONDS).build();
textFragmentPrefixLength = fessConfig.getQueryHighlightTextFragmentPrefixLengthAsInteger();
textFragmentSuffixLength = fessConfig.getQueryHighlightTextFragmentSuffixLengthAsInteger();
textFragmentSize = fessConfig.getQueryHighlightTextFragmentSizeAsInteger();
}
public String getContentTitle(final Map<String, Object> document) {
@ -438,14 +452,39 @@ public class ViewHelper {
}
final String mimetype = DocumentUtil.getValue(document, fessConfig.getIndexFieldMimetype(), String.class);
if (StringUtil.isNotBlank(mimetype) && "application/pdf".equals(mimetype)) {
return appendPDFSearchWord(url);
if (StringUtil.isNotBlank(mimetype)) {
switch (mimetype) {
case "text/html":
return appendHTMLSearchWord(document, url);
case "application/pdf":
return appendPDFSearchWord(document, url);
default:
break;
}
}
}
return url;
}
/**
 * Appends a text fragment directive ({@code :~:text=...&text=...}) to the URL of an HTML document.
 * <p>
 * The directives come from the {@link Constants#TEXT_FRAGMENTS} entry of the document; at most
 * {@code textFragmentSize} of them are used and they are separated (not terminated) by {@code '&'}.
 * The URL is returned unchanged when the document has no usable text fragment, so that no
 * dangling {@code #:~:} is added to the link.
 *
 * @param document the search result document (must not be null)
 * @param url the URL to decorate
 * @return the URL with the fragment directive appended, or {@code url} itself when there is nothing to append
 */
protected String appendHTMLSearchWord(final Map<String, Object> document, final String url) {
    final TextFragment[] textFragments = (TextFragment[]) document.get(Constants.TEXT_FRAGMENTS);
    if (textFragments == null) {
        return url;
    }
    final int count = Math.min(textFragmentSize, textFragments.length);
    if (count <= 0) {
        // nothing to highlight: keep the link clean instead of emitting "url#:~:"
        return url;
    }
    final StringBuilder buf = new StringBuilder(1000);
    buf.append(url);
    // A URL that already has a fragment takes the directive right after it ("#anchor:~:text=...");
    // adding a second '#' would become part of the anchor name and break it.
    buf.append(url != null && url.indexOf('#') >= 0 ? ":~:" : "#:~:");
    for (int i = 0; i < count; i++) {
        if (i > 0) {
            buf.append('&');
        }
        buf.append(textFragments[i].toURLString());
    }
    return buf.toString();
}
/**
 * Legacy single-argument overload; it forwards to the two-argument variant with no document.
 *
 * @param url the URL to decorate
 * @return the value returned by {@code appendPDFSearchWord(null, url)}
 * @deprecated use {@link #appendPDFSearchWord(Map, String)} instead
 */
@Deprecated
protected String appendPDFSearchWord(final String url) {
    final Map<String, Object> noDocument = null;
    return appendPDFSearchWord(noDocument, url);
}
protected String appendPDFSearchWord(final Map<String, Object> document, final String url) {
final String queries = (String) LaRequestUtil.getRequest().getAttribute(Constants.REQUEST_QUERIES);
if (queries != null) {
try {
@ -783,6 +822,54 @@ public class ViewHelper {
}
}
/**
 * Builds the display text for one highlight field.
 * <p>
 * The fragments of the field are joined with {@code "..."}. A trailing {@code "..."} is added
 * when the joined text is not blank and the configuration reports that it does not end with a full stop.
 *
 * @param highlightField the highlight field of a search hit
 * @return the joined text, or {@code null} when the field has no fragments
 */
public String createHighlightText(final HighlightField highlightField) {
    final Text[] fragments = highlightField.fragments();
    if (fragments == null || fragments.length == 0) {
        return null;
    }

    final String[] pieces = new String[fragments.length];
    int index = 0;
    for (final Text fragment : fragments) {
        pieces[index++] = fragment.string();
    }

    final String joined = StringUtils.join(pieces, ELLIPSIS);
    final boolean needsEllipsis = StringUtil.isNotBlank(joined) && !ComponentUtil.getFessConfig().endsWithFullstop(joined);
    return needsEllipsis ? joined + ELLIPSIS : joined;
}
/**
 * Derives text fragments from the highlight fields of a search hit.
 * <p>
 * For every highlight fragment the {@code originalHighlightTagPre} and
 * {@code originalHighlightTagPost} strings are removed. When the remaining text is longer than
 * {@code textFragmentPrefixLength + textFragmentSuffixLength}, its first
 * {@code textFragmentPrefixLength} characters become the start text and its last
 * {@code textFragmentSuffixLength} characters the end text of a new {@link TextFragment}.
 * Shorter fragments are skipped.
 *
 * @param fields the highlight fields to inspect (must not be null)
 * @return the created text fragments; empty when nothing qualifies
 */
public TextFragment[] createTextFragmentsByHighlight(final HighlightField[] fields) {
    final int minLength = textFragmentPrefixLength + textFragmentSuffixLength;
    final List<TextFragment> results = new ArrayList<>();
    for (final HighlightField field : fields) {
        final Text[] fragments = field.fragments();
        if (fragments == null) {
            continue;
        }
        for (final Text fragment : fragments) {
            final String raw = fragment.string();
            // cheap pre-check before stripping the tags: stripping can only shorten the text
            if (raw.length() <= minLength) {
                continue;
            }
            final String plain = raw.replace(originalHighlightTagPre, StringUtil.EMPTY).replace(originalHighlightTagPost, StringUtil.EMPTY);
            if (plain.length() <= minLength) {
                continue;
            }
            final String head = plain.substring(0, textFragmentPrefixLength);
            final String tail = plain.substring(plain.length() - textFragmentSuffixLength);
            results.add(new TextFragment(null, head, tail, null));
        }
    }
    return results.toArray(TextFragment[]::new);
}
/**
 * Derives text fragments from the highlight queries of the current request.
 * <p>
 * Each entry of the {@link Constants#HIGHLIGHT_QUERIES} request attribute becomes a
 * {@link TextFragment} that carries only a start text.
 *
 * @return one text fragment per highlight query; an empty array when there is no current
 *         request or the attribute is not set
 */
public TextFragment[] createTextFragmentsByQuery() {
    return LaRequestUtil.getOptionalRequest().map(request -> {
        @SuppressWarnings("unchecked")
        final Set<String> highlightQueries = (Set<String>) request.getAttribute(Constants.HIGHLIGHT_QUERIES);
        if (highlightQueries == null) {
            return new TextFragment[0];
        }
        return highlightQueries.stream().map(query -> new TextFragment(null, query, null, null)).toArray(TextFragment[]::new);
    }).orElse(new TextFragment[0]);
}
/**
 * Returns the current value of the {@code useSession} flag.
 *
 * @return the {@code useSession} field
 */
public boolean isUseSession() {
return useSession;
}
@ -827,29 +914,6 @@ public class ViewHelper {
this.actionHook = actionHook;
}
/**
 * Hook around action processing. Every method receives the action runtime together with the
 * behavior to run, and this base implementation simply runs that behavior unchanged.
 * NOTE(review): presumably subclasses override these methods to customize the matching
 * action callbacks - confirm against the callers.
 */
public static class ActionHook {
/** Applies {@code func} to {@code runtime} and returns its result. */
public ActionResponse godHandPrologue(final ActionRuntime runtime, final Function<ActionRuntime, ActionResponse> func) {
return func.apply(runtime);
}
/** Applies {@code func} to {@code runtime} and returns its result. */
public ActionResponse godHandMonologue(final ActionRuntime runtime, final Function<ActionRuntime, ActionResponse> func) {
return func.apply(runtime);
}
/** Passes {@code runtime} to {@code consumer}. */
public void godHandEpilogue(final ActionRuntime runtime, final Consumer<ActionRuntime> consumer) {
consumer.accept(runtime);
}
/** Applies {@code func} to {@code runtime} and returns its result. */
public ActionResponse hookBefore(final ActionRuntime runtime, final Function<ActionRuntime, ActionResponse> func) {
return func.apply(runtime);
}
/** Passes {@code runtime} to {@code consumer}. */
public void hookFinally(final ActionRuntime runtime, final Consumer<ActionRuntime> consumer) {
consumer.accept(runtime);
}
}
/**
 * Sets the {@code encodeUrlLink} flag (false by default).
 *
 * @param encodeUrlLink the new flag value
 */
public void setEncodeUrlLink(final boolean encodeUrlLink) {
this.encodeUrlLink = encodeUrlLink;
}
@ -873,4 +937,62 @@ public class ViewHelper {
/**
 * Sets the facet cache duration in seconds (default 600, i.e. 10 minutes).
 * NOTE(review): the facet cache appears to be built once from this value during initialization,
 * so a later call may have no effect on an existing cache - confirm.
 *
 * @param facetCacheDuration the duration in seconds
 */
public void setFacetCacheDuration(final long facetCacheDuration) {
this.facetCacheDuration = facetCacheDuration;
}
/**
 * Hook around action processing. Every method receives the action runtime together with the
 * behavior to run, and this base implementation simply runs that behavior unchanged.
 * NOTE(review): presumably subclasses override these methods to customize the matching
 * action callbacks - confirm against the callers.
 */
public static class ActionHook {
/** Applies {@code func} to {@code runtime} and returns its result. */
public ActionResponse godHandPrologue(final ActionRuntime runtime, final Function<ActionRuntime, ActionResponse> func) {
return func.apply(runtime);
}
/** Applies {@code func} to {@code runtime} and returns its result. */
public ActionResponse godHandMonologue(final ActionRuntime runtime, final Function<ActionRuntime, ActionResponse> func) {
return func.apply(runtime);
}
/** Passes {@code runtime} to {@code consumer}. */
public void godHandEpilogue(final ActionRuntime runtime, final Consumer<ActionRuntime> consumer) {
consumer.accept(runtime);
}
/** Applies {@code func} to {@code runtime} and returns its result. */
public ActionResponse hookBefore(final ActionRuntime runtime, final Function<ActionRuntime, ActionResponse> func) {
return func.apply(runtime);
}
/** Passes {@code runtime} to {@code consumer}. */
public void hookFinally(final ActionRuntime runtime, final Consumer<ActionRuntime> consumer) {
consumer.accept(runtime);
}
}
/**
 * One {@code text=} directive of a URL text fragment.
 * <p>
 * Directive syntax: {@code #:~:text=[prefix-,]textStart[,textEnd][,-suffix]}
 * <p>
 * Instances are immutable. A {@code null} start text is stored as the empty string; the optional
 * parts are omitted from the URL form when they are blank.
 */
public static class TextFragment {
    private final String prefix;

    private final String textStart;

    private final String textEnd;

    private final String suffix;

    /**
     * @param prefix text that must precede the match (optional, may be null)
     * @param textStart start of the text to match ({@code null} is treated as empty)
     * @param textEnd end of the text to match (optional, may be null)
     * @param suffix text that must follow the match (optional, may be null)
     */
    TextFragment(final String prefix, final String textStart, final String textEnd, final String suffix) {
        this.prefix = prefix;
        this.textStart = textStart == null ? StringUtil.EMPTY : textStart;
        this.textEnd = textEnd;
        this.suffix = suffix;
    }

    /**
     * Renders this fragment as a single directive, e.g. {@code text=prefix-,start,end,-suffix}.
     *
     * @return the directive with every part percent-encoded; never null
     */
    public String toURLString() {
        final StringBuilder buf = new StringBuilder();
        buf.append("text=");
        if (StringUtil.isNotBlank(prefix)) {
            buf.append(encodeToString(prefix)).append("-,");
        }
        buf.append(encodeToString(textStart));
        if (StringUtil.isNotBlank(textEnd)) {
            buf.append(',').append(encodeToString(textEnd));
        }
        if (StringUtil.isNotBlank(suffix)) {
            buf.append(",-").append(encodeToString(suffix));
        }
        return buf.toString();
    }

    /**
     * Percent-encodes one directive part.
     * <p>
     * {@link URLEncoder} produces form encoding, which differs from what a URL fragment needs:
     * a space becomes {@code '+'} (a literal plus inside a fragment, so multi-word text would
     * never match) and {@code '-'} is left bare although it is part of the directive syntax
     * ({@code prefix-,} / {@code ,-suffix}). Both are rewritten to their percent-encoded forms.
     * A literal plus in the input is already {@code %2B} at this point, so the replacement
     * only touches encoded spaces.
     */
    private String encodeToString(final String text) {
        return URLEncoder.encode(text, Constants.CHARSET_UTF_8).replace("+", "%20").replace("-", "%2D");
    }
}
}

View file

@ -825,6 +825,18 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
/** The key of the configuration. e.g. true */
String QUERY_HIGHLIGHT_BOUNDARY_POSITION_DETECT = "query.highlight.boundary.position.detect";
/** The key of the configuration. e.g. query */
String QUERY_HIGHLIGHT_TEXT_FRAGMENT_TYPE = "query.highlight.text.fragment.type";
/** The key of the configuration. e.g. 3 */
String QUERY_HIGHLIGHT_TEXT_FRAGMENT_SIZE = "query.highlight.text.fragment.size";
/** The key of the configuration. e.g. 5 */
String QUERY_HIGHLIGHT_TEXT_FRAGMENT_PREFIX_LENGTH = "query.highlight.text.fragment.prefix.length";
/** The key of the configuration. e.g. 5 */
String QUERY_HIGHLIGHT_TEXT_FRAGMENT_SUFFIX_LENGTH = "query.highlight.text.fragment.suffix.length";
/** The key of the configuration. e.g. 100000 */
String QUERY_MAX_SEARCH_RESULT_OFFSET = "query.max.search.result.offset";
@ -4086,6 +4098,58 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
*/
boolean isQueryHighlightBoundaryPositionDetect();
/**
* Get the value for the key 'query.highlight.text.fragment.type'. <br>
* The value is, e.g. query <br>
* @return The value of found property. (NotNull: if not found, exception but basically no way)
*/
String getQueryHighlightTextFragmentType();
/**
* Get the value for the key 'query.highlight.text.fragment.size'. <br>
* The value is, e.g. 3 <br>
* @return The value of found property. (NotNull: if not found, exception but basically no way)
*/
String getQueryHighlightTextFragmentSize();
/**
* Get the value for the key 'query.highlight.text.fragment.size' as {@link Integer}. <br>
* The value is, e.g. 3 <br>
* @return The value of found property. (NotNull: if not found, exception but basically no way)
* @throws NumberFormatException When the property is not integer.
*/
Integer getQueryHighlightTextFragmentSizeAsInteger();
/**
* Get the value for the key 'query.highlight.text.fragment.prefix.length'. <br>
* The value is, e.g. 5 <br>
* @return The value of found property. (NotNull: if not found, exception but basically no way)
*/
String getQueryHighlightTextFragmentPrefixLength();
/**
* Get the value for the key 'query.highlight.text.fragment.prefix.length' as {@link Integer}. <br>
* The value is, e.g. 5 <br>
* @return The value of found property. (NotNull: if not found, exception but basically no way)
* @throws NumberFormatException When the property is not integer.
*/
Integer getQueryHighlightTextFragmentPrefixLengthAsInteger();
/**
* Get the value for the key 'query.highlight.text.fragment.suffix.length'. <br>
* The value is, e.g. 5 <br>
* @return The value of found property. (NotNull: if not found, exception but basically no way)
*/
String getQueryHighlightTextFragmentSuffixLength();
/**
* Get the value for the key 'query.highlight.text.fragment.suffix.length' as {@link Integer}. <br>
* The value is, e.g. 5 <br>
* @return The value of found property. (NotNull: if not found, exception but basically no way)
* @throws NumberFormatException When the property is not integer.
*/
Integer getQueryHighlightTextFragmentSuffixLengthAsInteger();
/**
* Get the value for the key 'query.max.search.result.offset'. <br>
* The value is, e.g. 100000 <br>
@ -8250,6 +8314,34 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
return is(FessConfig.QUERY_HIGHLIGHT_BOUNDARY_POSITION_DETECT);
}
/** Returns the configured value of 'query.highlight.text.fragment.type' as a string. */
public String getQueryHighlightTextFragmentType() {
return get(FessConfig.QUERY_HIGHLIGHT_TEXT_FRAGMENT_TYPE);
}
/** Returns the configured value of 'query.highlight.text.fragment.size' as a string. */
public String getQueryHighlightTextFragmentSize() {
return get(FessConfig.QUERY_HIGHLIGHT_TEXT_FRAGMENT_SIZE);
}
/** Returns the configured value of 'query.highlight.text.fragment.size' converted by getAsInteger. */
public Integer getQueryHighlightTextFragmentSizeAsInteger() {
return getAsInteger(FessConfig.QUERY_HIGHLIGHT_TEXT_FRAGMENT_SIZE);
}
/** Returns the configured value of 'query.highlight.text.fragment.prefix.length' as a string. */
public String getQueryHighlightTextFragmentPrefixLength() {
return get(FessConfig.QUERY_HIGHLIGHT_TEXT_FRAGMENT_PREFIX_LENGTH);
}
/** Returns the configured value of 'query.highlight.text.fragment.prefix.length' converted by getAsInteger. */
public Integer getQueryHighlightTextFragmentPrefixLengthAsInteger() {
return getAsInteger(FessConfig.QUERY_HIGHLIGHT_TEXT_FRAGMENT_PREFIX_LENGTH);
}
/** Returns the configured value of 'query.highlight.text.fragment.suffix.length' as a string. */
public String getQueryHighlightTextFragmentSuffixLength() {
return get(FessConfig.QUERY_HIGHLIGHT_TEXT_FRAGMENT_SUFFIX_LENGTH);
}
/** Returns the configured value of 'query.highlight.text.fragment.suffix.length' converted by getAsInteger. */
public Integer getQueryHighlightTextFragmentSuffixLengthAsInteger() {
return getAsInteger(FessConfig.QUERY_HIGHLIGHT_TEXT_FRAGMENT_SUFFIX_LENGTH);
}
public String getQueryMaxSearchResultOffset() {
return get(FessConfig.QUERY_MAX_SEARCH_RESULT_OFFSET);
}
@ -10038,6 +10130,10 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
defaultMap.put(FessConfig.QUERY_HIGHLIGHT_PHRASE_LIMIT, "256");
defaultMap.put(FessConfig.QUERY_HIGHLIGHT_CONTENT_DESCRIPTION_FIELDS, "hl_content,digest");
defaultMap.put(FessConfig.QUERY_HIGHLIGHT_BOUNDARY_POSITION_DETECT, "true");
defaultMap.put(FessConfig.QUERY_HIGHLIGHT_TEXT_FRAGMENT_TYPE, "query");
defaultMap.put(FessConfig.QUERY_HIGHLIGHT_TEXT_FRAGMENT_SIZE, "3");
defaultMap.put(FessConfig.QUERY_HIGHLIGHT_TEXT_FRAGMENT_PREFIX_LENGTH, "5");
defaultMap.put(FessConfig.QUERY_HIGHLIGHT_TEXT_FRAGMENT_SUFFIX_LENGTH, "5");
defaultMap.put(FessConfig.QUERY_MAX_SEARCH_RESULT_OFFSET, "100000");
defaultMap.put(FessConfig.QUERY_ADDITIONAL_DEFAULT_FIELDS, "");
defaultMap.put(FessConfig.QUERY_ADDITIONAL_RESPONSE_FIELDS, "");

View file

@ -23,14 +23,11 @@ import java.util.List;
import java.util.ListIterator;
import java.util.Map;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.codelibs.core.lang.StringUtil;
import org.codelibs.core.stream.StreamUtil;
import org.codelibs.fesen.action.search.SearchResponse;
import org.codelibs.fesen.common.document.DocumentField;
import org.codelibs.fesen.common.text.Text;
import org.codelibs.fesen.search.SearchHit;
import org.codelibs.fesen.search.SearchHits;
import org.codelibs.fesen.search.aggregations.Aggregations;
@ -45,8 +42,6 @@ public class QueryResponseList implements List<Map<String, Object>> {
private static final Logger logger = LogManager.getLogger(QueryResponseList.class);
protected static final String ELLIPSIS = "...";
protected final List<Map<String, Object>> parent;
/** The value of current page number. */
@ -153,23 +148,20 @@ public class QueryResponseList implements List<Map<String, Object>> {
docMap.putAll(searchHit.getSourceAsMap());
}
final ViewHelper viewHelper = ComponentUtil.getViewHelper();
final Map<String, HighlightField> highlightFields = searchHit.getHighlightFields();
try {
if (highlightFields != null) {
for (final Map.Entry<String, HighlightField> entry : highlightFields.entrySet()) {
final HighlightField highlightField = entry.getValue();
final Text[] fragments = highlightField.fragments();
if (fragments != null && fragments.length != 0) {
final String[] texts = new String[fragments.length];
for (int i = 0; i < fragments.length; i++) {
texts[i] = fragments[i].string();
}
String value = StringUtils.join(texts, ELLIPSIS);
if (StringUtil.isNotBlank(value) && !fessConfig.endsWithFullstop(value)) {
value = value + ELLIPSIS;
}
docMap.put(hlPrefix + highlightField.getName(), value);
highlightFields.values().stream().forEach(highlightField -> {
final String text = viewHelper.createHighlightText(highlightField);
if (text != null) {
docMap.put(hlPrefix + highlightField.getName(), text);
}
});
if (Constants.TEXT_FRAGMENT_TYPE_HIGHLIGHT.equals(fessConfig.getQueryHighlightTextFragmentType())) {
docMap.put(Constants.TEXT_FRAGMENTS,
viewHelper.createTextFragmentsByHighlight(highlightFields.values().toArray(n -> new HighlightField[n])));
}
}
} catch (final Exception e) {
@ -178,8 +170,11 @@ public class QueryResponseList implements List<Map<String, Object>> {
}
}
if (Constants.TEXT_FRAGMENT_TYPE_QUERY.equals(fessConfig.getQueryHighlightTextFragmentType())) {
docMap.put(Constants.TEXT_FRAGMENTS, viewHelper.createTextFragmentsByQuery());
}
// ContentTitle
final ViewHelper viewHelper = ComponentUtil.getViewHelper();
if (viewHelper != null) {
docMap.put(fessConfig.getResponseFieldContentTitle(), viewHelper.getContentTitle(docMap));
docMap.put(fessConfig.getResponseFieldContentDescription(), viewHelper.getContentDescription(docMap));

View file

@ -429,6 +429,10 @@ query.highlight.order=score
query.highlight.phrase.limit=256
query.highlight.content.description.fields=hl_content,digest
query.highlight.boundary.position.detect=true
query.highlight.text.fragment.type=query
query.highlight.text.fragment.size=3
query.highlight.text.fragment.prefix.length=5
query.highlight.text.fragment.suffix.length=5
query.max.search.result.offset=100000
query.additional.default.fields=
query.additional.response.fields=