fix #1506 append ellipsis
This commit is contained in:
parent
7b6a539b53
commit
dd5ed5a4c0
5 changed files with 68 additions and 8 deletions
|
@ -234,6 +234,9 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
|
|||
/** The key of the configuration. e.g. u0009u000Au000Bu000Cu000Du001Cu001Du001Eu001Fu0020u00A0u1680u180Eu2000u2001u2002u2003u2004u2005u2006u2007u2008u2009u200Au200Bu200Cu202Fu205Fu3000uFEFFuFFFDu00B6 */
|
||||
String CRAWLER_DOCUMENT_SPACE_CHARS = "crawler.document.space.chars";
|
||||
|
||||
/** The key of the configuration. e.g. u002eu06d4u2e3cu3002 */
|
||||
String CRAWLER_DOCUMENT_FULLSTOP_CHARS = "crawler.document.fullstop.chars";
|
||||
|
||||
/** The key of the configuration. e.g. UTF-8 */
|
||||
String CRAWLER_CRAWLING_DATA_ENCODING = "crawler.crawling.data.encoding";
|
||||
|
||||
|
@ -1912,6 +1915,13 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
|
|||
*/
|
||||
String getCrawlerDocumentSpaceChars();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'crawler.document.fullstop.chars'. <br>
|
||||
* The value is, e.g. u002eu06d4u2e3cu3002 <br>
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
*/
|
||||
String getCrawlerDocumentFullstopChars();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'crawler.crawling.data.encoding'. <br>
|
||||
* The value is, e.g. UTF-8 <br>
|
||||
|
@ -5879,6 +5889,10 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
|
|||
return get(FessConfig.CRAWLER_DOCUMENT_SPACE_CHARS);
|
||||
}
|
||||
|
||||
public String getCrawlerDocumentFullstopChars() {
|
||||
return get(FessConfig.CRAWLER_DOCUMENT_FULLSTOP_CHARS);
|
||||
}
|
||||
|
||||
public String getCrawlerCrawlingDataEncoding() {
|
||||
return get(FessConfig.CRAWLER_CRAWLING_DATA_ENCODING);
|
||||
}
|
||||
|
@ -7924,6 +7938,7 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
|
|||
defaultMap
|
||||
.put(FessConfig.CRAWLER_DOCUMENT_SPACE_CHARS,
|
||||
"u0009u000Au000Bu000Cu000Du001Cu001Du001Eu001Fu0020u00A0u1680u180Eu2000u2001u2002u2003u2004u2005u2006u2007u2008u2009u200Au200Bu200Cu202Fu205Fu3000uFEFFuFFFDu00B6");
|
||||
defaultMap.put(FessConfig.CRAWLER_DOCUMENT_FULLSTOP_CHARS, "u002eu06d4u2e3cu3002");
|
||||
defaultMap.put(FessConfig.CRAWLER_CRAWLING_DATA_ENCODING, "UTF-8");
|
||||
defaultMap.put(FessConfig.CRAWLER_WEB_PROTOCOLS, "http,https");
|
||||
defaultMap.put(FessConfig.CRAWLER_FILE_PROTOCOLS, "file,smb,ftp");
|
||||
|
|
|
@ -96,6 +96,8 @@ public interface FessProp {
|
|||
|
||||
public static final String CRAWLER_DOCUMENT_SPACE_CHARS = "crawlerDocumentSpaceChars";
|
||||
|
||||
public static final String CRAWLER_DOCUMENT_FULLSTOP_CHARS = "crawlerDocumentFullstopChars";
|
||||
|
||||
public static final String INDEX_ADMIN_ARRAY_FIELD_SET = "indexAdminArrayFieldSet";
|
||||
|
||||
public static final String INDEX_ADMIN_DATE_FIELD_SET = "indexAdminDateFieldSet";
|
||||
|
@ -1444,9 +1446,12 @@ public interface FessProp {
|
|||
String getCrawlerDocumentSpaceChars();
|
||||
|
||||
public default int[] getCrawlerDocumentSpaceCharsAsArray() {
|
||||
int[] spaceChars = (int[]) propMap.get(CRAWLER_DOCUMENT_SPACE_CHARS);
|
||||
return getCrawlerDocumentCharsAsArray(CRAWLER_DOCUMENT_SPACE_CHARS, getCrawlerDocumentSpaceChars());
|
||||
}
|
||||
|
||||
public default int[] getCrawlerDocumentCharsAsArray(final String key, final String spaceStr) {
|
||||
int[] spaceChars = (int[]) propMap.get(key);
|
||||
if (spaceChars == null) {
|
||||
final String spaceStr = getCrawlerDocumentSpaceChars();
|
||||
if (spaceStr.startsWith("u")) {
|
||||
spaceChars =
|
||||
split(spaceStr, "u").get(
|
||||
|
@ -1459,11 +1464,29 @@ public interface FessProp {
|
|||
spaceChars[i] = spaceStr.codePointAt(i);
|
||||
}
|
||||
}
|
||||
propMap.put(CRAWLER_DOCUMENT_SPACE_CHARS, spaceChars);
|
||||
propMap.put(key, spaceChars);
|
||||
}
|
||||
return spaceChars;
|
||||
}
|
||||
|
||||
String getCrawlerDocumentFullstopChars();
|
||||
|
||||
public default boolean endsWithFullstop(final String s) {
|
||||
if (StringUtil.isBlank(s)) {
|
||||
return false;
|
||||
}
|
||||
for (final int i : getCrawlerDocumentFullstopCharsAsArray()) {
|
||||
if (s.endsWith(String.valueOf(i))) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
public default int[] getCrawlerDocumentFullstopCharsAsArray() {
|
||||
return getCrawlerDocumentCharsAsArray(CRAWLER_DOCUMENT_FULLSTOP_CHARS, getCrawlerDocumentFullstopChars());
|
||||
}
|
||||
|
||||
String getQueryAdditionalResponseFields();
|
||||
|
||||
public default String[] getQueryAdditionalResponseFields(final String... fields) {
|
||||
|
|
|
@ -24,6 +24,7 @@ import java.util.ListIterator;
|
|||
import java.util.Map;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.codelibs.core.lang.StringUtil;
|
||||
import org.codelibs.core.stream.StreamUtil;
|
||||
import org.codelibs.fess.helper.QueryHelper;
|
||||
import org.codelibs.fess.helper.ViewHelper;
|
||||
|
@ -41,6 +42,8 @@ import org.slf4j.LoggerFactory;
|
|||
|
||||
public class QueryResponseList implements List<Map<String, Object>> {
|
||||
|
||||
private static final String ELLIPSIS = "...";
|
||||
|
||||
private static final String SCORE = "score";
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(QueryResponseList.class);
|
||||
|
@ -160,7 +163,10 @@ public class QueryResponseList implements List<Map<String, Object>> {
|
|||
for (int i = 0; i < fragments.length; i++) {
|
||||
texts[i] = fragments[i].string();
|
||||
}
|
||||
final String value = StringUtils.join(texts, "...");
|
||||
String value = StringUtils.join(texts, ELLIPSIS);
|
||||
if (StringUtil.isNotBlank(value) && !fessConfig.endsWithFullstop(value)) {
|
||||
value = value + ELLIPSIS;
|
||||
}
|
||||
docMap.put(hlPrefix + highlightField.getName(), value);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -152,6 +152,7 @@ crawler.document.max.alphanum.term.size=20
|
|||
crawler.document.max.symbol.term.size=10
|
||||
crawler.document.duplicate.term.removed=false
|
||||
crawler.document.space.chars=u0009u000Au000Bu000Cu000Du001Cu001Du001Eu001Fu0020u00A0u1680u180Eu2000u2001u2002u2003u2004u2005u2006u2007u2008u2009u200Au200Bu200Cu202Fu205Fu3000uFEFFuFFFDu00B6
|
||||
crawler.document.fullstop.chars=u002eu06d4u2e3cu3002
|
||||
crawler.crawling.data.encoding=UTF-8
|
||||
crawler.web.protocols=http,https
|
||||
crawler.file.protocols=file,smb,ftp
|
||||
|
|
|
@ -120,10 +120,25 @@ public class FessPropTest extends UnitFessTestCase {
|
|||
}
|
||||
};
|
||||
|
||||
int[] spaceChars = fessConfig.getCrawlerDocumentSpaceCharsAsArray();
|
||||
assertEquals(2, spaceChars.length);
|
||||
assertEquals(32, spaceChars[0]);
|
||||
assertEquals(12288, spaceChars[1]);
|
||||
int[] chars = fessConfig.getCrawlerDocumentSpaceCharsAsArray();
|
||||
assertEquals(2, chars.length);
|
||||
assertEquals(32, chars[0]);
|
||||
assertEquals(12288, chars[1]);
|
||||
}
|
||||
|
||||
public void test_getCrawlerDocumentFullstopCharsAsArray() {
|
||||
FessProp.propMap.clear();
|
||||
FessConfig fessConfig = new FessConfig.SimpleImpl() {
|
||||
@Override
|
||||
public String getCrawlerDocumentFullstopChars() {
|
||||
return "u0020u3000";
|
||||
}
|
||||
};
|
||||
|
||||
int[] chars = fessConfig.getCrawlerDocumentFullstopCharsAsArray();
|
||||
assertEquals(2, chars.length);
|
||||
assertEquals(32, chars[0]);
|
||||
assertEquals(12288, chars[1]);
|
||||
}
|
||||
|
||||
public void test_getCrawlerDocumentHtmlPrunedTagsAsArray() throws Exception {
|
||||
|
|
Loading…
Add table
Reference in a new issue