fix #445 : use TextUtil
This commit is contained in:
parent
d8224b5cea
commit
6ff0658e4e
1 changed file with 3 additions and 53 deletions
|
@@ -21,18 +21,14 @@ import java.util.Map;
|
|||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.codelibs.core.lang.StringUtil;
|
||||
import org.codelibs.fess.crawler.entity.ResponseData;
|
||||
import org.codelibs.fess.crawler.util.UnsafeStringBuilder;
|
||||
import org.codelibs.fess.crawler.util.TextUtil;
|
||||
import org.codelibs.fess.mylasta.direction.FessConfig;
|
||||
import org.codelibs.fess.util.ComponentUtil;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
public class DocumentHelper implements Serializable {
|
||||
|
||||
private static final long serialVersionUID = 1L;
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(DocumentHelper.class);
|
||||
|
||||
public String getContent(final ResponseData responseData, final String content, final Map<String, Object> dataMap) {
|
||||
if (content == null) {
|
||||
return StringUtil.EMPTY; // empty
|
||||
|
@@ -40,53 +36,7 @@ public class DocumentHelper implements Serializable {
|
|||
|
||||
final int maxAlphanumTermSize = getMaxAlphanumTermSize();
|
||||
final int maxSymbolTermSize = getMaxSymbolTermSize();
|
||||
final UnsafeStringBuilder buf = new UnsafeStringBuilder(content.length());
|
||||
boolean isSpace = false;
|
||||
int alphanumSize = 0;
|
||||
int symbolSize = 0;
|
||||
for (int i = 0; i < content.length(); i++) {
|
||||
final char c = content.charAt(i);
|
||||
if (Character.isISOControl(c) || c == '\u0020' || c == '\u3000' || c == 65533) {
|
||||
// space
|
||||
if (!isSpace) {
|
||||
buf.append(' ');
|
||||
isSpace = true;
|
||||
}
|
||||
alphanumSize = 0;
|
||||
symbolSize = 0;
|
||||
} else if ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) {
|
||||
// alphanum
|
||||
if (maxAlphanumTermSize >= 0) {
|
||||
if (alphanumSize < maxAlphanumTermSize) {
|
||||
buf.append(c);
|
||||
}
|
||||
alphanumSize++;
|
||||
} else {
|
||||
buf.append(c);
|
||||
}
|
||||
isSpace = false;
|
||||
symbolSize = 0;
|
||||
} else if ((c >= '!' && c <= '/') || (c >= ':' && c <= '@') || (c >= '[' && c <= '`') || (c >= '{' && c <= '~')) {
|
||||
// symbol
|
||||
if (maxSymbolTermSize >= 0) {
|
||||
if (symbolSize < maxSymbolTermSize) {
|
||||
buf.append(c);
|
||||
}
|
||||
symbolSize++;
|
||||
} else {
|
||||
buf.append(c);
|
||||
}
|
||||
isSpace = false;
|
||||
alphanumSize = 0;
|
||||
} else {
|
||||
buf.append(c);
|
||||
isSpace = false;
|
||||
alphanumSize = 0;
|
||||
symbolSize = 0;
|
||||
}
|
||||
}
|
||||
|
||||
return buf.toUnsafeString().trim();
|
||||
return TextUtil.normalizeText(content, content.length(), maxAlphanumTermSize, maxSymbolTermSize);
|
||||
}
|
||||
|
||||
protected int getMaxAlphanumTermSize() {
|
||||
|
@@ -111,7 +61,7 @@ public class DocumentHelper implements Serializable {
|
|||
subContent = content.substring(0, maxWidth * 2);
|
||||
}
|
||||
|
||||
final String originalStr = subContent.replaceAll("[\u0000-\u0020\u007f\u3000]+", " ").trim();
|
||||
final String originalStr = TextUtil.normalizeText(subContent, subContent.length(), -1, -1);
|
||||
return StringUtils.abbreviate(originalStr, maxWidth);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue