fix #2735 add crawler.document.append.filename
This commit is contained in:
parent
d952b31ee0
commit
e9c4a0c0b2
4 changed files with 47 additions and 8 deletions
|
@ -213,6 +213,10 @@ public abstract class AbstractFessFileTransformer extends AbstractTransformer im
|
|||
}
|
||||
buf.append(contentMeta);
|
||||
}
|
||||
final String fileName = getFileName(url, urlEncoding);
|
||||
if (StringUtil.isNotBlank(fileName) && fessConfig.isCrawlerDocumentAppendFilename()) {
|
||||
buf.append(' ').append(fileName);
|
||||
}
|
||||
final String bodyBase = buf.toString().trim();
|
||||
responseData.addMetaData(Extractor.class.getSimpleName(), extractor);
|
||||
final String body = documentHelper.getContent(crawlingConfig, responseData, bodyBase, dataMap);
|
||||
|
@ -232,7 +236,6 @@ public abstract class AbstractFessFileTransformer extends AbstractTransformer im
|
|||
putResultDataBody(dataMap, fessConfig.getIndexFieldDigest(),
|
||||
documentHelper.getDigest(responseData, bodyBase, dataMap, fessConfig.getCrawlerDocumentFileMaxDigestLengthAsInteger()));
|
||||
// title
|
||||
final String fileName = getFileName(url, urlEncoding);
|
||||
if (!hasTitle(dataMap)) {
|
||||
final String titleField = fessConfig.getIndexFieldTitle();
|
||||
dataMap.remove(titleField);
|
||||
|
|
|
@ -392,8 +392,8 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf
|
|||
// content
|
||||
final String body = getSingleNodeValue(document, getContentXpath(fessConfig, xpathConfigMap),
|
||||
prunedContent ? node -> pruneNode(node, crawlingConfig) : node -> node);
|
||||
putResultDataBody(dataMap, fessConfig.getIndexFieldContent(),
|
||||
documentHelper.getContent(crawlingConfig, responseData, body, dataMap));
|
||||
final String fileName = getFileName(url, urlEncoding);
|
||||
putResultDataContent(dataMap, responseData, fessConfig, crawlingConfig, documentHelper, body, fileName);
|
||||
if ((Constants.TRUE.equalsIgnoreCase(fieldConfigMap.get(fessConfig.getIndexFieldCache()))
|
||||
|| fessConfig.isCrawlerDocumentCacheEnabled()) && fessConfig.isSupportedDocumentCacheMimetypes(mimeType)) {
|
||||
if (responseData.getContentLength() > 0
|
||||
|
@ -429,7 +429,6 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf
|
|||
// site
|
||||
putResultDataBody(dataMap, fessConfig.getIndexFieldSite(), getSite(url, urlEncoding));
|
||||
// filename
|
||||
final String fileName = getFileName(url, urlEncoding);
|
||||
if (StringUtil.isNotBlank(fileName)) {
|
||||
putResultDataBody(dataMap, fessConfig.getIndexFieldFilename(), fileName);
|
||||
}
|
||||
|
@ -502,6 +501,16 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf
|
|||
});
|
||||
}
|
||||
|
||||
protected void putResultDataContent(final Map<String, Object> dataMap, final ResponseData responseData, final FessConfig fessConfig,
|
||||
final CrawlingConfig crawlingConfig, final DocumentHelper documentHelper, final String body, final String fileName) {
|
||||
final String content = documentHelper.getContent(crawlingConfig, responseData, body, dataMap);
|
||||
if (StringUtil.isNotBlank(fileName) && fessConfig.isCrawlerDocumentAppendFilename()) {
|
||||
putResultDataBody(dataMap, fessConfig.getIndexFieldContent(), content + " " + fileName);
|
||||
} else {
|
||||
putResultDataBody(dataMap, fessConfig.getIndexFieldContent(), content);
|
||||
}
|
||||
}
|
||||
|
||||
protected CrawlingConfig getCrawlingConfig(final ResponseData responseData) {
|
||||
final CrawlingConfigHelper crawlingConfigHelper = ComponentUtil.getCrawlingConfigHelper();
|
||||
return crawlingConfigHelper.get(responseData.getSessionId());
|
||||
|
|
|
@ -325,6 +325,9 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
|
|||
/** The key of the configuration. e.g. true */
|
||||
String CRAWLER_DOCUMENT_APPEND_DATA = "crawler.document.append.data";
|
||||
|
||||
/** The key of the configuration. e.g. false */
|
||||
String CRAWLER_DOCUMENT_APPEND_FILENAME = "crawler.document.append.filename";
|
||||
|
||||
/** The key of the configuration. e.g. 20 */
|
||||
String CRAWLER_DOCUMENT_MAX_ALPHANUM_TERM_SIZE = "crawler.document.max.alphanum.term.size";
|
||||
|
||||
|
@ -1164,7 +1167,7 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
|
|||
/** The key of the configuration. e.g. org.codelibs,org.dbflute,org.lastaflute */
|
||||
String LOGGING_APP_PACKAGES = "logging.app.packages";
|
||||
|
||||
/** The key of the configuration. e.g. 4000 */
|
||||
/** The key of the configuration. e.g. 10000 */
|
||||
String FORM_ADMIN_MAX_INPUT_SIZE = "form.admin.max.input.size";
|
||||
|
||||
/** The key of the configuration. e.g. false */
|
||||
|
@ -2651,6 +2654,20 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
|
|||
*/
|
||||
boolean isCrawlerDocumentAppendData();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'crawler.document.append.filename'. <br>
|
||||
* The value is, e.g. false <br>
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
*/
|
||||
String getCrawlerDocumentAppendFilename();
|
||||
|
||||
/**
|
||||
* Is the property for the key 'crawler.document.append.filename' true? <br>
|
||||
* The value is, e.g. false <br>
|
||||
* @return The determination, true or false. (if not found, exception but basically no way)
|
||||
*/
|
||||
boolean isCrawlerDocumentAppendFilename();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'crawler.document.max.alphanum.term.size'. <br>
|
||||
* The value is, e.g. 20 <br>
|
||||
|
@ -5422,14 +5439,14 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
|
|||
|
||||
/**
|
||||
* Get the value for the key 'form.admin.max.input.size'. <br>
|
||||
* The value is, e.g. 4000 <br>
|
||||
* The value is, e.g. 10000 <br>
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
*/
|
||||
String getFormAdminMaxInputSize();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'form.admin.max.input.size' as {@link Integer}. <br>
|
||||
* The value is, e.g. 4000 <br>
|
||||
* The value is, e.g. 10000 <br>
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
* @throws NumberFormatException When the property is not integer.
|
||||
*/
|
||||
|
@ -8009,6 +8026,14 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
|
|||
return is(FessConfig.CRAWLER_DOCUMENT_APPEND_DATA);
|
||||
}
|
||||
|
||||
public String getCrawlerDocumentAppendFilename() {
|
||||
return get(FessConfig.CRAWLER_DOCUMENT_APPEND_FILENAME);
|
||||
}
|
||||
|
||||
public boolean isCrawlerDocumentAppendFilename() {
|
||||
return is(FessConfig.CRAWLER_DOCUMENT_APPEND_FILENAME);
|
||||
}
|
||||
|
||||
public String getCrawlerDocumentMaxAlphanumTermSize() {
|
||||
return get(FessConfig.CRAWLER_DOCUMENT_MAX_ALPHANUM_TERM_SIZE);
|
||||
}
|
||||
|
@ -10727,6 +10752,7 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
|
|||
defaultMap.put(FessConfig.CRAWLER_DOCUMENT_UNKNOWN_HOSTNAME, "unknown");
|
||||
defaultMap.put(FessConfig.CRAWLER_DOCUMENT_USE_SITE_ENCODING_ON_ENGLISH, "false");
|
||||
defaultMap.put(FessConfig.CRAWLER_DOCUMENT_APPEND_DATA, "true");
|
||||
defaultMap.put(FessConfig.CRAWLER_DOCUMENT_APPEND_FILENAME, "false");
|
||||
defaultMap.put(FessConfig.CRAWLER_DOCUMENT_MAX_ALPHANUM_TERM_SIZE, "20");
|
||||
defaultMap.put(FessConfig.CRAWLER_DOCUMENT_MAX_SYMBOL_TERM_SIZE, "10");
|
||||
defaultMap.put(FessConfig.CRAWLER_DOCUMENT_DUPLICATE_TERM_REMOVED, "false");
|
||||
|
@ -10978,7 +11004,7 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
|
|||
"filetype,created,click_count,title,doc_id,url,score,site,filename,host,digest,boost,mimetype,favorite_count,_id,lang,last_modified,content_length,timestamp");
|
||||
defaultMap.put(FessConfig.LOGGING_SEARCH_USE_LOGFILE, "true");
|
||||
defaultMap.put(FessConfig.LOGGING_APP_PACKAGES, "org.codelibs,org.dbflute,org.lastaflute");
|
||||
defaultMap.put(FessConfig.FORM_ADMIN_MAX_INPUT_SIZE, "4000");
|
||||
defaultMap.put(FessConfig.FORM_ADMIN_MAX_INPUT_SIZE, "10000");
|
||||
defaultMap.put(FessConfig.FORM_ADMIN_LABEL_IN_CONFIG_ENABLED, "false");
|
||||
defaultMap.put(FessConfig.FORM_ADMIN_DEFAULT_TEMPLATE_NAME, "__TEMPLATE__");
|
||||
defaultMap.put(FessConfig.OSDD_LINK_ENABLED, "true");
|
||||
|
|
|
@ -205,6 +205,7 @@ crawler.document.site.encoding=UTF-8
|
|||
crawler.document.unknown.hostname=unknown
|
||||
crawler.document.use.site.encoding.on.english=false
|
||||
crawler.document.append.data=true
|
||||
crawler.document.append.filename=false
|
||||
crawler.document.max.alphanum.term.size=20
|
||||
crawler.document.max.symbol.term.size=10
|
||||
crawler.document.duplicate.term.removed=false
|
||||
|
|
Loading…
Add table
Reference in a new issue