diff --git a/plugin.xml b/plugin.xml index 064f40dfc..52b145b04 100644 --- a/plugin.xml +++ b/plugin.xml @@ -40,8 +40,8 @@ - - + + diff --git a/pom.xml b/pom.xml index f8e7aeb07..522b1d57b 100644 --- a/pom.xml +++ b/pom.xml @@ -59,7 +59,7 @@ 0.6.0F - 1.0.6 + 1.0.7 2.1.1 @@ -769,7 +769,12 @@ org.codelibs corelib - 0.3.3 + 0.3.4 + + + commons-io + commons-io + 2.4 org.apache.commons diff --git a/src/main/java/org/codelibs/fess/Constants.java b/src/main/java/org/codelibs/fess/Constants.java index 82a560bd8..1878c43e1 100644 --- a/src/main/java/org/codelibs/fess/Constants.java +++ b/src/main/java/org/codelibs/fess/Constants.java @@ -218,8 +218,6 @@ public class Constants extends CoreLibConstants { public static final String INDEXING_TARGET = "indexingTarget"; - public static final String DIGEST_PREFIX = "..."; - public static final String BASIC = "BASIC"; public static final String DIGEST = "DIGEST"; @@ -376,4 +374,12 @@ public class Constants extends CoreLibConstants { public static final String PAGING_QUERY_LIST = "pagingQueryList"; public static final String REQUEST_LANGUAGES = "requestLanguages"; + + public static final String SEARCH_PREFERENCE_PRIMARY = "_primary"; + + public static final String CONFIG_IGNORE_FAILURE_URLS = "ignore.failureUrls"; + + public static final String CONFIG_CLEANUP_FILTERS = "cleanup.urlFilters"; + + public static final String CONFIG_CLEANUP_ALL = "cleanup.all"; } diff --git a/src/main/java/org/codelibs/fess/api/json/JsonApiManager.java b/src/main/java/org/codelibs/fess/api/json/JsonApiManager.java index a8470c1ec..5e7b59c94 100644 --- a/src/main/java/org/codelibs/fess/api/json/JsonApiManager.java +++ b/src/main/java/org/codelibs/fess/api/json/JsonApiManager.java @@ -16,7 +16,6 @@ package org.codelibs.fess.api.json; import java.io.IOException; -import java.io.StringWriter; import java.net.URLDecoder; import java.text.SimpleDateFormat; import java.util.ArrayList; @@ -57,6 +56,7 @@ import org.codelibs.fess.util.DocumentUtil; import org.codelibs.fess.util.FacetResponse; import org.codelibs.fess.util.FacetResponse.Field; import org.codelibs.fess.util.StreamUtil; +import org.dbflute.optional.OptionalThing; import org.elasticsearch.script.Script; import org.lastaflute.web.util.LaRequestUtil; import org.slf4j.Logger; @@ -140,12 +140,12 @@ public class JsonApiManager extends BaseApiManager { int status = 0; String errMsg = StringUtil.EMPTY; String query = null; - final StringBuilder buf = new StringBuilder(1000); + final StringBuilder buf = new StringBuilder(1000); // TODO replace response stream request.setAttribute(Constants.SEARCH_LOG_ACCESS_TYPE, Constants.SEARCH_LOG_ACCESS_TYPE_JSON); try { final SearchRenderData data = new SearchRenderData(); final SearchApiRequestParams params = new SearchApiRequestParams(request, fessConfig); - searchService.search(request, params, data); + searchService.search(request, params, data, OptionalThing.empty()); query = params.getQuery(); final String execTime = data.getExecTime(); final String queryTime = Long.toString(data.getQueryTime()); @@ -277,7 +277,7 @@ public class JsonApiManager extends BaseApiManager { int status = 0; String errMsg = StringUtil.EMPTY; - final StringBuilder buf = new StringBuilder(255); + final StringBuilder buf = new StringBuilder(255); // TODO replace response stream try { final List> labelTypeItems = labelTypeHelper.getLabelTypeItemList(); buf.append("\"record_count\":"); @@ -327,7 +327,7 @@ public class JsonApiManager extends BaseApiManager { int status = 0; String errMsg = StringUtil.EMPTY; - final StringBuilder buf = new StringBuilder(255); + final StringBuilder buf = new StringBuilder(255); // TODO replace response stream try { final List popularWordList = popularWordHelper.getWordList(seed, tags, null, fields, excludes); @@ -379,52 +379,53 @@ public class JsonApiManager extends BaseApiManager { throw new WebApiException(6, "No searched urls."); } - searchService.getDocumentByDocId(docId, new String[] { fessConfig.getIndexFieldUrl() }).ifPresent(doc -> { - final String favoriteUrl = DocumentUtil.getValue(doc, fessConfig.getIndexFieldUrl(), String.class); - final String userCode = userInfoHelper.getUserCode(); + searchService.getDocumentByDocId(docId, new String[] { fessConfig.getIndexFieldUrl() }, OptionalThing.empty()) + .ifPresent(doc -> { + final String favoriteUrl = DocumentUtil.getValue(doc, fessConfig.getIndexFieldUrl(), String.class); + final String userCode = userInfoHelper.getUserCode(); - if (StringUtil.isBlank(userCode)) { - throw new WebApiException(2, "No user session."); - } else if (StringUtil.isBlank(favoriteUrl)) { - throw new WebApiException(2, "URL is null."); - } + if (StringUtil.isBlank(userCode)) { + throw new WebApiException(2, "No user session."); + } else if (StringUtil.isBlank(favoriteUrl)) { + throw new WebApiException(2, "URL is null."); + } - boolean found = false; - for (final String id : docIds) { - if (docId.equals(id)) { - found = true; - break; - } - } - if (!found) { - throw new WebApiException(5, "Not found: " + favoriteUrl); - } + boolean found = false; + for (final String id : docIds) { + if (docId.equals(id)) { + found = true; + break; + } + } + if (!found) { + throw new WebApiException(5, "Not found: " + favoriteUrl); + } - if (!favoriteLogService.addUrl(userCode, (userInfo, favoriteLog) -> { - favoriteLog.setUserInfoId(userInfo.getId()); - favoriteLog.setUrl(favoriteUrl); - favoriteLog.setDocId(docId); - favoriteLog.setQueryId(queryId); - favoriteLog.setCreatedAt(systemHelper.getCurrentTimeAsLocalDateTime()); - })) { - throw new WebApiException(4, "Failed to add url: " + favoriteUrl); - } + if (!favoriteLogService.addUrl(userCode, (userInfo, favoriteLog) -> { + favoriteLog.setUserInfoId(userInfo.getId()); + favoriteLog.setUrl(favoriteUrl); + favoriteLog.setDocId(docId); + favoriteLog.setQueryId(queryId); + favoriteLog.setCreatedAt(systemHelper.getCurrentTimeAsLocalDateTime()); + })) { + throw new WebApiException(4, "Failed to add url: " + favoriteUrl); + } - final String id = DocumentUtil.getValue(doc, fessConfig.getIndexFieldId(), String.class); - searchService.update(id, builder -> { - final Script script = new Script("ctx._source." + fessConfig.getIndexFieldFavoriteCount() + "+=1"); - builder.setScript(script); - final Map upsertMap = new HashMap<>(); - upsertMap.put(fessConfig.getIndexFieldFavoriteCount(), 1); - builder.setUpsert(upsertMap); - builder.setRefresh(true); - }); + final String id = DocumentUtil.getValue(doc, fessConfig.getIndexFieldId(), String.class); + searchService.update(id, builder -> { + final Script script = new Script("ctx._source." + fessConfig.getIndexFieldFavoriteCount() + "+=1"); + builder.setScript(script); + final Map upsertMap = new HashMap<>(); + upsertMap.put(fessConfig.getIndexFieldFavoriteCount(), 1); + builder.setUpsert(upsertMap); + builder.setRefresh(true); + }); - writeJsonResponse(0, "\"result\":\"ok\"", null); + writeJsonResponse(0, "\"result\":\"ok\"", null); - }).orElse(() -> { - throw new WebApiException(6, "Not found: " + docId); - }); + }).orElse(() -> { + throw new WebApiException(6, "Not found: " + docId); + }); } catch (final Exception e) { int status; @@ -471,7 +472,7 @@ public class JsonApiManager extends BaseApiManager { searchService.getDocumentListByDocIds( docIds, new String[] { fessConfig.getIndexFieldUrl(), fessConfig.getIndexFieldDocId(), - fessConfig.getIndexFieldFavoriteCount() }); + fessConfig.getIndexFieldFavoriteCount() }, OptionalThing.empty()); List urlList = new ArrayList<>(docList.size()); for (final Map doc : docList) { final String urlObj = DocumentUtil.getValue(doc, fessConfig.getIndexFieldUrl(), String.class); @@ -491,7 +492,7 @@ public class JsonApiManager extends BaseApiManager { } } - final StringBuilder buf = new StringBuilder(); + final StringBuilder buf = new StringBuilder(255); // TODO replace response stream buf.append("\"num\":").append(docIdList.size()); if (!docIdList.isEmpty()) { buf.append(", \"doc_ids\":["); @@ -601,7 +602,7 @@ public class JsonApiManager extends BaseApiManager { protected static String escapeJsonString(final String str) { - final StringWriter out = new StringWriter(str.length() * 2); + final StringBuilder out = new StringBuilder(str.length() * 2); int sz; sz = str.length(); for (int i = 0; i < sz; i++) { @@ -609,59 +610,59 @@ public class JsonApiManager extends BaseApiManager { // handle unicode if (ch > 0xfff) { - out.write("\\u"); - out.write(hex(ch)); + out.append("\\u"); + out.append(hex(ch)); } else if (ch > 0xff) { - out.write("\\u0"); - out.write(hex(ch)); + out.append("\\u0"); + out.append(hex(ch)); } else if (ch > 0x7f) { - out.write("\\u00"); - out.write(hex(ch)); + out.append("\\u00"); + out.append(hex(ch)); } else if (ch < 32) { switch (ch) { case '\b': - out.write('\\'); - out.write('b'); + out.append('\\'); + out.append('b'); break; case '\n': - out.write('\\'); - out.write('n'); + out.append('\\'); + out.append('n'); break; case '\t': - out.write('\\'); - out.write('t'); + out.append('\\'); + out.append('t'); break; case '\f': - out.write('\\'); - out.write('f'); + out.append('\\'); + out.append('f'); break; case '\r': - out.write('\\'); - out.write('r'); + out.append('\\'); + out.append('r'); break; default: if (ch > 0xf) { - out.write("\\u00"); - out.write(hex(ch)); + out.append("\\u00"); + out.append(hex(ch)); } else { - out.write("\\u000"); - out.write(hex(ch)); + out.append("\\u000"); + out.append(hex(ch)); } break; } } else { switch (ch) { case '"': - out.write("\\u0022"); + out.append("\\u0022"); break; case '\\': - out.write("\\u005C"); + out.append("\\u005C"); break; case '/': - out.write("\\u002F"); + out.append("\\u002F"); break; default: - out.write(ch); + out.append(ch); break; } } diff --git a/src/main/java/org/codelibs/fess/api/suggest/SuggestApiManager.java b/src/main/java/org/codelibs/fess/api/suggest/SuggestApiManager.java index ca9dd1ef3..21699a2b2 100644 --- a/src/main/java/org/codelibs/fess/api/suggest/SuggestApiManager.java +++ b/src/main/java/org/codelibs/fess/api/suggest/SuggestApiManager.java @@ -59,7 +59,7 @@ public class SuggestApiManager extends BaseApiManager { ServletException { int status = 0; String errMsg = StringUtil.EMPTY; - final StringBuilder buf = new StringBuilder(255); + final StringBuilder buf = new StringBuilder(255); // TODO replace response stream final RoleQueryHelper roleQueryHelper = ComponentUtil.getRoleQueryHelper(); try { diff --git a/src/main/java/org/codelibs/fess/app/job/ScriptExecutorJob.java b/src/main/java/org/codelibs/fess/app/job/ScriptExecutorJob.java index 82c04e934..5ed3a95c9 100644 --- a/src/main/java/org/codelibs/fess/app/job/ScriptExecutorJob.java +++ b/src/main/java/org/codelibs/fess/app/job/ScriptExecutorJob.java @@ -23,7 +23,6 @@ import org.codelibs.fess.helper.SystemHelper; import org.codelibs.fess.job.JobExecutor; import org.codelibs.fess.job.ScheduledJobException; import org.codelibs.fess.util.ComponentUtil; -import org.lastaflute.di.core.SingletonLaContainer; import org.lastaflute.job.JobManager; import org.lastaflute.job.LaJob; import org.lastaflute.job.LaJobRuntime; @@ -94,7 +93,7 @@ public class ScriptExecutorJob implements LaJob { } private void storeJobLog(final JobLog jobLog) { - final JobLogService jobLogService = SingletonLaContainer.getComponent(JobLogService.class); + final JobLogService jobLogService = ComponentUtil.getComponent(JobLogService.class); jobLogService.store(jobLog); } diff --git a/src/main/java/org/codelibs/fess/app/service/FailureUrlService.java b/src/main/java/org/codelibs/fess/app/service/FailureUrlService.java index cce3c8ec4..629ee426d 100644 --- a/src/main/java/org/codelibs/fess/app/service/FailureUrlService.java +++ b/src/main/java/org/codelibs/fess/app/service/FailureUrlService.java @@ -17,7 +17,6 @@ package org.codelibs.fess.app.service; import java.io.PrintWriter; import java.io.Serializable; -import java.io.StringWriter; import java.util.ArrayList; import java.util.Collections; import java.util.List; @@ -25,6 +24,7 @@ import java.util.regex.Pattern; import javax.annotation.Resource; +import org.apache.commons.io.output.StringBuilderWriter; import org.apache.commons.lang3.StringUtils; import org.codelibs.core.beans.util.BeanUtil; import org.codelibs.core.lang.StringUtil; @@ -41,7 +41,6 @@ import org.codelibs.fess.util.ComponentUtil; import org.dbflute.cbean.result.ListResultBean; import org.dbflute.cbean.result.PagingResultBean; import org.dbflute.optional.OptionalEntity; -import org.lastaflute.di.core.SingletonLaContainer; public class FailureUrlService implements Serializable { @@ -171,7 +170,7 @@ public class FailureUrlService implements Serializable { } public void store(final CrawlingConfig crawlingConfig, final String errorName, final String url, final Throwable e) { - final FailureUrlBhv bhv = SingletonLaContainer.getComponent(FailureUrlBhv.class); + final FailureUrlBhv bhv = ComponentUtil.getComponent(FailureUrlBhv.class); FailureUrl failureUrl = bhv.selectEntity(cb -> { cb.query().setUrl_Equal(url); if (crawlingConfig != null) { @@ -201,7 +200,7 @@ public class FailureUrlService implements Serializable { private String getStackTrace(final Throwable t) { final SystemHelper systemHelper = ComponentUtil.getSystemHelper(); - final StringWriter sw = new StringWriter(); + final StringBuilderWriter sw = new StringBuilderWriter(); final PrintWriter pw = new PrintWriter(sw, true); t.printStackTrace(pw); return systemHelper.abbreviateLongText(sw.toString()); diff --git a/src/main/java/org/codelibs/fess/app/service/SearchService.java b/src/main/java/org/codelibs/fess/app/service/SearchService.java index 39e3e509e..6ee922bed 100644 --- a/src/main/java/org/codelibs/fess/app/service/SearchService.java +++ b/src/main/java/org/codelibs/fess/app/service/SearchService.java @@ -39,11 +39,13 @@ import org.codelibs.fess.es.client.FessEsClient.SearchConditionBuilder; import org.codelibs.fess.es.client.FessEsClientException; import org.codelibs.fess.helper.QueryHelper; import org.codelibs.fess.helper.SystemHelper; +import org.codelibs.fess.mylasta.action.FessUserBean; import org.codelibs.fess.mylasta.direction.FessConfig; import org.codelibs.fess.util.ComponentUtil; import org.codelibs.fess.util.QueryResponseList; import org.codelibs.fess.util.QueryStringBuilder; import org.dbflute.optional.OptionalEntity; +import org.dbflute.optional.OptionalThing; import org.dbflute.util.DfTypeUtil; import org.elasticsearch.ElasticsearchException; import org.elasticsearch.action.bulk.BulkRequestBuilder; @@ -81,7 +83,8 @@ public class SearchService { // Method // ============== - public void search(final HttpServletRequest request, final SearchRequestParams params, final SearchRenderData data) { + public void search(final HttpServletRequest request, final SearchRequestParams params, final SearchRenderData data, + final OptionalThing userBean) { final long requestedTime = systemHelper.getCurrentTimeAsLong(); final long startTime = System.currentTimeMillis(); @@ -98,6 +101,7 @@ public class SearchService { fessConfig.getIndexDocumentSearchIndex(), fessConfig.getIndexDocumentType(), searchRequestBuilder -> { + fessConfig.processSearchPreference(searchRequestBuilder, userBean); return SearchConditionBuilder.builder(searchRequestBuilder) .query(StringUtil.isBlank(sortField) ? query : query + " sort:" + sortField).offset(pageStart) .size(pageSize).facetInfo(params.getFacetInfo()).geoInfo(params.getGeoInfo()) @@ -212,19 +216,23 @@ public class SearchService { return StringUtil.EMPTY_STRINGS; } - public OptionalEntity> getDocumentByDocId(final String docId, final String[] fields) { + public OptionalEntity> getDocumentByDocId(final String docId, final String[] fields, + final OptionalThing userBean) { return fessEsClient.getDocument(fessConfig.getIndexDocumentSearchIndex(), fessConfig.getIndexDocumentType(), builder -> { builder.setQuery(QueryBuilders.termQuery(fessConfig.getIndexFieldDocId(), docId)); builder.addFields(fields); + fessConfig.processSearchPreference(builder, userBean); return true; }); } - public List> getDocumentListByDocIds(final String[] docIds, final String[] fields) { + public List> getDocumentListByDocIds(final String[] docIds, final String[] fields, + final OptionalThing userBean) { return fessEsClient.getDocumentList(fessConfig.getIndexDocumentSearchIndex(), fessConfig.getIndexDocumentType(), builder -> { builder.setQuery(QueryBuilders.termsQuery(fessConfig.getIndexFieldDocId(), docIds)); builder.setSize(fessConfig.getPagingSearchPageMaxSizeAsInteger().intValue()); builder.addFields(fields); + fessConfig.processSearchPreference(builder, userBean); return true; }); } diff --git a/src/main/java/org/codelibs/fess/app/web/admin/searchlist/AdminSearchlistAction.java b/src/main/java/org/codelibs/fess/app/web/admin/searchlist/AdminSearchlistAction.java index a9c8bb1b5..11f991fc1 100644 --- a/src/main/java/org/codelibs/fess/app/web/admin/searchlist/AdminSearchlistAction.java +++ b/src/main/java/org/codelibs/fess/app/web/admin/searchlist/AdminSearchlistAction.java @@ -124,7 +124,7 @@ public class AdminSearchlistAction extends FessAdminAction { final WebRenderData renderData = new WebRenderData(); form.initialize(); try { - searchService.search(request, form, renderData); + searchService.search(request, form, renderData, getUserBean()); return asListHtml().renderWith(data -> { renderData.register(data); }); diff --git a/src/main/java/org/codelibs/fess/app/web/cache/CacheAction.java b/src/main/java/org/codelibs/fess/app/web/cache/CacheAction.java index 1d33e6bd1..eddd7a28c 100644 --- a/src/main/java/org/codelibs/fess/app/web/cache/CacheAction.java +++ b/src/main/java/org/codelibs/fess/app/web/cache/CacheAction.java @@ -61,6 +61,7 @@ public class CacheAction extends FessSearchAction { final TermQueryBuilder termQuery = QueryBuilders.termQuery(fessConfig.getIndexFieldDocId(), form.docId); queryRequestBuilder.setQuery(termQuery); queryRequestBuilder.addFields(queryHelper.getCacheResponseFields()); + fessConfig.processSearchPreference(queryRequestBuilder, getUserBean()); return true; }).orElse(null); } catch (final Exception e) { diff --git a/src/main/java/org/codelibs/fess/app/web/cache/CacheForm.java b/src/main/java/org/codelibs/fess/app/web/cache/CacheForm.java index 28d0b569c..491987b67 100644 --- a/src/main/java/org/codelibs/fess/app/web/cache/CacheForm.java +++ b/src/main/java/org/codelibs/fess/app/web/cache/CacheForm.java @@ -16,6 +16,8 @@ package org.codelibs.fess.app.web.cache; import java.io.Serializable; +import java.util.HashMap; +import java.util.Map; import javax.validation.constraints.Size; @@ -40,4 +42,5 @@ public class CacheForm implements Serializable { public String lang; + public Map fields = new HashMap<>(); } diff --git a/src/main/java/org/codelibs/fess/app/web/go/GoAction.java b/src/main/java/org/codelibs/fess/app/web/go/GoAction.java index 36ac914c8..d3b21add4 100644 --- a/src/main/java/org/codelibs/fess/app/web/go/GoAction.java +++ b/src/main/java/org/codelibs/fess/app/web/go/GoAction.java @@ -79,6 +79,7 @@ public class GoAction extends FessSearchAction { final TermQueryBuilder termQuery = QueryBuilders.termQuery(fessConfig.getIndexFieldDocId(), form.docId); queryRequestBuilder.setQuery(termQuery); queryRequestBuilder.addFields(fessConfig.getIndexFieldUrl(), fessConfig.getIndexFieldConfigId()); + fessConfig.processSearchPreference(queryRequestBuilder, getUserBean()); return true; }).orElse(null); } catch (final Exception e) { diff --git a/src/main/java/org/codelibs/fess/app/web/go/GoForm.java b/src/main/java/org/codelibs/fess/app/web/go/GoForm.java index 3aa8a8064..42fdc8e3b 100644 --- a/src/main/java/org/codelibs/fess/app/web/go/GoForm.java +++ b/src/main/java/org/codelibs/fess/app/web/go/GoForm.java @@ -15,6 +15,9 @@ */ package org.codelibs.fess.app.web.go; +import java.util.HashMap; +import java.util.Map; + import javax.validation.constraints.Size; import org.lastaflute.web.validation.Required; @@ -44,4 +47,6 @@ public class GoForm { public String sort; public String lang; + + public Map fields = new HashMap<>(); } diff --git a/src/main/java/org/codelibs/fess/app/web/screenshot/ScreenshotAction.java b/src/main/java/org/codelibs/fess/app/web/screenshot/ScreenshotAction.java index ea8757cad..3b52f6324 100644 --- a/src/main/java/org/codelibs/fess/app/web/screenshot/ScreenshotAction.java +++ b/src/main/java/org/codelibs/fess/app/web/screenshot/ScreenshotAction.java @@ -59,6 +59,7 @@ public class ScreenshotAction extends FessSearchAction { final TermQueryBuilder termQuery = QueryBuilders.termQuery(fessConfig.getIndexFieldDocId(), form.docId); queryRequestBuilder.setQuery(termQuery); queryRequestBuilder.addFields(queryHelper.getResponseFields()); + fessConfig.processSearchPreference(queryRequestBuilder, getUserBean()); return true; }).orElse(null); final String url = DocumentUtil.getValue(doc, fessConfig.getIndexFieldUrl(), String.class); diff --git a/src/main/java/org/codelibs/fess/app/web/screenshot/ScreenshotForm.java b/src/main/java/org/codelibs/fess/app/web/screenshot/ScreenshotForm.java index 4b481f0af..7ebc06dd1 100644 --- a/src/main/java/org/codelibs/fess/app/web/screenshot/ScreenshotForm.java +++ b/src/main/java/org/codelibs/fess/app/web/screenshot/ScreenshotForm.java @@ -16,6 +16,8 @@ package org.codelibs.fess.app.web.screenshot; import java.io.Serializable; +import java.util.HashMap; +import java.util.Map; import javax.validation.constraints.Size; @@ -41,4 +43,5 @@ public class ScreenshotForm implements Serializable { public String lang; + public Map fields = new HashMap<>(); } diff --git a/src/main/java/org/codelibs/fess/app/web/search/SearchAction.java b/src/main/java/org/codelibs/fess/app/web/search/SearchAction.java index 552a78c66..4db1663da 100644 --- a/src/main/java/org/codelibs/fess/app/web/search/SearchAction.java +++ b/src/main/java/org/codelibs/fess/app/web/search/SearchAction.java @@ -122,7 +122,7 @@ public class SearchAction extends FessSearchAction { form.lang = searchService.getLanguages(request, form); request.setAttribute(Constants.REQUEST_LANGUAGES, form.lang); final WebRenderData renderData = new WebRenderData(); - searchService.search(request, form, renderData); + searchService.search(request, form, renderData, getUserBean()); return asHtml(path_SearchJsp).renderWith(data -> { renderData.register(data); // favorite or screenshot diff --git a/src/main/java/org/codelibs/fess/crawler/FessCrawlerThread.java b/src/main/java/org/codelibs/fess/crawler/FessCrawlerThread.java index 09e476951..a08378f08 100644 --- a/src/main/java/org/codelibs/fess/crawler/FessCrawlerThread.java +++ b/src/main/java/org/codelibs/fess/crawler/FessCrawlerThread.java @@ -36,6 +36,7 @@ import org.codelibs.fess.crawler.entity.UrlQueue; import org.codelibs.fess.crawler.log.LogType; import org.codelibs.fess.es.client.FessEsClient; import org.codelibs.fess.es.config.exentity.CrawlingConfig; +import org.codelibs.fess.exception.ContainerNotAvailableException; import org.codelibs.fess.helper.CrawlingConfigHelper; import org.codelibs.fess.helper.CrawlingInfoHelper; import org.codelibs.fess.helper.IndexingHelper; @@ -72,7 +73,8 @@ public class FessCrawlerThread extends CrawlerThread { final Map dataMap = new HashMap(); dataMap.put(fessConfig.getIndexFieldUrl(), url); final List roleTypeList = new ArrayList(); - for (final String roleType : crawlingConfig.getRoleTypeValues()) { + final String[] roleTypeValues = crawlingConfig.getRoleTypeValues(); + for (final String roleType : roleTypeValues) { roleTypeList.add(roleType); } if (url.startsWith("smb://")) { @@ -181,8 +183,15 @@ public class FessCrawlerThread extends CrawlerThread { protected void storeChildUrlsToQueue(final UrlQueue urlQueue, final Set childUrlSet) { if (childUrlSet != null) { // add an url - storeChildUrls(childUrlSet.stream().filter(rd -> StringUtil.isNotBlank(rd.getUrl())).collect(Collectors.toSet()), - urlQueue.getUrl(), urlQueue.getDepth() != null ? urlQueue.getDepth() + 1 : 1); + try { + storeChildUrls(childUrlSet.stream().filter(rd -> StringUtil.isNotBlank(rd.getUrl())).collect(Collectors.toSet()), + urlQueue.getUrl(), urlQueue.getDepth() != null ? urlQueue.getDepth() + 1 : 1); + } catch (Throwable t) { + if (!ComponentUtil.available()) { + throw new ContainerNotAvailableException(t); + } + throw t; + } } } diff --git a/src/main/java/org/codelibs/fess/crawler/transformer/AbstractFessFileTransformer.java b/src/main/java/org/codelibs/fess/crawler/transformer/AbstractFessFileTransformer.java index aacb8e579..bc29a22de 100644 --- a/src/main/java/org/codelibs/fess/crawler/transformer/AbstractFessFileTransformer.java +++ b/src/main/java/org/codelibs/fess/crawler/transformer/AbstractFessFileTransformer.java @@ -16,7 +16,6 @@ package org.codelibs.fess.crawler.transformer; import java.io.InputStream; -import java.io.UnsupportedEncodingException; import java.net.URLDecoder; import java.util.ArrayList; import java.util.Collections; @@ -47,10 +46,12 @@ import org.codelibs.fess.crawler.exception.CrawlingAccessException; import org.codelibs.fess.crawler.extractor.Extractor; import org.codelibs.fess.crawler.transformer.impl.AbstractTransformer; import org.codelibs.fess.crawler.util.CrawlingParameterUtil; +import org.codelibs.fess.crawler.util.UnsafeStringBuilder; import org.codelibs.fess.es.config.exentity.CrawlingConfig; import org.codelibs.fess.es.config.exentity.CrawlingConfig.ConfigName; import org.codelibs.fess.helper.CrawlingConfigHelper; import org.codelibs.fess.helper.CrawlingInfoHelper; +import org.codelibs.fess.helper.DocumentHelper; import org.codelibs.fess.helper.FileTypeHelper; import org.codelibs.fess.helper.LabelTypeHelper; import org.codelibs.fess.helper.PathMappingHelper; @@ -101,7 +102,7 @@ public abstract class AbstractFessFileTransformer extends AbstractTransformer im final String mimeType = responseData.getMimeType(); params.put(HttpHeaders.CONTENT_TYPE, mimeType); params.put(HttpHeaders.CONTENT_ENCODING, responseData.getCharSet()); - final StringBuilder contentMetaBuf = new StringBuilder(1000); + final UnsafeStringBuilder contentMetaBuf = new UnsafeStringBuilder(1000); final Map dataMap = new HashMap(); final Map metaDataMap = new HashMap<>(); String content; @@ -163,7 +164,7 @@ public abstract class AbstractFessFileTransformer extends AbstractTransformer im if (content == null) { content = StringUtil.EMPTY; } - final String contentMeta = contentMetaBuf.toString(); + final String contentMeta = contentMetaBuf.toUnsafeString().trim(); final FessConfig fessConfig = ComponentUtil.getFessConfig(); final CrawlingInfoHelper crawlingInfoHelper = ComponentUtil.getCrawlingInfoHelper(); @@ -174,6 +175,7 @@ public abstract class AbstractFessFileTransformer extends AbstractTransformer im final Date documentExpires = crawlingInfoHelper.getDocumentExpires(crawlingConfig); final SystemHelper systemHelper = ComponentUtil.getSystemHelper(); final FileTypeHelper fileTypeHelper = ComponentUtil.getFileTypeHelper(); + final DocumentHelper documentHelper = ComponentUtil.getDocumentHelper(); String url = responseData.getUrl(); final String indexingTarget = crawlingConfig.getIndexingTarget(url); url = pathMappingHelper.replaceUrl(sessionId, url); @@ -200,7 +202,7 @@ public abstract class AbstractFessFileTransformer extends AbstractTransformer im // segment putResultDataBody(dataMap, fessConfig.getIndexFieldSegment(), sessionId); // content - final StringBuilder buf = new StringBuilder(content.length() + 1000); + final UnsafeStringBuilder buf = new UnsafeStringBuilder(content.length() + 1000); if (fessConfig.isCrawlerDocumentFileAppendBodyContent()) { buf.append(content); } @@ -210,12 +212,9 @@ public abstract class AbstractFessFileTransformer extends AbstractTransformer im } buf.append(contentMeta); } - final String body = normalizeContent(buf.toString()); - if (StringUtil.isNotBlank(body)) { - putResultDataBody(dataMap, fessConfig.getIndexFieldContent(), body); - } else { - putResultDataBody(dataMap, fessConfig.getIndexFieldContent(), StringUtil.EMPTY); - } + final String bodyBase = buf.toUnsafeString().trim(); + final String body = documentHelper.getContent(responseData, bodyBase, dataMap); + putResultDataBody(dataMap, fessConfig.getIndexFieldContent(), body); if ((Constants.TRUE.equalsIgnoreCase(fieldConfigMap.get(fessConfig.getIndexFieldCache())) || fessConfig .isCrawlerDocumentCacheEnabled()) && fessConfig.isSupportedDocumentCacheMimetypes(mimeType)) { if (responseData.getContentLength() > 0 @@ -229,14 +228,16 @@ public abstract class AbstractFessFileTransformer extends AbstractTransformer im } // digest putResultDataBody(dataMap, fessConfig.getIndexFieldDigest(), - Constants.DIGEST_PREFIX - + abbreviate(normalizeContent(content), fessConfig.getCrawlerDocumentFileMaxDigestLengthAsInteger())); + documentHelper.getDigest(responseData, bodyBase, dataMap, fessConfig.getCrawlerDocumentFileMaxDigestLengthAsInteger())); // title if (!dataMap.containsKey(fessConfig.getIndexFieldTitle())) { if (url.endsWith("/")) { if (StringUtil.isNotBlank(content)) { - putResultDataBody(dataMap, fessConfig.getIndexFieldTitle(), - abbreviate(body, fessConfig.getCrawlerDocumentFileMaxTitleLengthAsInteger())); + putResultDataBody( + dataMap, + fessConfig.getIndexFieldTitle(), + documentHelper.getDigest(responseData, body, dataMap, + fessConfig.getCrawlerDocumentFileMaxTitleLengthAsInteger())); } else { putResultDataBody(dataMap, fessConfig.getIndexFieldTitle(), fessConfig.getCrawlerDocumentFileNoTitleLabel()); } @@ -332,18 +333,6 @@ public abstract class AbstractFessFileTransformer extends AbstractTransformer im return dataMap; } - protected String abbreviate(final String str, final int maxWidth) { - String newStr = StringUtils.abbreviate(str, maxWidth); - try { - if (newStr.getBytes(Constants.UTF_8).length > maxWidth + fessConfig.getCrawlerDocumentFileAbbreviationMarginLengthAsInteger()) { - newStr = StringUtils.abbreviate(str, maxWidth / 2); - } - } catch (final UnsupportedEncodingException e) { - // NOP - } - return newStr; - } - private String getResourceName(final ResponseData responseData) { String name = responseData.getUrl(); final String enc = responseData.getCharSet(); diff --git a/src/main/java/org/codelibs/fess/crawler/transformer/FessTikaTransformer.java b/src/main/java/org/codelibs/fess/crawler/transformer/FessTikaTransformer.java index 2bfb0555e..0e5708c55 100644 --- a/src/main/java/org/codelibs/fess/crawler/transformer/FessTikaTransformer.java +++ b/src/main/java/org/codelibs/fess/crawler/transformer/FessTikaTransformer.java @@ -22,7 +22,6 @@ import org.codelibs.fess.crawler.extractor.Extractor; import org.codelibs.fess.exception.FessSystemException; import org.codelibs.fess.mylasta.direction.FessConfig; import org.codelibs.fess.util.ComponentUtil; -import org.lastaflute.di.core.SingletonLaContainer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -46,7 +45,7 @@ public class FessTikaTransformer extends AbstractFessFileTransformer { @Override protected Extractor getExtractor(final ResponseData responseData) { - final Extractor extractor = SingletonLaContainer.getComponent("tikaExtractor"); + final Extractor extractor = ComponentUtil.getComponent("tikaExtractor"); if (extractor == null) { throw new FessSystemException("Could not find tikaExtractor."); } diff --git a/src/main/java/org/codelibs/fess/crawler/transformer/FessTransformer.java b/src/main/java/org/codelibs/fess/crawler/transformer/FessTransformer.java index 480b42cbf..4527add67 100644 --- a/src/main/java/org/codelibs/fess/crawler/transformer/FessTransformer.java +++ b/src/main/java/org/codelibs/fess/crawler/transformer/FessTransformer.java @@ -100,13 +100,6 @@ public interface FessTransformer { return StringUtils.abbreviate(url, getMaxSiteLength()); } - public default String normalizeContent(final String content) { - if (content == null) { - return StringUtil.EMPTY; // empty - } - return content.replaceAll("\\s+", " "); - } - public default void putResultDataBody(final Map dataMap, final String key, final Object value) { final FessConfig fessConfig = ComponentUtil.getFessConfig(); if (fessConfig.getIndexFieldUrl().equals(key)) { diff --git a/src/main/java/org/codelibs/fess/crawler/transformer/FessXpathTransformer.java b/src/main/java/org/codelibs/fess/crawler/transformer/FessXpathTransformer.java index 6fdb2bde1..f567024b9 100644 --- a/src/main/java/org/codelibs/fess/crawler/transformer/FessXpathTransformer.java +++ b/src/main/java/org/codelibs/fess/crawler/transformer/FessXpathTransformer.java @@ -30,7 +30,6 @@ import java.util.Set; import javax.annotation.PostConstruct; import javax.xml.transform.TransformerException; -import org.apache.commons.lang3.StringUtils; import org.apache.xpath.objects.XObject; import org.codelibs.core.io.InputStreamUtil; import org.codelibs.core.io.SerializeUtil; @@ -47,10 +46,12 @@ import org.codelibs.fess.crawler.exception.CrawlerSystemException; import org.codelibs.fess.crawler.exception.CrawlingAccessException; import org.codelibs.fess.crawler.transformer.impl.XpathTransformer; import org.codelibs.fess.crawler.util.CrawlingParameterUtil; +import org.codelibs.fess.crawler.util.UnsafeStringBuilder; import org.codelibs.fess.es.config.exentity.CrawlingConfig; import org.codelibs.fess.es.config.exentity.CrawlingConfig.ConfigName; import org.codelibs.fess.helper.CrawlingConfigHelper; import org.codelibs.fess.helper.CrawlingInfoHelper; +import org.codelibs.fess.helper.DocumentHelper; import org.codelibs.fess.helper.DuplicateHostHelper; import org.codelibs.fess.helper.FileTypeHelper; import org.codelibs.fess.helper.LabelTypeHelper; @@ -71,7 +72,7 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf private static final int UTF8_BOM_SIZE = 3; - public boolean prunedCacheContent = true; + public boolean prunedContent = true; public Map convertUrlMap = new HashMap<>(); @@ -177,6 +178,7 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf final Date documentExpires = crawlingInfoHelper.getDocumentExpires(crawlingConfig); final SystemHelper systemHelper = ComponentUtil.getSystemHelper(); final FileTypeHelper fileTypeHelper = ComponentUtil.getFileTypeHelper(); + final DocumentHelper documentHelper = ComponentUtil.getDocumentHelper(); String url = responseData.getUrl(); final String indexingTarget = crawlingConfig.getIndexingTarget(url); url = pathMappingHelper.replaceUrl(sessionId, url); @@ -208,7 +210,8 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf } // title // content - putResultDataBody(dataMap, fessConfig.getIndexFieldContent(), getDocumentContent(responseData, document)); + final String body = getSingleNodeValue(document, fessConfig.getCrawlerDocumentHtmlContentXpath(), prunedContent); + putResultDataBody(dataMap, fessConfig.getIndexFieldContent(), documentHelper.getContent(responseData, body, dataMap)); if ((Constants.TRUE.equalsIgnoreCase(fieldConfigMap.get(fessConfig.getIndexFieldCache())) || fessConfig .isCrawlerDocumentCacheEnabled()) && fessConfig.isSupportedDocumentCacheMimetypes(mimeType)) { if (responseData.getContentLength() > 0 @@ -230,7 +233,13 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf } } // digest - putResultDataBody(dataMap, fessConfig.getIndexFieldDigest(), getDocumentDigest(responseData, document)); + final String digest = getSingleNodeValue(document, fessConfig.getCrawlerDocumentHtmlDigestXpath(), false); + if (StringUtil.isNotBlank(digest)) { + putResultDataBody(dataMap, fessConfig.getIndexFieldDigest(), digest); + } else { + putResultDataBody(dataMap, fessConfig.getIndexFieldDigest(), + documentHelper.getDigest(responseData, body, dataMap, fessConfig.getCrawlerDocumentHtmlMaxDigestLengthAsInteger())); + } // segment putResultDataBody(dataMap, fessConfig.getIndexFieldSegment(), sessionId); // host @@ -314,19 +323,7 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf return null; } - protected String getDocumentDigest(final ResponseData responseData, final Document document) { - final String digest = getSingleNodeValue(document, fessConfig.getCrawlerDocumentHtmlDigestXpath(), false); - if (StringUtil.isNotBlank(digest)) { - return digest; - } - - final String body = - normalizeContent(removeCommentTag(getSingleNodeValue(document, fessConfig.getCrawlerDocumentHtmlContentXpath(), - prunedCacheContent))); - return StringUtils.abbreviate(body, fessConfig.getCrawlerDocumentHtmlMaxDigestLengthAsInteger()); - } - - String removeCommentTag(final String content) { + protected String removeCommentTag(final String content) { if (content == null) { return StringUtil.EMPTY; } @@ -348,18 +345,14 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf return value; } - private String getDocumentContent(final ResponseData responseData, final Document document) { - return normalizeContent(getSingleNodeValue(document, fessConfig.getCrawlerDocumentHtmlContentXpath(), true)); - } - protected String getSingleNodeValue(final Document document, final String xpath, final boolean pruned) { - StringBuilder buf = null; + UnsafeStringBuilder buf = null; NodeList list = null; try { list = getXPathAPI().selectNodeList(document, xpath); for (int i = 0; i < list.getLength(); i++) { if (buf == null) { - buf = new StringBuilder(1000); + buf = new UnsafeStringBuilder(1000); } else { buf.append(' '); } @@ -377,7 +370,7 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf if (buf == null) { return null; } - return buf.toString(); + return buf.toUnsafeString().trim(); } protected Node pruneNode(final Node node) { @@ -415,7 +408,7 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf protected String getMultipleNodeValue(final Document document, final String xpath) { NodeList nodeList = null; - final StringBuilder buf = new StringBuilder(100); + final UnsafeStringBuilder buf = new UnsafeStringBuilder(100); try { nodeList = getXPathAPI().selectNodeList(document, xpath); for (int i = 0; i < nodeList.getLength(); i++) { @@ -426,7 +419,7 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf } catch (final Exception e) { logger.warn("Could not parse a value of " + xpath); } - return buf.toString(); + return buf.toUnsafeString().trim(); } protected String replaceDuplicateHost(final String url) { diff --git a/src/main/java/org/codelibs/fess/ds/impl/AbstractDataStoreImpl.java b/src/main/java/org/codelibs/fess/ds/impl/AbstractDataStoreImpl.java index e54416782..14a928f7a 100644 --- a/src/main/java/org/codelibs/fess/ds/impl/AbstractDataStoreImpl.java +++ b/src/main/java/org/codelibs/fess/ds/impl/AbstractDataStoreImpl.java @@ -109,7 +109,7 @@ public abstract class AbstractDataStoreImpl implements DataStore { } - protected Object convertValue(final String template, final Map paramMap) { + protected Object convertValue(final String template, final Map paramMap) { if (StringUtil.isEmpty(template)) { return StringUtil.EMPTY; } diff --git a/src/main/java/org/codelibs/fess/ds/impl/CsvDataStoreImpl.java b/src/main/java/org/codelibs/fess/ds/impl/CsvDataStoreImpl.java index db144a635..0349ef28b 100644 --- a/src/main/java/org/codelibs/fess/ds/impl/CsvDataStoreImpl.java +++ b/src/main/java/org/codelibs/fess/ds/impl/CsvDataStoreImpl.java @@ -28,6 +28,7 @@ import java.util.Map; import java.util.regex.Pattern; import org.apache.commons.io.IOUtils; +import org.apache.commons.lang.StringEscapeUtils; import org.codelibs.core.lang.StringUtil; import org.codelibs.fess.Constants; import org.codelibs.fess.app.service.FailureUrlService; @@ -37,8 +38,8 @@ import org.codelibs.fess.ds.DataStoreCrawlingException; import org.codelibs.fess.ds.DataStoreException; import org.codelibs.fess.ds.IndexUpdateCallback; import org.codelibs.fess.es.config.exentity.DataConfig; +import org.codelibs.fess.util.ComponentUtil; import org.codelibs.fess.util.StreamUtil; -import org.lastaflute.di.core.SingletonLaContainer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -260,11 +261,11 @@ public class CsvDataStoreImpl extends AbstractDataStoreImpl { } else { url = csvFile.getAbsolutePath() + ":" + csvReader.getLineNumber(); } - final FailureUrlService failureUrlService = SingletonLaContainer.getComponent(FailureUrlService.class); + final FailureUrlService failureUrlService = ComponentUtil.getComponent(FailureUrlService.class); failureUrlService.store(dataConfig, errorName, url, target); } catch (final Exception e) { final String url = csvFile.getAbsolutePath() + ":" + csvReader.getLineNumber(); - final FailureUrlService failureUrlService = SingletonLaContainer.getComponent(FailureUrlService.class); + final FailureUrlService failureUrlService = ComponentUtil.getComponent(FailureUrlService.class); failureUrlService.store(dataConfig, e.getClass().getCanonicalName(), url, e); logger.warn("Crawling Access Exception at : " + dataMap, e); @@ -288,7 +289,7 @@ public class CsvDataStoreImpl extends AbstractDataStoreImpl { final String value = paramMap.get(SEPARATOR_CHARACTER_PARAM); if (StringUtil.isNotBlank(value)) { try { - csvConfig.setSeparator(value.charAt(0)); + csvConfig.setSeparator(StringEscapeUtils.unescapeJava(value).charAt(0)); } catch (final Exception e) { logger.warn("Failed to load " + SEPARATOR_CHARACTER_PARAM, e); } diff --git a/src/main/java/org/codelibs/fess/ds/impl/CsvListDataStoreImpl.java b/src/main/java/org/codelibs/fess/ds/impl/CsvListDataStoreImpl.java new file mode 100644 index 000000000..1bc1463a2 --- /dev/null +++ b/src/main/java/org/codelibs/fess/ds/impl/CsvListDataStoreImpl.java @@ -0,0 +1,88 @@ +/* + * Copyright 2012-2016 CodeLibs Project and the Others. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language + * governing permissions and limitations under the License. + */ +package org.codelibs.fess.ds.impl; + +import java.io.File; +import java.util.Map; + +import org.codelibs.fess.crawler.client.CrawlerClientFactory; +import org.codelibs.fess.ds.DataStoreException; +import org.codelibs.fess.ds.IndexUpdateCallback; +import org.codelibs.fess.es.config.exentity.DataConfig; +import org.codelibs.fess.util.ComponentUtil; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.orangesignal.csv.CsvConfig; + +public class CsvListDataStoreImpl extends CsvDataStoreImpl { + + private static final Logger logger = LoggerFactory.getLogger(CsvListDataStoreImpl.class); + + public boolean deleteProcessedFile = true; + + public long csvFileTimestampMargin = 60 * 1000;// 1min + + public boolean ignoreDataStoreException = true; + + @Override + protected boolean isCsvFile(final File parentFile, final String filename) { + if (super.isCsvFile(parentFile, filename)) { + final File file = new File(parentFile, filename); + final long now = System.currentTimeMillis(); + return now - file.lastModified() > csvFileTimestampMargin; + } + return false; + } + + @Override + protected void storeData(final DataConfig dataConfig, final IndexUpdateCallback callback, final Map paramMap, + final Map scriptMap, final Map defaultDataMap) { + + final CrawlerClientFactory crawlerClientFactory = ComponentUtil.getCrawlerClientFactory(); + dataConfig.initializeClientFactory(crawlerClientFactory); + final FileListIndexUpdateCallbackImpl fileListIndexUpdateCallback = + new FileListIndexUpdateCallbackImpl(callback, crawlerClientFactory); + super.storeData(dataConfig, fileListIndexUpdateCallback, paramMap, scriptMap, defaultDataMap); + fileListIndexUpdateCallback.commit(); + } + + @Override + protected void processCsv(final DataConfig dataConfig, final IndexUpdateCallback callback, final Map paramMap, + final Map scriptMap, final Map defaultDataMap, final CsvConfig csvConfig, final File csvFile, + final long readInterval, final String csvFileEncoding, final boolean hasHeaderLine) { + try { + super.processCsv(dataConfig, callback, paramMap, scriptMap, defaultDataMap, csvConfig, csvFile, readInterval, csvFileEncoding, + hasHeaderLine); + + // delete csv file + if (deleteProcessedFile && !csvFile.delete()) { + logger.warn("Failed to delete {}", csvFile.getAbsolutePath()); + } + } catch (final DataStoreException e) { + if (ignoreDataStoreException) { + logger.error("Failed to process " + csvFile.getAbsolutePath(), e); + // rename csv file, or delete it if failed + if (!csvFile.renameTo(new File(csvFile.getParent(), csvFile.getName() + ".txt")) && !csvFile.delete()) { + logger.warn("Failed to delete {}", csvFile.getAbsolutePath()); + } + } else { + throw e; + } + } + } + +} diff --git a/src/main/java/org/codelibs/fess/ds/impl/EsDataStoreImpl.java b/src/main/java/org/codelibs/fess/ds/impl/EsDataStoreImpl.java new file mode 100644 index 000000000..5e4fa5933 --- /dev/null +++ b/src/main/java/org/codelibs/fess/ds/impl/EsDataStoreImpl.java @@ -0,0 +1,248 @@ +/* + * Copyright 2012-2016 CodeLibs Project and the Others. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language + * governing permissions and limitations under the License. + */ +package org.codelibs.fess.ds.impl; + +import java.net.InetAddress; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.Map; +import java.util.stream.Collectors; + +import org.codelibs.core.lang.StringUtil; +import org.codelibs.fess.Constants; +import org.codelibs.fess.app.service.FailureUrlService; +import org.codelibs.fess.crawler.exception.CrawlingAccessException; +import org.codelibs.fess.crawler.exception.MultipleCrawlingAccessException; +import org.codelibs.fess.ds.DataStoreCrawlingException; +import org.codelibs.fess.ds.DataStoreException; +import org.codelibs.fess.ds.IndexUpdateCallback; +import org.codelibs.fess.es.config.exentity.DataConfig; +import org.codelibs.fess.util.ComponentUtil; +import org.codelibs.fess.util.StreamUtil; +import org.elasticsearch.action.bulk.BulkRequestBuilder; +import org.elasticsearch.action.bulk.BulkResponse; +import org.elasticsearch.action.search.SearchRequestBuilder; +import org.elasticsearch.action.search.SearchResponse; +import org.elasticsearch.client.Client; +import org.elasticsearch.client.transport.TransportClient; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.transport.InetSocketTransportAddress; +import org.elasticsearch.search.SearchHit; +import org.elasticsearch.search.SearchHits; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class EsDataStoreImpl extends AbstractDataStoreImpl { + private static final String PREFERENCE = "preference"; + + private static final String QUERY = "query"; + + private static final String FIELDS = "fields"; + + private static final String SIZE = "size"; + + private static final String TYPE = "type"; + + private static final String TIMEOUT = "timeout"; + + private static final String SCROLL = "scroll"; + + private static final String INDEX = "index"; + + private static final String HOSTS = "hosts"; + + private static final String SETTINGS_PREFIX = "settings."; + + private static final Logger logger = LoggerFactory.getLogger(EsDataStoreImpl.class); + + @Override + protected void storeData(final DataConfig dataConfig, final IndexUpdateCallback callback, final Map paramMap, + final Map scriptMap, final Map defaultDataMap) { + final String hostsStr = paramMap.get(HOSTS); + if (StringUtil.isBlank(hostsStr)) { + logger.info("hosts is empty."); + return; + } + + final long readInterval = getReadInterval(paramMap); + + final Settings settings = + Settings.settingsBuilder() + .put(paramMap + .entrySet() + .stream() + .filter(e -> e.getKey().startsWith(SETTINGS_PREFIX)) + .collect( + Collectors.toMap(e -> e.getKey().replaceFirst("^settings\\.", StringUtil.EMPTY), e -> e.getValue()))) + .build(); + logger.info("Connecting to " + hostsStr + " with [" + settings.toDelimitedString(',') + "]"); + final InetSocketTransportAddress[] addresses = StreamUtil.of(hostsStr.split(",")).map(h -> { + String[] values = h.trim().split(":"); + try { + if (values.length == 1) { + return new InetSocketTransportAddress(InetAddress.getByName(values[0]), 9300); + } else if (values.length == 2) { + return new InetSocketTransportAddress(InetAddress.getByName(values[0]), Integer.parseInt(values[1])); + } + } catch (Exception e) { + logger.warn("Failed to parse address: " + h, e); + } + return null; + }).filter(v -> v != null).toArray(n -> new InetSocketTransportAddress[n]); + try (Client client = TransportClient.builder().settings(settings).build().addTransportAddresses(addresses)) { + processData(dataConfig, callback, paramMap, scriptMap, defaultDataMap, readInterval, client); + } + } + + protected void processData(final DataConfig dataConfig, final IndexUpdateCallback callback, final Map paramMap, + final Map scriptMap, final Map defaultDataMap, final long readInterval, final Client client) { + + final boolean deleteProcessedDoc = paramMap.getOrDefault("delete.processed.doc", Constants.FALSE).equalsIgnoreCase(Constants.TRUE); + final String[] indices; + if (paramMap.containsKey(INDEX)) { + indices = paramMap.get(INDEX).trim().split(","); + } else { + indices = new String[] { "_all" }; + } + final String scroll = paramMap.containsKey(SCROLL) ? paramMap.get(SCROLL).trim() : "1m"; + final String timeout = paramMap.containsKey(TIMEOUT) ? paramMap.get(TIMEOUT).trim() : "1m"; + final SearchRequestBuilder builder = client.prepareSearch(indices); + if (paramMap.containsKey(TYPE)) { + builder.setTypes(paramMap.get(TYPE).trim().split(",")); + } + if (paramMap.containsKey(SIZE)) { + builder.setSize(Integer.parseInt(paramMap.get(SIZE))); + } + if (paramMap.containsKey(FIELDS)) { + builder.addFields(paramMap.get(FIELDS).trim().split(",")); + } + builder.setQuery(paramMap.containsKey(QUERY) ? paramMap.get(QUERY).trim() : "{\"query\":{\"match_all\":{}}}"); + builder.setScroll(scroll); + builder.setPreference(paramMap.containsKey(PREFERENCE) ? paramMap.get(PREFERENCE).trim() : Constants.SEARCH_PREFERENCE_PRIMARY); + try { + SearchResponse response = builder.execute().actionGet(timeout); + + String scrollId = response.getScrollId(); + while (scrollId != null) { + final SearchHits searchHits = response.getHits(); + final SearchHit[] hits = searchHits.getHits(); + if (hits.length == 0) { + scrollId = null; + break; + } + + boolean loop = true; + final BulkRequestBuilder bulkRequest = deleteProcessedDoc ? client.prepareBulk() : null; + for (final SearchHit hit : hits) { + if (!alive || !loop) { + break; + } + + final Map dataMap = new HashMap(); + dataMap.putAll(defaultDataMap); + final Map resultMap = new LinkedHashMap<>(); + resultMap.putAll(paramMap); + resultMap.put("index", hit.getIndex()); + resultMap.put("type", hit.getType()); + resultMap.put("id", hit.getId()); + resultMap.put("version", Long.valueOf(hit.getVersion())); + resultMap.put("hit", hit); + resultMap.put("source", hit.getSource()); + + if (logger.isDebugEnabled()) { + for (final Map.Entry entry : resultMap.entrySet()) { + logger.debug(entry.getKey() + "=" + entry.getValue()); + } + } + + for (final Map.Entry entry : scriptMap.entrySet()) { + final Object convertValue = convertValue(entry.getValue(), resultMap); + if (convertValue != null) { + dataMap.put(entry.getKey(), convertValue); + } + } + + if (logger.isDebugEnabled()) { + for (final Map.Entry entry : dataMap.entrySet()) { + logger.debug(entry.getKey() + "=" + entry.getValue()); + } + } + + try { + loop = callback.store(paramMap, dataMap); + } catch (final CrawlingAccessException e) { + logger.warn("Crawling Access Exception at : " + dataMap, e); + + Throwable target = e; + if (target instanceof MultipleCrawlingAccessException) { + final Throwable[] causes = ((MultipleCrawlingAccessException) target).getCauses(); + if (causes.length > 0) { + target = causes[causes.length - 1]; + } + } + + String errorName; + final Throwable cause = target.getCause(); + if (cause != null) { + errorName = cause.getClass().getCanonicalName(); + } else { + errorName = target.getClass().getCanonicalName(); + } + + String url; + if (target instanceof DataStoreCrawlingException) { + url = ((DataStoreCrawlingException) target).getUrl(); + } else { + url = hit.getIndex() + "/" + hit.getType() + "/" + hit.getId(); + } + final FailureUrlService failureUrlService = ComponentUtil.getComponent(FailureUrlService.class); + failureUrlService.store(dataConfig, errorName, url, target); + } catch (final Exception e) { + final String url = hit.getIndex() + "/" + hit.getType() + "/" + hit.getId(); + final FailureUrlService failureUrlService = ComponentUtil.getComponent(FailureUrlService.class); + failureUrlService.store(dataConfig, e.getClass().getCanonicalName(), url, e); + + logger.warn("Crawling Access Exception at : " + dataMap, e); + } + + if (bulkRequest != null) { + bulkRequest.add(client.prepareDelete(hit.getIndex(), hit.getType(), hit.getId())); + } + + if (readInterval > 0) { + sleep(readInterval); + } + } + + if (bulkRequest != null && bulkRequest.numberOfActions() > 0) { + final BulkResponse bulkResponse = bulkRequest.execute().actionGet(timeout); + if (bulkResponse.hasFailures()) { + logger.warn(bulkResponse.buildFailureMessage()); + } + } + + if (!alive) { + break; + } + response = client.prepareSearchScroll(scrollId).setScroll(scroll).execute().actionGet(timeout); + scrollId = response.getScrollId(); + } + } catch (final Exception e) { + throw new DataStoreException("Failed to crawl data when acessing elasticsearch.", e); + } + } + +} diff --git a/src/main/java/org/codelibs/fess/ds/impl/EsListDataStoreImpl.java b/src/main/java/org/codelibs/fess/ds/impl/EsListDataStoreImpl.java new file mode 100644 index 000000000..32034ea3f --- /dev/null +++ b/src/main/java/org/codelibs/fess/ds/impl/EsListDataStoreImpl.java @@ -0,0 +1,39 @@ +/* + * Copyright 2012-2016 CodeLibs Project and the Others. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language + * governing permissions and limitations under the License. + */ +package org.codelibs.fess.ds.impl; + +import java.util.Map; + +import org.codelibs.fess.crawler.client.CrawlerClientFactory; +import org.codelibs.fess.ds.IndexUpdateCallback; +import org.codelibs.fess.es.config.exentity.DataConfig; +import org.codelibs.fess.util.ComponentUtil; + +public class EsListDataStoreImpl extends EsDataStoreImpl { + + @Override + protected void storeData(final DataConfig dataConfig, final IndexUpdateCallback callback, final Map paramMap, + final Map scriptMap, final Map defaultDataMap) { + + final CrawlerClientFactory crawlerClientFactory = ComponentUtil.getCrawlerClientFactory(); + dataConfig.initializeClientFactory(crawlerClientFactory); + final FileListIndexUpdateCallbackImpl fileListIndexUpdateCallback = + new FileListIndexUpdateCallbackImpl(callback, crawlerClientFactory); + super.storeData(dataConfig, fileListIndexUpdateCallback, paramMap, scriptMap, defaultDataMap); + fileListIndexUpdateCallback.commit(); + } + +} diff --git a/src/main/java/org/codelibs/fess/ds/impl/FileListDataStoreImpl.java b/src/main/java/org/codelibs/fess/ds/impl/FileListDataStoreImpl.java index 49717bd33..7d6bacb80 100644 --- a/src/main/java/org/codelibs/fess/ds/impl/FileListDataStoreImpl.java +++ b/src/main/java/org/codelibs/fess/ds/impl/FileListDataStoreImpl.java @@ -50,6 +50,8 @@ import org.slf4j.LoggerFactory; import com.orangesignal.csv.CsvConfig; +@Deprecated +// replace with CsvListDataStoreImpl public class FileListDataStoreImpl extends CsvDataStoreImpl { private static final Logger logger = LoggerFactory.getLogger(FileListDataStoreImpl.class); diff --git a/src/main/java/org/codelibs/fess/ds/impl/FileListIndexUpdateCallbackImpl.java b/src/main/java/org/codelibs/fess/ds/impl/FileListIndexUpdateCallbackImpl.java new file mode 100644 index 000000000..82cb3f974 --- /dev/null +++ b/src/main/java/org/codelibs/fess/ds/impl/FileListIndexUpdateCallbackImpl.java @@ -0,0 +1,214 @@ +/* + * Copyright 2012-2016 CodeLibs Project and the Others. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language + * governing permissions and limitations under the License. + */ +package org.codelibs.fess.ds.impl; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +import org.codelibs.core.io.SerializeUtil; +import org.codelibs.fess.Constants; +import org.codelibs.fess.crawler.builder.RequestDataBuilder; +import org.codelibs.fess.crawler.client.CrawlerClient; +import org.codelibs.fess.crawler.client.CrawlerClientFactory; +import org.codelibs.fess.crawler.entity.ResponseData; +import org.codelibs.fess.crawler.entity.ResultData; +import org.codelibs.fess.crawler.exception.CrawlerSystemException; +import org.codelibs.fess.crawler.processor.ResponseProcessor; +import org.codelibs.fess.crawler.processor.impl.DefaultResponseProcessor; +import org.codelibs.fess.crawler.rule.Rule; +import org.codelibs.fess.crawler.rule.RuleManager; +import org.codelibs.fess.crawler.transformer.Transformer; +import org.codelibs.fess.ds.DataStoreCrawlingException; +import org.codelibs.fess.ds.IndexUpdateCallback; +import org.codelibs.fess.es.client.FessEsClient; +import org.codelibs.fess.helper.IndexingHelper; +import org.codelibs.fess.mylasta.direction.FessConfig; +import org.codelibs.fess.util.ComponentUtil; +import org.codelibs.fess.util.StreamUtil; +import org.lastaflute.di.core.SingletonLaContainer; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class FileListIndexUpdateCallbackImpl implements IndexUpdateCallback { + private static final Logger logger = LoggerFactory.getLogger(FileListIndexUpdateCallbackImpl.class); + + protected IndexUpdateCallback indexUpdateCallback; + + protected CrawlerClientFactory crawlerClientFactory; + + protected List deleteIdList = new ArrayList(100); + + protected int maxDeleteDocumentCacheSize = 100; + + protected FileListIndexUpdateCallbackImpl(final IndexUpdateCallback indexUpdateCallback, final CrawlerClientFactory crawlerClientFactory) { + this.indexUpdateCallback = indexUpdateCallback; + this.crawlerClientFactory = crawlerClientFactory; + } + + @Override + public boolean store(final Map paramMap, final Map dataMap) { + final Object eventType = dataMap.remove(getParamValue(paramMap, "field.event_type", "event_type")); + + if (getParamValue(paramMap, "event.create", "create").equals(eventType) + || getParamValue(paramMap, "event.modify", "modify").equals(eventType)) { + // updated file + return addDocument(paramMap, dataMap); + } else if (getParamValue(paramMap, "event.delete", "delete").equals(eventType)) { + // deleted file + return deleteDocument(paramMap, dataMap); + } + + logger.warn("unknown event: " + eventType + ", data: " + dataMap); + return false; + } + + protected String getParamValue(Map paramMap, String key, String defaultValue) { + return paramMap.getOrDefault(key, defaultValue); + } + + protected boolean addDocument(final Map paramMap, final Map dataMap) { + final FessConfig fessConfig = ComponentUtil.getFessConfig(); + synchronized (indexUpdateCallback) { + // required check + if (!dataMap.containsKey(fessConfig.getIndexFieldUrl()) || dataMap.get(fessConfig.getIndexFieldUrl()) == null) { + logger.warn("Could not add a doc. Invalid data: " + dataMap); + return false; + } + + final String url = dataMap.get(fessConfig.getIndexFieldUrl()).toString(); + try { + final CrawlerClient client = crawlerClientFactory.getClient(url); + if (client == null) { + logger.warn("CrawlerClient is null. Data: " + dataMap); + return false; + } + + final long startTime = System.currentTimeMillis(); + final ResponseData responseData = client.execute(RequestDataBuilder.newRequestData().get().url(url).build()); + responseData.setExecutionTime(System.currentTimeMillis() - startTime); + if (dataMap.containsKey(Constants.SESSION_ID)) { + responseData.setSessionId((String) dataMap.get(Constants.SESSION_ID)); + } else { + responseData.setSessionId((String) paramMap.get(Constants.CRAWLING_INFO_ID)); + } + + final RuleManager ruleManager = SingletonLaContainer.getComponent(RuleManager.class); + final Rule rule = ruleManager.getRule(responseData); + if (rule == null) { + logger.warn("No url rule. Data: " + dataMap); + return false; + } else { + responseData.setRuleId(rule.getRuleId()); + final ResponseProcessor responseProcessor = rule.getResponseProcessor(); + if (responseProcessor instanceof DefaultResponseProcessor) { + final Transformer transformer = ((DefaultResponseProcessor) responseProcessor).getTransformer(); + final ResultData resultData = transformer.transform(responseData); + final byte[] data = resultData.getData(); + if (data != null) { + try { + @SuppressWarnings("unchecked") + final Map responseDataMap = (Map) SerializeUtil.fromBinaryToObject(data); + dataMap.putAll(responseDataMap); + } catch (final Exception e) { + throw new CrawlerSystemException("Could not create an instance from bytes.", e); + } + } + + // remove + String[] ignoreFields; + if (paramMap.containsKey("ignore.field.names")) { + ignoreFields = paramMap.get("ignore.field.names").split(","); + } else { + ignoreFields = new String[] { Constants.INDEXING_TARGET, Constants.SESSION_ID }; + } + StreamUtil.of(ignoreFields).map(s -> s.trim()).forEach(s -> dataMap.remove(s)); + + return indexUpdateCallback.store(paramMap, dataMap); + } else { + logger.warn("The response processor is not DefaultResponseProcessor. responseProcessor: " + responseProcessor + + ", Data: " + dataMap); + return false; + } + } + } catch (final Exception e) { + throw new DataStoreCrawlingException(url, "Failed to add: " + dataMap, e); + } + } + } + + protected boolean deleteDocument(final Map paramMap, final Map dataMap) { + + if (logger.isDebugEnabled()) { + logger.debug("Deleting " + dataMap); + } + + final FessConfig fessConfig = ComponentUtil.getFessConfig(); + + // required check + if (!dataMap.containsKey(fessConfig.getIndexFieldUrl()) || dataMap.get(fessConfig.getIndexFieldUrl()) == null) { + logger.warn("Could not delete a doc. Invalid data: " + dataMap); + return false; + } + + synchronized (indexUpdateCallback) { + deleteIdList.add(ComponentUtil.getCrawlingInfoHelper().generateId(dataMap)); + + if (deleteIdList.size() >= maxDeleteDocumentCacheSize) { + final FessEsClient fessEsClient = ComponentUtil.getElasticsearchClient(); + final IndexingHelper indexingHelper = ComponentUtil.getIndexingHelper(); + for (final String id : deleteIdList) { + indexingHelper.deleteDocument(fessEsClient, id); + } + if (logger.isDebugEnabled()) { + logger.debug("Deleted " + deleteIdList); + } + deleteIdList.clear(); + } + + } + return true; + } + + @Override + public void commit() { + if (!deleteIdList.isEmpty()) { + final FessEsClient fessEsClient = ComponentUtil.getElasticsearchClient(); + final IndexingHelper indexingHelper = ComponentUtil.getIndexingHelper(); + for (final String id : deleteIdList) { + indexingHelper.deleteDocument(fessEsClient, id); + } + if (logger.isDebugEnabled()) { + logger.debug("Deleted " + deleteIdList); + } + } + indexUpdateCallback.commit(); + } + + @Override + public long getDocumentSize() { + return indexUpdateCallback.getDocumentSize(); + } + + @Override + public long getExecuteTime() { + return indexUpdateCallback.getExecuteTime(); + } + + public void setMaxDeleteDocumentCacheSize(int maxDeleteDocumentCacheSize) { + this.maxDeleteDocumentCacheSize = maxDeleteDocumentCacheSize; + } +} diff --git a/src/main/java/org/codelibs/fess/ds/impl/IndexUpdateCallbackImpl.java b/src/main/java/org/codelibs/fess/ds/impl/IndexUpdateCallbackImpl.java index 36bde1b0f..d4e17231a 100644 --- a/src/main/java/org/codelibs/fess/ds/impl/IndexUpdateCallbackImpl.java +++ b/src/main/java/org/codelibs/fess/ds/impl/IndexUpdateCallbackImpl.java @@ -15,11 +15,11 @@ */ package org.codelibs.fess.ds.impl; -import java.util.ArrayList; -import java.util.List; import java.util.Map; import java.util.concurrent.atomic.AtomicLong; +import javax.annotation.PostConstruct; + import org.codelibs.fess.ds.IndexUpdateCallback; import org.codelibs.fess.es.client.FessEsClient; import org.codelibs.fess.exception.FessSystemException; @@ -29,23 +29,32 @@ import org.codelibs.fess.helper.SearchLogHelper; import org.codelibs.fess.helper.SystemHelper; import org.codelibs.fess.mylasta.direction.FessConfig; import org.codelibs.fess.util.ComponentUtil; +import org.codelibs.fess.util.DocList; +import org.codelibs.fess.util.DocumentUtil; import org.slf4j.Logger; import org.slf4j.LoggerFactory; public class IndexUpdateCallbackImpl implements IndexUpdateCallback { private static final Logger logger = LoggerFactory.getLogger(IndexUpdateCallbackImpl.class); - protected volatile AtomicLong documentSize = new AtomicLong(0); + protected AtomicLong documentSize = new AtomicLong(0); protected volatile long executeTime = 0; - final List> docList = new ArrayList<>(); + protected final DocList docList = new DocList(); + + protected long maxDocumentRequestSize; + + @PostConstruct + public void init() { + maxDocumentRequestSize = ComponentUtil.getFessConfig().getIndexerWebfsMaxDocumentRequestSizeAsInteger().longValue(); + } /* (non-Javadoc) * @see org.codelibs.fess.ds.impl.IndexUpdateCallback#store(java.util.Map) */ @Override - public synchronized boolean store(final Map paramMap, final Map dataMap) { + public boolean store(final Map paramMap, final Map dataMap) { final long startTime = System.currentTimeMillis(); final FessConfig fessConfig = ComponentUtil.getFessConfig(); final FessEsClient fessEsClient = ComponentUtil.getElasticsearchClient(); @@ -79,30 +88,41 @@ public class IndexUpdateCallbackImpl implements IndexUpdateCallback { dataMap.put(fessConfig.getIndexFieldDocId(), systemHelper.generateDocId(dataMap)); } - docList.add(dataMap); - if (logger.isDebugEnabled()) { - logger.debug("Added the document. " + "The number of a document cache is " + docList.size() + "."); + synchronized (docList) { + docList.add(dataMap); + if (logger.isDebugEnabled()) { + logger.debug("Added the document. " + "The number of a document cache is " + docList.size() + "."); + } + + final Long contentLength = DocumentUtil.getValue(dataMap, fessConfig.getIndexFieldContentLength(), Long.class); + if (contentLength != null) { + docList.addContentSize(contentLength.longValue()); + if (docList.getContentSize() >= maxDocumentRequestSize) { + indexingHelper.sendDocuments(fessEsClient, docList); + } + } else if (docList.size() >= fessConfig.getIndexerDataMaxDocumentCacheSizeAsInteger().intValue()) { + indexingHelper.sendDocuments(fessEsClient, docList); + } + executeTime += System.currentTimeMillis() - startTime; } - if (docList.size() >= fessConfig.getIndexerDataMaxDocumentCacheSizeAsInteger().intValue()) { - indexingHelper.sendDocuments(fessEsClient, docList); - } documentSize.getAndIncrement(); if (logger.isDebugEnabled()) { logger.debug("The number of an added document is " + documentSize.get() + "."); } - executeTime += System.currentTimeMillis() - startTime; return true; } @Override public void commit() { - if (!docList.isEmpty()) { - final IndexingHelper indexingHelper = ComponentUtil.getIndexingHelper(); - final FessEsClient fessEsClient = ComponentUtil.getElasticsearchClient(); - indexingHelper.sendDocuments(fessEsClient, docList); + synchronized (docList) { + if (!docList.isEmpty()) { + final IndexingHelper indexingHelper = ComponentUtil.getIndexingHelper(); + final FessEsClient fessEsClient = ComponentUtil.getElasticsearchClient(); + indexingHelper.sendDocuments(fessEsClient, docList); + } } } diff --git a/src/main/java/org/codelibs/fess/es/client/FessEsClient.java b/src/main/java/org/codelibs/fess/es/client/FessEsClient.java index 12de081bd..508fc15be 100644 --- a/src/main/java/org/codelibs/fess/es/client/FessEsClient.java +++ b/src/main/java/org/codelibs/fess/es/client/FessEsClient.java @@ -475,8 +475,8 @@ public class FessEsClient implements Client { final FessConfig fessConfig = ComponentUtil.getFessConfig(); SearchResponse response = client.prepareSearch(index).setTypes(type).setScroll(scrollForDelete).setSize(sizeForDelete) - .addField(fessConfig.getIndexFieldId()).setQuery(queryBuilder).execute() - .actionGet(fessConfig.getIndexScrollSearchTimeoutTimeout()); + .addField(fessConfig.getIndexFieldId()).setQuery(queryBuilder).setPreference(Constants.SEARCH_PREFERENCE_PRIMARY) + .execute().actionGet(fessConfig.getIndexScrollSearchTimeoutTimeout()); int count = 0; String scrollId = response.getScrollId(); diff --git a/src/main/java/org/codelibs/fess/es/config/allcommon/EsAbstractBehavior.java b/src/main/java/org/codelibs/fess/es/config/allcommon/EsAbstractBehavior.java index 20af7b73c..893e7173e 100644 --- a/src/main/java/org/codelibs/fess/es/config/allcommon/EsAbstractBehavior.java +++ b/src/main/java/org/codelibs/fess/es/config/allcommon/EsAbstractBehavior.java @@ -88,7 +88,11 @@ public abstract class EsAbstractBehavior list = new EsPagingResultBean<>(); final SearchHits searchHits = response.getHits(); @@ -200,8 +208,12 @@ public abstract class EsAbstractBehavior handlerScriptMap; - private List labelTypeList; + private volatile List labelTypeList; - private List roleTypeList; + private volatile List roleTypeList; public DataConfig() { super(); diff --git a/src/main/java/org/codelibs/fess/es/config/exentity/FileConfig.java b/src/main/java/org/codelibs/fess/es/config/exentity/FileConfig.java index 1bb0bed5e..74ee91850 100644 --- a/src/main/java/org/codelibs/fess/es/config/exentity/FileConfig.java +++ b/src/main/java/org/codelibs/fess/es/config/exentity/FileConfig.java @@ -38,7 +38,6 @@ import org.codelibs.fess.mylasta.direction.FessConfig; import org.codelibs.fess.util.ComponentUtil; import org.codelibs.fess.util.ParameterUtil; import org.dbflute.cbean.result.ListResultBean; -import org.lastaflute.di.core.SingletonLaContainer; /** * @author FreeGen @@ -57,9 +56,9 @@ public class FileConfig extends BsFileConfig implements CrawlingConfig { protected volatile Map> configParameterMap; - private List labelTypeList; + private volatile List labelTypeList; - private List roleTypeList; + private volatile List roleTypeList; public FileConfig() { super(); @@ -251,7 +250,7 @@ public class FileConfig extends BsFileConfig implements CrawlingConfig { @Override public void initializeClientFactory(final CrawlerClientFactory clientFactory) { - final FileAuthenticationService fileAuthenticationService = SingletonLaContainer.getComponent(FileAuthenticationService.class); + final FileAuthenticationService fileAuthenticationService = ComponentUtil.getComponent(FileAuthenticationService.class); // Parameters final Map paramMap = new HashMap(); diff --git a/src/main/java/org/codelibs/fess/es/config/exentity/WebConfig.java b/src/main/java/org/codelibs/fess/es/config/exentity/WebConfig.java index ddb941e08..3066eb187 100644 --- a/src/main/java/org/codelibs/fess/es/config/exentity/WebConfig.java +++ b/src/main/java/org/codelibs/fess/es/config/exentity/WebConfig.java @@ -38,7 +38,6 @@ import org.codelibs.fess.mylasta.direction.FessConfig; import org.codelibs.fess.util.ComponentUtil; import org.codelibs.fess.util.ParameterUtil; import org.dbflute.cbean.result.ListResultBean; -import org.lastaflute.di.core.SingletonLaContainer; /** * @author FreeGen @@ -57,9 +56,9 @@ public class WebConfig extends BsWebConfig implements CrawlingConfig { protected volatile Map> configParameterMap; - private List labelTypeList; + private volatile List labelTypeList; - private List roleTypeList; + private volatile List roleTypeList; public WebConfig() { super(); @@ -255,8 +254,8 @@ public class WebConfig extends BsWebConfig implements CrawlingConfig { @Override public void initializeClientFactory(final CrawlerClientFactory clientFactory) { - final WebAuthenticationService webAuthenticationService = SingletonLaContainer.getComponent(WebAuthenticationService.class); - final RequestHeaderService requestHeaderService = SingletonLaContainer.getComponent(RequestHeaderService.class); + final WebAuthenticationService webAuthenticationService = ComponentUtil.getComponent(WebAuthenticationService.class); + final RequestHeaderService requestHeaderService = ComponentUtil.getComponent(RequestHeaderService.class); // HttpClient Parameters final Map paramMap = new HashMap(); diff --git a/src/main/java/org/codelibs/fess/es/log/allcommon/EsAbstractBehavior.java b/src/main/java/org/codelibs/fess/es/log/allcommon/EsAbstractBehavior.java index 10e465ab7..dacf756cb 100644 --- a/src/main/java/org/codelibs/fess/es/log/allcommon/EsAbstractBehavior.java +++ b/src/main/java/org/codelibs/fess/es/log/allcommon/EsAbstractBehavior.java @@ -88,7 +88,11 @@ public abstract class EsAbstractBehavior list = new EsPagingResultBean<>(); final SearchHits searchHits = response.getHits(); @@ -200,8 +208,12 @@ public abstract class EsAbstractBehavior list = new EsPagingResultBean<>(); final SearchHits searchHits = response.getHits(); @@ -200,8 +208,12 @@ public abstract class EsAbstractBehavior { final List> sessionIdList = new ArrayList>(); @@ -187,7 +188,7 @@ public class CrawlingInfoHelper implements Serializable { } private String generateId(final String url, final List roleTypeList) { - final StringBuilder buf = new StringBuilder(1000); + final UnsafeStringBuilder buf = new UnsafeStringBuilder(1000); buf.append(url); if (roleTypeList != null && !roleTypeList.isEmpty()) { Collections.sort(roleTypeList); @@ -200,7 +201,7 @@ public class CrawlingInfoHelper implements Serializable { } } - return normalize(buf.toString()); + return normalize(buf.toUnsafeString().trim()); } private String normalize(final String value) { diff --git a/src/main/java/org/codelibs/fess/helper/DataIndexHelper.java b/src/main/java/org/codelibs/fess/helper/DataIndexHelper.java index cc182ff90..4dd7358c4 100644 --- a/src/main/java/org/codelibs/fess/helper/DataIndexHelper.java +++ b/src/main/java/org/codelibs/fess/helper/DataIndexHelper.java @@ -37,7 +37,6 @@ import org.codelibs.fess.mylasta.direction.FessConfig; import org.codelibs.fess.util.ComponentUtil; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryBuilders; -import org.lastaflute.di.core.SingletonLaContainer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -97,7 +96,7 @@ public class DataIndexHelper implements Serializable { final long startTime = System.currentTimeMillis(); - final IndexUpdateCallback indexUpdateCallback = SingletonLaContainer.getComponent(IndexUpdateCallback.class); + final IndexUpdateCallback indexUpdateCallback = ComponentUtil.getComponent(IndexUpdateCallback.class); final List sessionIdList = new ArrayList(); final Map initParamMap = new HashMap(); @@ -252,7 +251,11 @@ public class DataIndexHelper implements Serializable { } final FessConfig fessConfig = ComponentUtil.getFessConfig(); final QueryBuilder queryBuilder = - QueryBuilders.boolQuery().must(QueryBuilders.termQuery(fessConfig.getIndexFieldConfigId(), dataConfig.getConfigId())) + QueryBuilders + .boolQuery() + .must(QueryBuilders.termQuery(fessConfig.getIndexFieldConfigId(), dataConfig.getConfigId())) + .must(QueryBuilders.boolQuery().should(QueryBuilders.rangeQuery(fessConfig.getIndexFieldExpires()).lte("now")) + .should(QueryBuilders.missingQuery(fessConfig.getIndexFieldExpires()))) .mustNot(QueryBuilders.termQuery(fessConfig.getIndexFieldSegment(), sessionId)); try { ComponentUtil.getElasticsearchClient().deleteByQuery(fessConfig.getIndexDocumentUpdateIndex(), diff --git a/src/main/java/org/codelibs/fess/helper/DocumentHelper.java b/src/main/java/org/codelibs/fess/helper/DocumentHelper.java new file mode 100644 index 000000000..6da4f4a3e --- /dev/null +++ b/src/main/java/org/codelibs/fess/helper/DocumentHelper.java @@ -0,0 +1,67 @@ +/* + * Copyright 2012-2016 CodeLibs Project and the Others. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language + * governing permissions and limitations under the License. + */ +package org.codelibs.fess.helper; + +import java.io.Serializable; +import java.util.Map; + +import org.apache.commons.lang3.StringUtils; +import org.codelibs.core.lang.StringUtil; +import org.codelibs.fess.crawler.entity.ResponseData; +import org.codelibs.fess.crawler.util.TextUtil; +import org.codelibs.fess.mylasta.direction.FessConfig; +import org.codelibs.fess.util.ComponentUtil; + +public class DocumentHelper implements Serializable { + + private static final long serialVersionUID = 1L; + + public String getContent(final ResponseData responseData, final String content, final Map dataMap) { + if (content == null) { + return StringUtil.EMPTY; // empty + } + + final int maxAlphanumTermSize = getMaxAlphanumTermSize(); + final int maxSymbolTermSize = getMaxSymbolTermSize(); + return TextUtil.normalizeText(content, content.length(), maxAlphanumTermSize, maxSymbolTermSize); + } + + protected int getMaxAlphanumTermSize() { + final FessConfig fessConfig = ComponentUtil.getFessConfig(); + return fessConfig.getCrawlerDocumentMaxAlphanumTermSizeAsInteger().intValue(); + } + + protected int getMaxSymbolTermSize() { + final FessConfig fessConfig = ComponentUtil.getFessConfig(); + return fessConfig.getCrawlerDocumentMaxSymbolTermSizeAsInteger().intValue(); + } + + public String getDigest(final ResponseData responseData, final String content, final Map dataMap, final int maxWidth) { + if (content == null) { + return StringUtil.EMPTY; // empty + } + + String subContent; + if (content.length() < maxWidth * 2) { + subContent = content; + } else { + subContent = content.substring(0, maxWidth * 2); + } + + final String originalStr = TextUtil.normalizeText(subContent, subContent.length(), -1, -1); + return StringUtils.abbreviate(originalStr, maxWidth); + } +} diff --git a/src/main/java/org/codelibs/fess/helper/DuplicateHostHelper.java b/src/main/java/org/codelibs/fess/helper/DuplicateHostHelper.java index 3c36ef72f..a167b4c4c 100644 --- a/src/main/java/org/codelibs/fess/helper/DuplicateHostHelper.java +++ b/src/main/java/org/codelibs/fess/helper/DuplicateHostHelper.java @@ -23,7 +23,7 @@ import javax.annotation.PostConstruct; import org.codelibs.fess.app.service.DuplicateHostService; import org.codelibs.fess.es.config.exentity.DuplicateHost; -import org.lastaflute.di.core.SingletonLaContainer; +import org.codelibs.fess.util.ComponentUtil; public class DuplicateHostHelper implements Serializable { @@ -36,7 +36,7 @@ public class DuplicateHostHelper implements Serializable { if (duplicateHostList == null) { duplicateHostList = new ArrayList(); } - final DuplicateHostService duplicateHostService = SingletonLaContainer.getComponent(DuplicateHostService.class); + final DuplicateHostService duplicateHostService = ComponentUtil.getComponent(DuplicateHostService.class); duplicateHostList.addAll(duplicateHostService.getDuplicateHostList()); } diff --git a/src/main/java/org/codelibs/fess/helper/IndexingHelper.java b/src/main/java/org/codelibs/fess/helper/IndexingHelper.java index ed3a0d031..218c4c906 100644 --- a/src/main/java/org/codelibs/fess/helper/IndexingHelper.java +++ b/src/main/java/org/codelibs/fess/helper/IndexingHelper.java @@ -22,6 +22,8 @@ import java.util.Map; import org.codelibs.fess.es.client.FessEsClient; import org.codelibs.fess.mylasta.direction.FessConfig; import org.codelibs.fess.util.ComponentUtil; +import org.codelibs.fess.util.DocList; +import org.codelibs.fess.util.MemoryUtil; import org.elasticsearch.action.count.CountResponse; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryBuilders; @@ -37,7 +39,10 @@ public class IndexingHelper { public long requestInterval = 500; - public void sendDocuments(final FessEsClient fessEsClient, final List> docList) { + public void sendDocuments(final FessEsClient fessEsClient, final DocList docList) { + if (docList.isEmpty()) { + return; + } final long execTime = System.currentTimeMillis(); if (logger.isDebugEnabled()) { logger.debug("Sending " + docList.size() + " documents to a server."); @@ -49,14 +54,21 @@ public class IndexingHelper { fessEsClient.addAll(fessConfig.getIndexDocumentUpdateIndex(), fessConfig.getIndexDocumentType(), docList); } if (logger.isInfoEnabled()) { - logger.info("Sent " + docList.size() + " docs (" + (System.currentTimeMillis() - execTime) + "ms)"); + if (docList.getContentSize() > 0) { + logger.info("Sent " + docList.size() + " docs (Doc:{process " + docList.getProcessingTime() + "ms, send " + + (System.currentTimeMillis() - execTime) + "ms, size " + + MemoryUtil.byteCountToDisplaySize(docList.getContentSize()) + "}, " + MemoryUtil.getMemoryUsageLog() + ")"); + } else { + logger.info("Sent " + docList.size() + " docs (Doc:{send " + (System.currentTimeMillis() - execTime) + "ms}, " + + MemoryUtil.getMemoryUsageLog() + ")"); + } } } finally { docList.clear(); } } - private void deleteOldDocuments(final FessEsClient fessEsClient, final List> docList) { + private void deleteOldDocuments(final FessEsClient fessEsClient, final DocList docList) { final FessConfig fessConfig = ComponentUtil.getFessConfig(); final List docIdList = new ArrayList<>(); diff --git a/src/main/java/org/codelibs/fess/helper/KeyMatchHelper.java b/src/main/java/org/codelibs/fess/helper/KeyMatchHelper.java index 7f472dfe2..c1a6c0533 100644 --- a/src/main/java/org/codelibs/fess/helper/KeyMatchHelper.java +++ b/src/main/java/org/codelibs/fess/helper/KeyMatchHelper.java @@ -24,6 +24,7 @@ import java.util.Map; import javax.annotation.PostConstruct; import org.codelibs.core.misc.Pair; +import org.codelibs.fess.Constants; import org.codelibs.fess.app.service.KeyMatchService; import org.codelibs.fess.es.client.FessEsClient; import org.codelibs.fess.es.client.FessEsClient.SearchConditionBuilder; @@ -37,7 +38,6 @@ import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.index.query.functionscore.FunctionScoreQueryBuilder; import org.elasticsearch.index.query.functionscore.ScoreFunctionBuilder; import org.elasticsearch.index.query.functionscore.ScoreFunctionBuilders; -import org.lastaflute.di.core.SingletonLaContainer; public class KeyMatchHelper { protected volatile Map> keyMatchQueryMap = Collections.emptyMap(); @@ -55,7 +55,7 @@ public class KeyMatchHelper { protected void reload(final long interval) { final FessConfig fessConfig = ComponentUtil.getFessConfig(); - final KeyMatchService keyMatchService = SingletonLaContainer.getComponent(KeyMatchService.class); + final KeyMatchService keyMatchService = ComponentUtil.getComponent(KeyMatchService.class); final Map> keyMatchQueryMap = new HashMap<>(); keyMatchService .getAvailableKeyMatchList() @@ -93,8 +93,8 @@ public class KeyMatchHelper { fessConfig.getIndexDocumentSearchIndex(), fessConfig.getIndexDocumentType(), searchRequestBuilder -> { - return SearchConditionBuilder.builder(searchRequestBuilder).administrativeAccess(true) - .size(keyMatch.getMaxSize()).query(keyMatch.getQuery()) + return SearchConditionBuilder.builder(searchRequestBuilder.setPreference(Constants.SEARCH_PREFERENCE_PRIMARY)) + .administrativeAccess(true).size(keyMatch.getMaxSize()).query(keyMatch.getQuery()) .responseFields(new String[] { fessConfig.getIndexFieldDocId() }).build(); }); return documentList; diff --git a/src/main/java/org/codelibs/fess/helper/LabelTypeHelper.java b/src/main/java/org/codelibs/fess/helper/LabelTypeHelper.java index d6c2a1684..f9de37fc5 100644 --- a/src/main/java/org/codelibs/fess/helper/LabelTypeHelper.java +++ b/src/main/java/org/codelibs/fess/helper/LabelTypeHelper.java @@ -32,7 +32,7 @@ import org.codelibs.core.lang.StringUtil; import org.codelibs.fess.Constants; import org.codelibs.fess.app.service.LabelTypeService; import org.codelibs.fess.es.config.exentity.LabelType; -import org.lastaflute.di.core.SingletonLaContainer; +import org.codelibs.fess.util.ComponentUtil; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -49,7 +49,7 @@ public class LabelTypeHelper implements Serializable { protected volatile List labelTypePatternList; protected LabelTypeService getLabelTypeService() { - return SingletonLaContainer.getComponent(LabelTypeService.class); + return ComponentUtil.getComponent(LabelTypeService.class); } @PostConstruct @@ -185,7 +185,7 @@ public class LabelTypeHelper implements Serializable { this.value = value; if (StringUtil.isNotBlank(includedPaths)) { - final StringBuilder buf = new StringBuilder(); + final StringBuilder buf = new StringBuilder(100); char split = 0; for (final String path : includedPaths.split("\n")) { if (split == 0) { @@ -199,7 +199,7 @@ public class LabelTypeHelper implements Serializable { } if (StringUtil.isNotBlank(excludedPaths)) { - final StringBuilder buf = new StringBuilder(); + final StringBuilder buf = new StringBuilder(100); char split = 0; for (final String path : excludedPaths.split("\n")) { if (split == 0) { diff --git a/src/main/java/org/codelibs/fess/helper/PathMappingHelper.java b/src/main/java/org/codelibs/fess/helper/PathMappingHelper.java index f7afbe8fe..6ec94a600 100644 --- a/src/main/java/org/codelibs/fess/helper/PathMappingHelper.java +++ b/src/main/java/org/codelibs/fess/helper/PathMappingHelper.java @@ -28,7 +28,6 @@ import org.codelibs.fess.Constants; import org.codelibs.fess.es.config.exbhv.PathMappingBhv; import org.codelibs.fess.es.config.exentity.PathMapping; import org.codelibs.fess.util.ComponentUtil; -import org.lastaflute.di.core.SingletonLaContainer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -49,7 +48,7 @@ public class PathMappingHelper implements Serializable { ptList.add(Constants.PROCESS_TYPE_BOTH); try { - final PathMappingBhv pathMappingBhv = SingletonLaContainer.getComponent(PathMappingBhv.class); + final PathMappingBhv pathMappingBhv = ComponentUtil.getComponent(PathMappingBhv.class); cachedPathMappingList = pathMappingBhv.selectList(cb -> { cb.query().addOrderBy_SortOrder_Asc(); cb.query().setProcessType_InScope(ptList); diff --git a/src/main/java/org/codelibs/fess/helper/ProcessHelper.java b/src/main/java/org/codelibs/fess/helper/ProcessHelper.java index cc6ddac47..01f54c19b 100644 --- a/src/main/java/org/codelibs/fess/helper/ProcessHelper.java +++ b/src/main/java/org/codelibs/fess/helper/ProcessHelper.java @@ -40,7 +40,14 @@ public class ProcessHelper { @PreDestroy public void destroy() { for (final String sessionId : runningProcessMap.keySet()) { - destroyProcess(sessionId); + if (logger.isInfoEnabled()) { + logger.info("Stopping process " + sessionId); + } + if (destroyProcess(sessionId)) { + if (logger.isInfoEnabled()) { + logger.info("Stopped process " + sessionId); + } + } } } @@ -58,16 +65,16 @@ public class ProcessHelper { } } - public void destroyProcess(final String sessionId) { + public boolean destroyProcess(final String sessionId) { final JobProcess jobProcess = runningProcessMap.remove(sessionId); - destroyProcess(jobProcess); + return destroyProcess(jobProcess); } public boolean isProcessRunning() { return !runningProcessMap.isEmpty(); } - protected void destroyProcess(final JobProcess jobProcess) { + protected boolean destroyProcess(final JobProcess jobProcess) { if (jobProcess != null) { final InputStreamThread ist = jobProcess.getInputStreamThread(); try { @@ -104,10 +111,12 @@ public class ProcessHelper { } try { process.destroy(); + return true; } catch (final Exception e) { logger.error("Could not destroy a process correctly.", e); } } + return false; } public Set getRunningSessionIdSet() { diff --git a/src/main/java/org/codelibs/fess/helper/QueryHelper.java b/src/main/java/org/codelibs/fess/helper/QueryHelper.java index 05f556e16..d2013a52e 100644 --- a/src/main/java/org/codelibs/fess/helper/QueryHelper.java +++ b/src/main/java/org/codelibs/fess/helper/QueryHelper.java @@ -53,6 +53,7 @@ import org.codelibs.fess.entity.GeoInfo; import org.codelibs.fess.entity.QueryContext; import org.codelibs.fess.exception.InvalidQueryException; import org.codelibs.fess.mylasta.direction.FessConfig; +import org.codelibs.fess.util.ComponentUtil; import org.codelibs.fess.util.StreamUtil; import org.dbflute.optional.OptionalThing; import org.elasticsearch.common.unit.Fuzziness; @@ -92,9 +93,6 @@ public class QueryHelper implements Serializable { @Resource protected KeyMatchHelper keyMatchHelper; - @Resource - protected QueryParser queryParser; - protected Set apiResponseFieldSet; protected Set highlightFieldSet = new HashSet<>(); @@ -243,7 +241,7 @@ public class QueryHelper implements Serializable { public void buildBaseQuery(final QueryContext queryContext, final Consumer context) { try { - final Query query = queryParser.parse(queryContext.getQueryString()); + final Query query = getQueryParser().parse(queryContext.getQueryString()); final QueryBuilder queryBuilder = convertQuery(queryContext, query); if (queryBuilder != null) { queryContext.setQueryBuilder(queryBuilder); @@ -258,6 +256,10 @@ public class QueryHelper implements Serializable { } } + protected QueryParser getQueryParser() { + return ComponentUtil.getQueryParser(); + } + protected QueryBuilder convertQuery(final QueryContext context, final Query query) { if (query instanceof TermQuery) { return convertTermQuery(context, (TermQuery) query); diff --git a/src/main/java/org/codelibs/fess/helper/SearchLogHelper.java b/src/main/java/org/codelibs/fess/helper/SearchLogHelper.java index 409ab6daf..030c7f60c 100644 --- a/src/main/java/org/codelibs/fess/helper/SearchLogHelper.java +++ b/src/main/java/org/codelibs/fess/helper/SearchLogHelper.java @@ -48,9 +48,9 @@ import org.codelibs.fess.util.ComponentUtil; import org.codelibs.fess.util.DocumentUtil; import org.codelibs.fess.util.QueryResponseList; import org.codelibs.fess.util.StreamUtil; +import org.dbflute.optional.OptionalThing; import org.elasticsearch.action.update.UpdateRequest; import org.elasticsearch.script.Script; -import org.lastaflute.di.core.SingletonLaContainer; import org.lastaflute.web.util.LaRequestUtil; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -218,7 +218,7 @@ public class SearchLogHelper { if (!userInfoMap.isEmpty()) { final List insertList = new ArrayList<>(userInfoMap.values()); final List updateList = new ArrayList<>(); - final UserInfoBhv userInfoBhv = SingletonLaContainer.getComponent(UserInfoBhv.class); + final UserInfoBhv userInfoBhv = ComponentUtil.getComponent(UserInfoBhv.class); userInfoBhv.selectList(cb -> { cb.query().setId_InScope(userInfoMap.keySet()); cb.fetchFirst(userInfoMap.size()); @@ -267,7 +267,7 @@ public class SearchLogHelper { final List clickLogList = new ArrayList<>(); for (final ClickLog clickLog : queue) { try { - final SearchLogBhv searchLogBhv = SingletonLaContainer.getComponent(SearchLogBhv.class); + final SearchLogBhv searchLogBhv = ComponentUtil.getComponent(SearchLogBhv.class); searchLogBhv.selectEntity(cb -> { cb.query().setQueryId_Equal(clickLog.getQueryId()); }).ifPresent(entity -> { @@ -289,7 +289,7 @@ public class SearchLogHelper { } if (!clickLogList.isEmpty()) { try { - final ClickLogBhv clickLogBhv = SingletonLaContainer.getComponent(ClickLogBhv.class); + final ClickLogBhv clickLogBhv = ComponentUtil.getComponent(ClickLogBhv.class); clickLogBhv.batchInsert(clickLogList); } catch (final Exception e) { logger.warn("Failed to insert: " + clickLogList, e); @@ -302,7 +302,7 @@ public class SearchLogHelper { searchService.bulkUpdate(builder -> { final FessConfig fessConfig = ComponentUtil.getFessConfig(); searchService.getDocumentListByDocIds(clickCountMap.keySet().toArray(new String[clickCountMap.size()]), - new String[] { fessConfig.getIndexFieldDocId() }).forEach( + new String[] { fessConfig.getIndexFieldDocId() }, OptionalThing.of(FessUserBean.empty())).forEach( doc -> { final String id = DocumentUtil.getValue(doc, fessConfig.getIndexFieldId(), String.class); final String docId = DocumentUtil.getValue(doc, fessConfig.getIndexFieldDocId(), String.class); diff --git a/src/main/java/org/codelibs/fess/helper/SuggestHelper.java b/src/main/java/org/codelibs/fess/helper/SuggestHelper.java index 7b287a8dc..1cae32435 100644 --- a/src/main/java/org/codelibs/fess/helper/SuggestHelper.java +++ b/src/main/java/org/codelibs/fess/helper/SuggestHelper.java @@ -110,7 +110,7 @@ public class SuggestHelper { return; } - final StringBuilder sb = new StringBuilder(); + final StringBuilder sb = new StringBuilder(100); final List fields = new ArrayList<>(); final List tags = new ArrayList<>(); final List roles = new ArrayList<>(); diff --git a/src/main/java/org/codelibs/fess/helper/SystemHelper.java b/src/main/java/org/codelibs/fess/helper/SystemHelper.java index ab67b85a6..917eae02d 100644 --- a/src/main/java/org/codelibs/fess/helper/SystemHelper.java +++ b/src/main/java/org/codelibs/fess/helper/SystemHelper.java @@ -152,7 +152,7 @@ public class SystemHelper implements Serializable { } try { - final StringBuilder buf = new StringBuilder(); + final StringBuilder buf = new StringBuilder(path.length() + 100); for (int i = 0; i < path.length(); i++) { final char c = path.charAt(i); if (CharUtil.isUrlChar(c) || c == '^' || c == '{' || c == '}' || c == '|' || c == '\\') { diff --git a/src/main/java/org/codelibs/fess/helper/ViewHelper.java b/src/main/java/org/codelibs/fess/helper/ViewHelper.java index d23b850d3..35aa98e1f 100644 --- a/src/main/java/org/codelibs/fess/helper/ViewHelper.java +++ b/src/main/java/org/codelibs/fess/helper/ViewHelper.java @@ -62,7 +62,6 @@ import org.codelibs.fess.mylasta.direction.FessConfig; import org.codelibs.fess.util.ComponentUtil; import org.codelibs.fess.util.DocumentUtil; import org.codelibs.fess.util.ResourceUtil; -import org.lastaflute.di.core.SingletonLaContainer; import org.lastaflute.taglib.function.LaFunctions; import org.lastaflute.web.response.StreamResponse; import org.lastaflute.web.util.LaRequestUtil; @@ -79,6 +78,10 @@ import com.ibm.icu.text.SimpleDateFormat; public class ViewHelper implements Serializable { + private static final Pattern LOCAL_PATH_PATTERN = Pattern.compile("^file:/+[a-zA-Z]:"); + + private static final Pattern SHARED_FOLDER_PATTERN = Pattern.compile("^file:/+[^/]\\."); + private static final long serialVersionUID = 1L; private static final Logger logger = LoggerFactory.getLogger(ViewHelper.class); @@ -478,7 +481,16 @@ public class ViewHelper implements Serializable { public Object getSitePath(final Map docMap) { final Object urlLink = docMap.get("urlLink"); if (urlLink != null) { - return StringUtils.abbreviate(urlLink.toString().replaceFirst("^[a-zA-Z0-9]*:/?/*", ""), sitePathLength); + final String returnUrl; + final String url = urlLink.toString(); + if (LOCAL_PATH_PATTERN.matcher(url).find() || SHARED_FOLDER_PATTERN.matcher(url).find()) { + returnUrl = url.replaceFirst("^file:/+", ""); + } else if (url.startsWith("file:")) { + returnUrl = url.replaceFirst("^file:/+", "/"); + } else { + returnUrl = url.replaceFirst("^[a-zA-Z0-9]*:/+", ""); + } + return StringUtils.abbreviate(returnUrl, sitePathLength); } return null; } @@ -502,20 +514,20 @@ public class ViewHelper implements Serializable { logger.debug("configType: " + configType + ", configId: " + configId); } if (ConfigType.WEB == configType) { - final WebConfigService webConfigService = SingletonLaContainer.getComponent(WebConfigService.class); + final WebConfigService webConfigService = ComponentUtil.getComponent(WebConfigService.class); config = webConfigService.getWebConfig(crawlingConfigHelper.getId(configId)).get(); } else if (ConfigType.FILE == configType) { - final FileConfigService fileConfigService = SingletonLaContainer.getComponent(FileConfigService.class); + final FileConfigService fileConfigService = ComponentUtil.getComponent(FileConfigService.class); config = fileConfigService.getFileConfig(crawlingConfigHelper.getId(configId)).get(); } else if (ConfigType.DATA == configType) { - final DataConfigService dataConfigService = SingletonLaContainer.getComponent(DataConfigService.class); + final DataConfigService dataConfigService = ComponentUtil.getComponent(DataConfigService.class); config = dataConfigService.getDataConfig(crawlingConfigHelper.getId(configId)).get(); } if (config == null) { throw new FessSystemException("No crawlingConfig: " + configId); } final String url = DocumentUtil.getValue(doc, fessConfig.getIndexFieldUrl(), String.class); - final CrawlerClientFactory crawlerClientFactory = SingletonLaContainer.getComponent(CrawlerClientFactory.class); + final CrawlerClientFactory crawlerClientFactory = ComponentUtil.getComponent(CrawlerClientFactory.class); config.initializeClientFactory(crawlerClientFactory); final CrawlerClient client = crawlerClientFactory.getClient(url); if (client == null) { diff --git a/src/main/java/org/codelibs/fess/helper/WebFsIndexHelper.java b/src/main/java/org/codelibs/fess/helper/WebFsIndexHelper.java index c0dccdc2c..86a467aee 100644 --- a/src/main/java/org/codelibs/fess/helper/WebFsIndexHelper.java +++ b/src/main/java/org/codelibs/fess/helper/WebFsIndexHelper.java @@ -19,6 +19,7 @@ import java.io.Serializable; import java.util.ArrayList; import java.util.Collections; import java.util.List; +import java.util.Map; import javax.annotation.Resource; @@ -32,16 +33,17 @@ import org.codelibs.fess.app.service.FileConfigService; import org.codelibs.fess.app.service.WebConfigService; import org.codelibs.fess.crawler.Crawler; import org.codelibs.fess.crawler.CrawlerContext; +import org.codelibs.fess.crawler.CrawlerStatus; import org.codelibs.fess.crawler.interval.FessIntervalController; import org.codelibs.fess.crawler.service.impl.EsDataService; import org.codelibs.fess.crawler.service.impl.EsUrlFilterService; import org.codelibs.fess.crawler.service.impl.EsUrlQueueService; +import org.codelibs.fess.es.config.exentity.CrawlingConfig.ConfigName; import org.codelibs.fess.es.config.exentity.FileConfig; import org.codelibs.fess.es.config.exentity.WebConfig; import org.codelibs.fess.indexer.IndexUpdater; import org.codelibs.fess.mylasta.direction.FessConfig; import org.codelibs.fess.util.ComponentUtil; -import org.lastaflute.di.core.SingletonLaContainer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -72,7 +74,7 @@ public class WebFsIndexHelper implements Serializable { @Resource protected CrawlingConfigHelper crawlingConfigHelper; - public long maxAccessCount = 100000; + public long maxAccessCount = Long.MAX_VALUE; public long crawlingExecutionInterval = Constants.DEFAULT_CRAWLING_EXECUTION_INTERVAL; @@ -141,7 +143,7 @@ public class WebFsIndexHelper implements Serializable { final String sid = crawlingConfigHelper.store(sessionId, webConfig); // create crawler - final Crawler crawler = SingletonLaContainer.getComponent(Crawler.class); + final Crawler crawler = ComponentUtil.getComponent(Crawler.class); crawler.setSessionId(sid); sessionIdList.add(sid); @@ -174,6 +176,18 @@ public class WebFsIndexHelper implements Serializable { crawlerContext.setMaxAccessCount(maxCount); webConfig.initializeClientFactory(crawler.getClientFactory()); + final Map configParamMap = webConfig.getConfigParameterMap(ConfigName.CONFIG); + + if (Constants.TRUE.equalsIgnoreCase(configParamMap.get(Constants.CONFIG_CLEANUP_ALL))) { + deleteCrawlData(sid); + } else if (Constants.TRUE.equalsIgnoreCase(configParamMap.get(Constants.CONFIG_CLEANUP_FILTERS))) { + final EsUrlFilterService urlFilterService = ComponentUtil.getComponent(EsUrlFilterService.class); + try { + urlFilterService.delete(sid); + } catch (Exception e) { + logger.warn("Failed to delete url filters for " + sid); + } + } // set urls final String[] urls = urlsStr.split("[\r\n]"); @@ -218,13 +232,15 @@ public class WebFsIndexHelper implements Serializable { } // failure url - final List excludedUrlList = failureUrlService.getExcludedUrlList(webConfig.getConfigId()); - for (final String u : excludedUrlList) { - if (StringUtil.isNotBlank(u)) { - final String urlValue = u.trim(); - crawler.addExcludeFilter(urlValue); - if (logger.isInfoEnabled()) { - logger.info("Excluded URL from failures: " + urlValue); + if (!Constants.TRUE.equalsIgnoreCase(configParamMap.get(Constants.CONFIG_IGNORE_FAILURE_URLS))) { + final List excludedUrlList = failureUrlService.getExcludedUrlList(webConfig.getConfigId()); + for (final String u : excludedUrlList) { + if (StringUtil.isNotBlank(u)) { + final String urlValue = u.trim(); + crawler.addExcludeFilter(urlValue); + if (logger.isInfoEnabled()) { + logger.info("Excluded URL from failures: " + urlValue); + } } } } @@ -245,7 +261,7 @@ public class WebFsIndexHelper implements Serializable { final String sid = crawlingConfigHelper.store(sessionId, fileConfig); // create crawler - final Crawler crawler = SingletonLaContainer.getComponent(Crawler.class); + final Crawler crawler = ComponentUtil.getComponent(Crawler.class); crawler.setSessionId(sid); sessionIdList.add(sid); @@ -277,6 +293,18 @@ public class WebFsIndexHelper implements Serializable { crawlerContext.setMaxAccessCount(maxCount); fileConfig.initializeClientFactory(crawler.getClientFactory()); + final Map configParamMap = fileConfig.getConfigParameterMap(ConfigName.CONFIG); + + if (Constants.TRUE.equalsIgnoreCase(configParamMap.get(Constants.CONFIG_CLEANUP_ALL))) { + deleteCrawlData(sid); + } else if (Constants.TRUE.equalsIgnoreCase(configParamMap.get(Constants.CONFIG_CLEANUP_FILTERS))) { + final EsUrlFilterService urlFilterService = ComponentUtil.getComponent(EsUrlFilterService.class); + try { + urlFilterService.delete(sid); + } catch (Exception e) { + logger.warn("Failed to delete url filters for " + sid); + } + } // set paths final String[] paths = pathsStr.split("[\r\n]"); @@ -348,14 +376,16 @@ public class WebFsIndexHelper implements Serializable { } // failure url - final List excludedUrlList = failureUrlService.getExcludedUrlList(fileConfig.getConfigId()); - if (excludedUrlList != null) { - for (final String u : excludedUrlList) { - if (StringUtil.isNotBlank(u)) { - final String urlValue = u.trim(); - crawler.addExcludeFilter(urlValue); - if (logger.isInfoEnabled()) { - logger.info("Excluded Path from failures: " + urlValue); + if (!Constants.TRUE.equalsIgnoreCase(configParamMap.get(Constants.CONFIG_IGNORE_FAILURE_URLS))) { + final List excludedUrlList = failureUrlService.getExcludedUrlList(fileConfig.getConfigId()); + if (excludedUrlList != null) { + for (final String u : excludedUrlList) { + if (StringUtil.isNotBlank(u)) { + final String urlValue = u.trim(); + crawler.addExcludeFilter(urlValue); + if (logger.isInfoEnabled()) { + logger.info("Excluded Path from failures: " + urlValue); + } } } } @@ -411,7 +441,8 @@ public class WebFsIndexHelper implements Serializable { // check status for (int i = 0; i < startedCrawlerNum; i++) { - if (!crawlerList.get(i).getCrawlerContext().isRunning() && crawlerStatusList.get(i).equals(Constants.RUNNING)) { + if (crawlerList.get(i).getCrawlerContext().getStatus() == CrawlerStatus.DONE + && crawlerStatusList.get(i).equals(Constants.RUNNING)) { crawlerList.get(i).awaitTermination(); crawlerStatusList.set(i, Constants.DONE); final String sid = crawlerList.get(i).getCrawlerContext().getSessionId(); @@ -431,7 +462,8 @@ public class WebFsIndexHelper implements Serializable { finishedAll = true; for (int i = 0; i < crawlerList.size(); i++) { crawlerList.get(i).awaitTermination(crawlingExecutionInterval); - if (!crawlerList.get(i).getCrawlerContext().isRunning() && !crawlerStatusList.get(i).equals(Constants.DONE)) { + if (crawlerList.get(i).getCrawlerContext().getStatus() == CrawlerStatus.DONE + && !crawlerStatusList.get(i).equals(Constants.DONE)) { crawlerStatusList.set(i, Constants.DONE); final String sid = crawlerList.get(i).getCrawlerContext().getSessionId(); indexUpdater.addFinishedSessionId(sid); @@ -463,34 +495,42 @@ public class WebFsIndexHelper implements Serializable { crawlingInfoHelper.putToInfoMap(Constants.WEB_FS_INDEX_EXEC_TIME, Long.toString(indexUpdater.getExecuteTime())); crawlingInfoHelper.putToInfoMap(Constants.WEB_FS_INDEX_SIZE, Long.toString(indexUpdater.getDocumentSize())); - final EsUrlFilterService urlFilterService = SingletonLaContainer.getComponent(EsUrlFilterService.class); - final EsUrlQueueService urlQueueService = SingletonLaContainer.getComponent(EsUrlQueueService.class); - final EsDataService dataService = SingletonLaContainer.getComponent(EsDataService.class); + if (systemHelper.isForceStop()) { + return; + } + for (final String sid : sessionIdList) { // remove config crawlingConfigHelper.remove(sid); + deleteCrawlData(sid); + } + } - try { - // clear url filter - urlFilterService.delete(sid); - } catch (Exception e) { - logger.warn("Failed to delete UrlFilter for " + sid, e); - } + protected void deleteCrawlData(final String sid) { + final EsUrlFilterService urlFilterService = ComponentUtil.getComponent(EsUrlFilterService.class); + final EsUrlQueueService urlQueueService = ComponentUtil.getComponent(EsUrlQueueService.class); + final EsDataService dataService = ComponentUtil.getComponent(EsDataService.class); - try { - // clear queue - urlQueueService.clearCache(); - urlQueueService.delete(sid); - } catch (Exception e) { - logger.warn("Failed to delete UrlQueue for " + sid, e); - } + try { + // clear url filter + urlFilterService.delete(sid); + } catch (Exception e) { + logger.warn("Failed to delete UrlFilter for " + sid, e); + } - try { - // clear - dataService.delete(sid); - } catch (Exception e) { - logger.warn("Failed to delete AccessResult for " + sid, e); - } + try { + // clear queue + urlQueueService.clearCache(); + urlQueueService.delete(sid); + } catch (Exception e) { + logger.warn("Failed to delete UrlQueue for " + sid, e); + } + + try { + // clear + dataService.delete(sid); + } catch (Exception e) { + logger.warn("Failed to delete AccessResult for " + sid, e); } } diff --git a/src/main/java/org/codelibs/fess/indexer/IndexUpdater.java b/src/main/java/org/codelibs/fess/indexer/IndexUpdater.java index 9a22aa43a..6c2481743 100644 --- a/src/main/java/org/codelibs/fess/indexer/IndexUpdater.java +++ b/src/main/java/org/codelibs/fess/indexer/IndexUpdater.java @@ -21,6 +21,7 @@ import java.util.List; import java.util.Map; import java.util.function.Consumer; +import javax.annotation.PreDestroy; import javax.annotation.Resource; import org.codelibs.core.lang.StringUtil; @@ -46,11 +47,12 @@ import org.codelibs.fess.helper.SearchLogHelper; import org.codelibs.fess.helper.SystemHelper; import org.codelibs.fess.mylasta.direction.FessConfig; import org.codelibs.fess.util.ComponentUtil; +import org.codelibs.fess.util.DocList; +import org.codelibs.fess.util.MemoryUtil; import org.elasticsearch.action.search.SearchRequestBuilder; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.search.sort.SortOrder; -import org.lastaflute.di.core.SingletonLaContainer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -105,6 +107,16 @@ public class IndexUpdater extends Thread { // nothing } + @PreDestroy + public void destroy() { + if (!finishCrawling) { + if (logger.isInfoEnabled()) { + logger.info("Stopping all crawler."); + } + forceStop(); + } + } + public void addFinishedSessionId(final String sessionId) { synchronized (finishedSessionIdList) { finishedSessionIdList.add(sessionId); @@ -144,7 +156,7 @@ public class IndexUpdater extends Thread { final FessConfig fessConfig = ComponentUtil.getFessConfig(); final long updateInterval = fessConfig.getIndexerWebfsUpdateIntervalAsInteger().longValue(); - final int maxEmptyListCount = fessConfig.getIndexerWebfsMaxEmptyListConuntAsInteger().intValue(); + final int maxEmptyListCount = fessConfig.getIndexerWebfsMaxEmptyListCountAsInteger().intValue(); final IntervalControlHelper intervalControlHelper = ComponentUtil.getIntervalControlHelper(); try { final Consumer cb = @@ -162,12 +174,13 @@ public class IndexUpdater extends Thread { builder.addSort(EsAccessResult.CREATE_TIME, SortOrder.ASC); }; - final List> docList = new ArrayList<>(); + final DocList docList = new DocList(); final List accessResultList = new ArrayList<>(); long updateTime = System.currentTimeMillis(); int errorCount = 0; int emptyListCount = 0; + long cleanupTime = -1; while (!finishCrawling || !accessResultList.isEmpty()) { try { final int sessionIdListSize = finishedSessionIdList.size(); @@ -196,7 +209,7 @@ public class IndexUpdater extends Thread { updateTime = System.currentTimeMillis(); - List arList = getAccessResultList(cb); + List arList = getAccessResultList(cb, cleanupTime); if (arList.isEmpty()) { emptyListCount++; } else { @@ -204,8 +217,8 @@ public class IndexUpdater extends Thread { } while (!arList.isEmpty()) { processAccessResults(docList, accessResultList, arList); - cleanupAccessResults(accessResultList); - arList = getAccessResultList(cb); + cleanupTime = cleanupAccessResults(accessResultList); + arList = getAccessResultList(cb, cleanupTime); } if (!docList.isEmpty()) { indexingHelper.sendDocuments(fessEsClient, docList); @@ -251,13 +264,23 @@ public class IndexUpdater extends Thread { } } + + if (!ComponentUtil.available()) { + logger.info("IndexUpdater is terminated."); + forceStop(); + break; + } } if (logger.isDebugEnabled()) { logger.debug("Finished indexUpdater."); } } catch (final Throwable t) { - logger.error("IndexUpdater is terminated.", t); + if (ComponentUtil.available()) { + logger.error("IndexUpdater is terminated.", t); + } else if (logger.isInfoEnabled()) { + logger.info("IndexUpdater is terminated."); + } forceStop(); } finally { intervalControlHelper.setCrawlerRunning(true); @@ -279,10 +302,9 @@ public class IndexUpdater extends Thread { } } - private void processAccessResults(final List> docList, final List accessResultList, - final List arList) { + private void processAccessResults(final DocList docList, final List accessResultList, final List arList) { final FessConfig fessConfig = ComponentUtil.getFessConfig(); - final int maxDocumentCacheSize = fessConfig.getIndexerWebfsMaxDocumentCacheSizeAsInteger().intValue(); + final long maxDocumentRequestSize = fessConfig.getIndexerWebfsMaxDocumentRequestSizeAsInteger().longValue(); for (final EsAccessResult accessResult : arList) { if (logger.isDebugEnabled()) { logger.debug("Indexing " + accessResult.getUrl()); @@ -298,11 +320,12 @@ public class IndexUpdater extends Thread { continue; } + final long startTime = System.currentTimeMillis(); final AccessResultData accessResultData = accessResult.getAccessResultData(); if (accessResultData != null) { accessResult.setAccessResultData(null); try { - final Transformer transformer = SingletonLaContainer.getComponent(accessResultData.getTransformerName()); + final Transformer transformer = ComponentUtil.getComponent(accessResultData.getTransformerName()); if (transformer == null) { // no transformer logger.warn("No transformer: " + accessResultData.getTransformerName()); @@ -328,12 +351,20 @@ public class IndexUpdater extends Thread { updateDocument(map); docList.add(map); + final long processingTime = System.currentTimeMillis() - startTime; + docList.addProcessingTime(processingTime); if (logger.isDebugEnabled()) { - logger.debug("Added the document. " + "The number of a document cache is " + docList.size() + "."); + logger.debug("Added the document(" + MemoryUtil.byteCountToDisplaySize(docList.getContentSize()) + ", " + + processingTime + "ms). " + "The number of a document cache is " + docList.size() + "."); } - if (docList.size() >= maxDocumentCacheSize) { + if (accessResult.getContentLength() == null) { indexingHelper.sendDocuments(fessEsClient, docList); + } else { + docList.addContentSize(accessResult.getContentLength().longValue()); + if (docList.getContentSize() >= maxDocumentRequestSize) { + indexingHelper.sendDocuments(fessEsClient, docList); + } } documentSize++; if (logger.isDebugEnabled()) { @@ -422,21 +453,22 @@ public class IndexUpdater extends Thread { } } - private void cleanupAccessResults(final List accessResultList) { + private long cleanupAccessResults(final List accessResultList) { if (!accessResultList.isEmpty()) { final long execTime = System.currentTimeMillis(); final int size = accessResultList.size(); dataService.update(accessResultList); accessResultList.clear(); + final long time = System.currentTimeMillis() - execTime; if (logger.isDebugEnabled()) { - logger.debug("Updated " + size + " access results. The execution time is " + (System.currentTimeMillis() - execTime) - + "ms."); + logger.debug("Updated " + size + " access results. The execution time is " + time + "ms."); } + return time; } - + return -1; } - private List getAccessResultList(final Consumer cb) { + private List getAccessResultList(final Consumer cb, final long cleanupTime) { if (logger.isDebugEnabled()) { logger.debug("Getting documents in IndexUpdater queue."); } @@ -453,15 +485,29 @@ public class IndexUpdater extends Thread { } final long totalHits = ((EsResultList) arList).getTotalHits(); if (logger.isInfoEnabled()) { - logger.info("Processing " + arList.size() + "/" + totalHits + " docs (" + (System.currentTimeMillis() - execTime) + "ms)"); + final StringBuilder buf = new StringBuilder(100); + buf.append("Processing "); + if (totalHits > 0) { + buf.append(arList.size()).append('/').append(totalHits).append(" docs (Doc:{access "); + } else { + buf.append("no docs (Doc:{access "); + } + buf.append(System.currentTimeMillis() - execTime).append("ms"); + if (cleanupTime >= 0) { + buf.append(", cleanup ").append(cleanupTime).append("ms"); + } + buf.append("}, "); + buf.append(MemoryUtil.getMemoryUsageLog()); + buf.append(')'); + logger.info(buf.toString()); } final long unprocessedDocumentSize = fessConfig.getIndexerUnprocessedDocumentSizeAsInteger().longValue(); - if (totalHits > unprocessedDocumentSize) { + final IntervalControlHelper intervalControlHelper = ComponentUtil.getIntervalControlHelper(); + if (totalHits > unprocessedDocumentSize && intervalControlHelper.isCrawlerRunning()) { if (logger.isInfoEnabled()) { logger.info("Stopped all crawler threads. " + " You have " + totalHits + " (>" + unprocessedDocumentSize + ") " + " unprocessed docs."); } - final IntervalControlHelper intervalControlHelper = ComponentUtil.getIntervalControlHelper(); intervalControlHelper.setCrawlerRunning(false); } return arList; diff --git a/src/main/java/org/codelibs/fess/job/CrawlJob.java b/src/main/java/org/codelibs/fess/job/CrawlJob.java index 7b5329395..556cb10dc 100644 --- a/src/main/java/org/codelibs/fess/job/CrawlJob.java +++ b/src/main/java/org/codelibs/fess/job/CrawlJob.java @@ -39,7 +39,6 @@ import org.codelibs.fess.util.ComponentUtil; import org.codelibs.fess.util.InputStreamThread; import org.codelibs.fess.util.JobProcess; import org.codelibs.fess.util.StreamUtil; -import org.lastaflute.di.core.SingletonLaContainer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -72,6 +71,10 @@ public class CrawlJob { protected boolean useLocalElasticsearch = true; + protected String jvmOptions; + + protected String lastaEnv; + public CrawlJob jobExecutor(final JobExecutor jobExecutor) { this.jobExecutor = jobExecutor; return this; @@ -133,6 +136,20 @@ public class CrawlJob { return this; } + public CrawlJob remoteDebug() { + return jvmOptions("-Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=127.0.0.1:8000"); + } + + public CrawlJob jvmOptions(String option) { + this.jvmOptions = option; + return this; + } + + public CrawlJob lastaEnv(String env) { + this.lastaEnv = env; + return this; + } + public String execute(final JobExecutor jobExecutor) { jobExecutor(jobExecutor); return execute(); @@ -161,7 +178,7 @@ public class CrawlJob { } public String execute() { - final StringBuilder resultBuf = new StringBuilder(); + final StringBuilder resultBuf = new StringBuilder(100); final boolean runAll = webConfigIds == null && fileConfigIds == null && dataConfigIds == null; if (sessionId == null) { // create session id @@ -229,7 +246,7 @@ public class CrawlJob { protected void executeCrawler() { final List cmdList = new ArrayList(); final String cpSeparator = SystemUtils.IS_OS_WINDOWS ? ";" : ":"; - final ServletContext servletContext = SingletonLaContainer.getComponent(ServletContext.class); + final ServletContext servletContext = ComponentUtil.getComponent(ServletContext.class); final SystemHelper systemHelper = ComponentUtil.getSystemHelper(); final ProcessHelper processHelper = ComponentUtil.getJobHelper(); final FessConfig fessConfig = ComponentUtil.getFessConfig(); @@ -238,7 +255,7 @@ public class CrawlJob { // -cp cmdList.add("-cp"); - final StringBuilder buf = new StringBuilder(); + final StringBuilder buf = new StringBuilder(100); final String confPath = System.getProperty(Constants.FESS_CONF_PATH); if (StringUtil.isNotBlank(confPath)) { buf.append(confPath); @@ -285,13 +302,15 @@ public class CrawlJob { } } - final String lastaEnv = System.getProperty("lasta.env"); - if (StringUtil.isNotBlank(lastaEnv)) { - if (lastaEnv.equals("web")) { + final String systemLastaEnv = System.getProperty("lasta.env"); + if (StringUtil.isNotBlank(systemLastaEnv)) { + if (systemLastaEnv.equals("web")) { cmdList.add("-Dlasta.env=crawler"); } else { - cmdList.add("-Dlasta.env=" + lastaEnv); + cmdList.add("-Dlasta.env=" + systemLastaEnv); } + } else if (StringUtil.isNotBlank(lastaEnv)) { + cmdList.add("-Dlasta.env=" + lastaEnv); } cmdList.add("-Dfess.crawler.process=true"); @@ -316,6 +335,10 @@ public class CrawlJob { } } + if (StringUtil.isNotBlank(jvmOptions)) { + StreamUtil.of(jvmOptions.split(" ")).filter(s -> StringUtil.isNotBlank(s)).forEach(s -> cmdList.add(s)); + } + cmdList.add(Crawler.class.getCanonicalName()); cmdList.add("--sessionId"); diff --git a/src/main/java/org/codelibs/fess/job/SuggestJob.java b/src/main/java/org/codelibs/fess/job/SuggestJob.java index 07b2331c7..2baaab227 100644 --- a/src/main/java/org/codelibs/fess/job/SuggestJob.java +++ b/src/main/java/org/codelibs/fess/job/SuggestJob.java @@ -37,7 +37,6 @@ import org.codelibs.fess.util.ComponentUtil; import org.codelibs.fess.util.InputStreamThread; import org.codelibs.fess.util.JobProcess; import org.codelibs.fess.util.StreamUtil; -import org.lastaflute.di.core.SingletonLaContainer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -108,10 +107,9 @@ public class SuggestJob { try { executeSuggestCreator(); - } catch (final FessSystemException e) { - throw e; } catch (final Exception e) { - throw new FessSystemException("Failed to execute a crawl job.", e); + logger.error("Failed to purge user info.", e); + resultBuf.append(e.getMessage()).append("\n"); } return resultBuf.toString(); @@ -121,7 +119,7 @@ public class SuggestJob { protected void executeSuggestCreator() { final List cmdList = new ArrayList<>(); final String cpSeparator = SystemUtils.IS_OS_WINDOWS ? ";" : ":"; - final ServletContext servletContext = SingletonLaContainer.getComponent(ServletContext.class); + final ServletContext servletContext = ComponentUtil.getComponent(ServletContext.class); final ProcessHelper processHelper = ComponentUtil.getJobHelper(); final FessConfig fessConfig = ComponentUtil.getFessConfig(); @@ -129,7 +127,7 @@ public class SuggestJob { // -cp cmdList.add("-cp"); - final StringBuilder buf = new StringBuilder(); + final StringBuilder buf = new StringBuilder(100); final String confPath = System.getProperty(Constants.FESS_CONF_PATH); if (StringUtil.isNotBlank(confPath)) { buf.append(confPath); diff --git a/src/main/java/org/codelibs/fess/mylasta/action/FessUserBean.java b/src/main/java/org/codelibs/fess/mylasta/action/FessUserBean.java index 1f8a2a799..383233cb0 100644 --- a/src/main/java/org/codelibs/fess/mylasta/action/FessUserBean.java +++ b/src/main/java/org/codelibs/fess/mylasta/action/FessUserBean.java @@ -15,6 +15,7 @@ */ package org.codelibs.fess.mylasta.action; +import org.codelibs.core.lang.StringUtil; import org.codelibs.fess.entity.FessUser; import org.codelibs.fess.util.StreamUtil; import org.lastaflute.web.login.TypicalUserBean; @@ -76,4 +77,25 @@ public class FessUserBean extends TypicalUserBean { // #change_it also L public boolean hasGroups(final String[] acceptedGroups) { return StreamUtil.of(user.getGroupNames()).anyMatch(s1 -> StreamUtil.of(acceptedGroups).anyMatch(s2 -> s2.equals(s1))); } + + public static FessUserBean empty() { + return new FessUserBean(null) { + private static final long serialVersionUID = 1L; + + @Override + public String getUserId() { + return ""; + } + + @Override + public boolean hasRoles(final String[] acceptedRoles) { + return true; + } + + @Override + public String[] getRoles() { + return StringUtil.EMPTY_STRINGS; + } + }; + } } diff --git a/src/main/java/org/codelibs/fess/mylasta/direction/FessConfig.java b/src/main/java/org/codelibs/fess/mylasta/direction/FessConfig.java index 4d910daee..4a785eb0a 100644 --- a/src/main/java/org/codelibs/fess/mylasta/direction/FessConfig.java +++ b/src/main/java/org/codelibs/fess/mylasta/direction/FessConfig.java @@ -42,20 +42,7 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction /** The key of the configuration. e.g. -Djava.awt.headless=true -server - -Xmx256m - -XX:MaxMetaspaceSize=128m - -XX:CompressedClassSpaceSize=32m - -XX:-UseGCOverheadLimit - -XX:+UseConcMarkSweepGC - -XX:CMSInitiatingOccupancyFraction=75 - -XX:+UseParNewGC - -XX:+UseTLAB - -XX:+DisableExplicitGC */ - String JVM_CRAWLER_OPTIONS = "jvm.crawler.options"; - - /** The key of the configuration. e.g. -Djava.awt.headless=true - -server - -Xmx256m + -Xmx512m -XX:MaxMetaspaceSize=128m -XX:CompressedClassSpaceSize=32m -XX:-UseGCOverheadLimit @@ -68,6 +55,19 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction -Djcifs.smb.client.soTimeout=35000 -Djcifs.smb.client.responseTimeout=30000 */ + String JVM_CRAWLER_OPTIONS = "jvm.crawler.options"; + + /** The key of the configuration. e.g. -Djava.awt.headless=true + -server + -Xmx256m + -XX:MaxMetaspaceSize=128m + -XX:CompressedClassSpaceSize=32m + -XX:-UseGCOverheadLimit + -XX:+UseConcMarkSweepGC + -XX:CMSInitiatingOccupancyFraction=75 + -XX:+UseParNewGC + -XX:+UseTLAB + -XX:+DisableExplicitGC */ String JVM_SUGGEST_OPTIONS = "jvm.suggest.options"; /** The key of the configuration. e.g. default_crawler */ @@ -127,6 +127,12 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction /** The key of the configuration. e.g. true */ String CRAWLER_DOCUMENT_APPEND_DATA = "crawler.document.append.data"; + /** The key of the configuration. e.g. 20 */ + String CRAWLER_DOCUMENT_MAX_ALPHANUM_TERM_SIZE = "crawler.document.max.alphanum.term.size"; + + /** The key of the configuration. e.g. 10 */ + String CRAWLER_DOCUMENT_MAX_SYMBOL_TERM_SIZE = "crawler.document.max.symbol.term.size"; + /** The key of the configuration. e.g. UTF-8 */ String CRAWLER_CRAWLING_DATA_ENCODING = "crawler.crawling.data.encoding"; @@ -168,9 +174,6 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction /** The key of the configuration. e.g. No title. */ String CRAWLER_DOCUMENT_FILE_NO_TITLE_LABEL = "crawler.document.file.no.title.label"; - /** The key of the configuration. e.g. 10 */ - String CRAWLER_DOCUMENT_FILE_ABBREVIATION_MARGIN_LENGTH = "crawler.document.file.abbreviation.margin.length"; - /** The key of the configuration. e.g. false */ String CRAWLER_DOCUMENT_FILE_IGNORE_EMPTY_CONTENT = "crawler.document.file.ignore.empty.content"; @@ -213,21 +216,27 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction /** The key of the configuration. e.g. true */ String INDEXER_FAVORITE_COUNT_ENABLED = "indexer.favorite.count.enabled"; - /** The key of the configuration. e.g. 1000 */ + /** The key of the configuration. e.g. 5000 */ String INDEXER_WEBFS_COMMIT_MARGIN_TIME = "indexer.webfs.commit.margin.time"; - /** The key of the configuration. e.g. 60 */ - String INDEXER_WEBFS_MAX_EMPTY_LIST_CONUNT = "indexer.webfs.max.empty.list.conunt"; + /** The key of the configuration. e.g. 360 */ + String INDEXER_WEBFS_MAX_EMPTY_LIST_COUNT = "indexer.webfs.max.empty.list.count"; /** The key of the configuration. e.g. 10000 */ String INDEXER_WEBFS_UPDATE_INTERVAL = "indexer.webfs.update.interval"; - /** The key of the configuration. e.g. 5 */ + /** The key of the configuration. e.g. 100 */ String INDEXER_WEBFS_MAX_DOCUMENT_CACHE_SIZE = "indexer.webfs.max.document.cache.size"; + /** The key of the configuration. e.g. 10485760 */ + String INDEXER_WEBFS_MAX_DOCUMENT_REQUEST_SIZE = "indexer.webfs.max.document.request.size"; + /** The key of the configuration. e.g. 5 */ String INDEXER_DATA_MAX_DOCUMENT_CACHE_SIZE = "indexer.data.max.document.cache.size"; + /** The key of the configuration. e.g. 10485760 */ + String INDEXER_DATA_MAX_DOCUMENT_REQUEST_SIZE = "indexer.data.max.document.request.size"; + /** The key of the configuration. e.g. favorite_count */ String INDEX_FIELD_favorite_count = "index.field.favorite_count"; @@ -741,6 +750,9 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction /** The key of the configuration. e.g. true */ String LDAP_ADMIN_SYNC_PASSWORD = "ldap.admin.sync.password"; + /** The key of the configuration. e.g. -1 */ + String LDAP_MAX_USERNAME_LENGTH = "ldap.max.username.length"; + /** The key of the configuration. e.g. memberOf */ String LDAP_MEMBEROF_ATTRIBUTE = "ldap.memberof.attribute"; @@ -827,7 +839,7 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction * Get the value for the key 'jvm.crawler.options'.
* The value is, e.g. -Djava.awt.headless=true -server - -Xmx256m + -Xmx512m -XX:MaxMetaspaceSize=128m -XX:CompressedClassSpaceSize=32m -XX:-UseGCOverheadLimit @@ -835,7 +847,11 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction -XX:CMSInitiatingOccupancyFraction=75 -XX:+UseParNewGC -XX:+UseTLAB - -XX:+DisableExplicitGC
+ -XX:+DisableExplicitGC + -Djcifs.smb.client.connTimeout=60000 + -Djcifs.smb.client.soTimeout=35000 + -Djcifs.smb.client.responseTimeout=30000 +
* comment: JVM options * @return The value of found property. (NotNull: if not found, exception but basically no way) */ @@ -853,11 +869,7 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction -XX:CMSInitiatingOccupancyFraction=75 -XX:+UseParNewGC -XX:+UseTLAB - -XX:+DisableExplicitGC - -Djcifs.smb.client.connTimeout=60000 - -Djcifs.smb.client.soTimeout=35000 - -Djcifs.smb.client.responseTimeout=30000 -
+ -XX:+DisableExplicitGC
* @return The value of found property. (NotNull: if not found, exception but basically no way) */ String getJvmSuggestOptions(); @@ -1035,6 +1047,36 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction */ boolean isCrawlerDocumentAppendData(); + /** + * Get the value for the key 'crawler.document.max.alphanum.term.size'.
+ * The value is, e.g. 20
+ * @return The value of found property. (NotNull: if not found, exception but basically no way) + */ + String getCrawlerDocumentMaxAlphanumTermSize(); + + /** + * Get the value for the key 'crawler.document.max.alphanum.term.size' as {@link Integer}.
+ * The value is, e.g. 20
+ * @return The value of found property. (NotNull: if not found, exception but basically no way) + * @throws NumberFormatException When the property is not integer. + */ + Integer getCrawlerDocumentMaxAlphanumTermSizeAsInteger(); + + /** + * Get the value for the key 'crawler.document.max.symbol.term.size'.
+ * The value is, e.g. 10
+ * @return The value of found property. (NotNull: if not found, exception but basically no way) + */ + String getCrawlerDocumentMaxSymbolTermSize(); + + /** + * Get the value for the key 'crawler.document.max.symbol.term.size' as {@link Integer}.
+ * The value is, e.g. 10
+ * @return The value of found property. (NotNull: if not found, exception but basically no way) + * @throws NumberFormatException When the property is not integer. + */ + Integer getCrawlerDocumentMaxSymbolTermSizeAsInteger(); + /** * Get the value for the key 'crawler.crawling.data.encoding'.
* The value is, e.g. UTF-8
@@ -1147,21 +1189,6 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction */ String getCrawlerDocumentFileNoTitleLabel(); - /** - * Get the value for the key 'crawler.document.file.abbreviation.margin.length'.
- * The value is, e.g. 10
- * @return The value of found property. (NotNull: if not found, exception but basically no way) - */ - String getCrawlerDocumentFileAbbreviationMarginLength(); - - /** - * Get the value for the key 'crawler.document.file.abbreviation.margin.length' as {@link Integer}.
- * The value is, e.g. 10
- * @return The value of found property. (NotNull: if not found, exception but basically no way) - * @throws NumberFormatException When the property is not integer. - */ - Integer getCrawlerDocumentFileAbbreviationMarginLengthAsInteger(); - /** * Get the value for the key 'crawler.document.file.ignore.empty.content'.
* The value is, e.g. false
@@ -1356,33 +1383,33 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction /** * Get the value for the key 'indexer.webfs.commit.margin.time'.
- * The value is, e.g. 1000
+ * The value is, e.g. 5000
* @return The value of found property. (NotNull: if not found, exception but basically no way) */ String getIndexerWebfsCommitMarginTime(); /** * Get the value for the key 'indexer.webfs.commit.margin.time' as {@link Integer}.
- * The value is, e.g. 1000
+ * The value is, e.g. 5000
* @return The value of found property. (NotNull: if not found, exception but basically no way) * @throws NumberFormatException When the property is not integer. */ Integer getIndexerWebfsCommitMarginTimeAsInteger(); /** - * Get the value for the key 'indexer.webfs.max.empty.list.conunt'.
- * The value is, e.g. 60
+ * Get the value for the key 'indexer.webfs.max.empty.list.count'.
+ * The value is, e.g. 360
* @return The value of found property. (NotNull: if not found, exception but basically no way) */ - String getIndexerWebfsMaxEmptyListConunt(); + String getIndexerWebfsMaxEmptyListCount(); /** - * Get the value for the key 'indexer.webfs.max.empty.list.conunt' as {@link Integer}.
- * The value is, e.g. 60
+ * Get the value for the key 'indexer.webfs.max.empty.list.count' as {@link Integer}.
+ * The value is, e.g. 360
* @return The value of found property. (NotNull: if not found, exception but basically no way) * @throws NumberFormatException When the property is not integer. */ - Integer getIndexerWebfsMaxEmptyListConuntAsInteger(); + Integer getIndexerWebfsMaxEmptyListCountAsInteger(); /** * Get the value for the key 'indexer.webfs.update.interval'.
@@ -1401,19 +1428,34 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction /** * Get the value for the key 'indexer.webfs.max.document.cache.size'.
- * The value is, e.g. 5
+ * The value is, e.g. 100
* @return The value of found property. (NotNull: if not found, exception but basically no way) */ String getIndexerWebfsMaxDocumentCacheSize(); /** * Get the value for the key 'indexer.webfs.max.document.cache.size' as {@link Integer}.
- * The value is, e.g. 5
+ * The value is, e.g. 100
* @return The value of found property. (NotNull: if not found, exception but basically no way) * @throws NumberFormatException When the property is not integer. */ Integer getIndexerWebfsMaxDocumentCacheSizeAsInteger(); + /** + * Get the value for the key 'indexer.webfs.max.document.request.size'.
+ * The value is, e.g. 10485760
+ * @return The value of found property. (NotNull: if not found, exception but basically no way) + */ + String getIndexerWebfsMaxDocumentRequestSize(); + + /** + * Get the value for the key 'indexer.webfs.max.document.request.size' as {@link Integer}.
+ * The value is, e.g. 10485760
+ * @return The value of found property. (NotNull: if not found, exception but basically no way) + * @throws NumberFormatException When the property is not integer. + */ + Integer getIndexerWebfsMaxDocumentRequestSizeAsInteger(); + /** * Get the value for the key 'indexer.data.max.document.cache.size'.
* The value is, e.g. 5
@@ -1429,6 +1471,21 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction */ Integer getIndexerDataMaxDocumentCacheSizeAsInteger(); + /** + * Get the value for the key 'indexer.data.max.document.request.size'.
+ * The value is, e.g. 10485760
+ * @return The value of found property. (NotNull: if not found, exception but basically no way) + */ + String getIndexerDataMaxDocumentRequestSize(); + + /** + * Get the value for the key 'indexer.data.max.document.request.size' as {@link Integer}.
+ * The value is, e.g. 10485760
+ * @return The value of found property. (NotNull: if not found, exception but basically no way) + * @throws NumberFormatException When the property is not integer. + */ + Integer getIndexerDataMaxDocumentRequestSizeAsInteger(); + /** * Get the value for the key 'index.field.favorite_count'.
* The value is, e.g. favorite_count
@@ -3024,6 +3081,21 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction */ boolean isLdapAdminSyncPassword(); + /** + * Get the value for the key 'ldap.max.username.length'.
+ * The value is, e.g. -1
+ * @return The value of found property. (NotNull: if not found, exception but basically no way) + */ + String getLdapMaxUsernameLength(); + + /** + * Get the value for the key 'ldap.max.username.length' as {@link Integer}.
+ * The value is, e.g. -1
+ * @return The value of found property. (NotNull: if not found, exception but basically no way) + * @throws NumberFormatException When the property is not integer. + */ + Integer getLdapMaxUsernameLengthAsInteger(); + /** * Get the value for the key 'ldap.memberof.attribute'.
* The value is, e.g. memberOf
@@ -3247,6 +3319,22 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction return is(FessConfig.CRAWLER_DOCUMENT_APPEND_DATA); } + public String getCrawlerDocumentMaxAlphanumTermSize() { + return get(FessConfig.CRAWLER_DOCUMENT_MAX_ALPHANUM_TERM_SIZE); + } + + public Integer getCrawlerDocumentMaxAlphanumTermSizeAsInteger() { + return getAsInteger(FessConfig.CRAWLER_DOCUMENT_MAX_ALPHANUM_TERM_SIZE); + } + + public String getCrawlerDocumentMaxSymbolTermSize() { + return get(FessConfig.CRAWLER_DOCUMENT_MAX_SYMBOL_TERM_SIZE); + } + + public Integer getCrawlerDocumentMaxSymbolTermSizeAsInteger() { + return getAsInteger(FessConfig.CRAWLER_DOCUMENT_MAX_SYMBOL_TERM_SIZE); + } + public String getCrawlerCrawlingDataEncoding() { return get(FessConfig.CRAWLER_CRAWLING_DATA_ENCODING); } @@ -3307,14 +3395,6 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction return get(FessConfig.CRAWLER_DOCUMENT_FILE_NO_TITLE_LABEL); } - public String getCrawlerDocumentFileAbbreviationMarginLength() { - return get(FessConfig.CRAWLER_DOCUMENT_FILE_ABBREVIATION_MARGIN_LENGTH); - } - - public Integer getCrawlerDocumentFileAbbreviationMarginLengthAsInteger() { - return getAsInteger(FessConfig.CRAWLER_DOCUMENT_FILE_ABBREVIATION_MARGIN_LENGTH); - } - public String getCrawlerDocumentFileIgnoreEmptyContent() { return get(FessConfig.CRAWLER_DOCUMENT_FILE_IGNORE_EMPTY_CONTENT); } @@ -3427,12 +3507,12 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction return getAsInteger(FessConfig.INDEXER_WEBFS_COMMIT_MARGIN_TIME); } - public String getIndexerWebfsMaxEmptyListConunt() { - return get(FessConfig.INDEXER_WEBFS_MAX_EMPTY_LIST_CONUNT); + public String getIndexerWebfsMaxEmptyListCount() { + return get(FessConfig.INDEXER_WEBFS_MAX_EMPTY_LIST_COUNT); } - public Integer getIndexerWebfsMaxEmptyListConuntAsInteger() { - return getAsInteger(FessConfig.INDEXER_WEBFS_MAX_EMPTY_LIST_CONUNT); + public Integer getIndexerWebfsMaxEmptyListCountAsInteger() { + return getAsInteger(FessConfig.INDEXER_WEBFS_MAX_EMPTY_LIST_COUNT); } public String getIndexerWebfsUpdateInterval() { @@ -3451,6 +3531,14 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction return getAsInteger(FessConfig.INDEXER_WEBFS_MAX_DOCUMENT_CACHE_SIZE); } + public String getIndexerWebfsMaxDocumentRequestSize() { + return get(FessConfig.INDEXER_WEBFS_MAX_DOCUMENT_REQUEST_SIZE); + } + + public Integer getIndexerWebfsMaxDocumentRequestSizeAsInteger() { + return getAsInteger(FessConfig.INDEXER_WEBFS_MAX_DOCUMENT_REQUEST_SIZE); + } + public String getIndexerDataMaxDocumentCacheSize() { return get(FessConfig.INDEXER_DATA_MAX_DOCUMENT_CACHE_SIZE); } @@ -3459,6 +3547,14 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction return getAsInteger(FessConfig.INDEXER_DATA_MAX_DOCUMENT_CACHE_SIZE); } + public String getIndexerDataMaxDocumentRequestSize() { + return get(FessConfig.INDEXER_DATA_MAX_DOCUMENT_REQUEST_SIZE); + } + + public Integer getIndexerDataMaxDocumentRequestSizeAsInteger() { + return getAsInteger(FessConfig.INDEXER_DATA_MAX_DOCUMENT_REQUEST_SIZE); + } + public String getIndexFieldFavoriteCount() { return get(FessConfig.INDEX_FIELD_favorite_count); } @@ -4295,6 +4391,14 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction return is(FessConfig.LDAP_ADMIN_SYNC_PASSWORD); } + public String getLdapMaxUsernameLength() { + return get(FessConfig.LDAP_MAX_USERNAME_LENGTH); + } + + public Integer getLdapMaxUsernameLengthAsInteger() { + return getAsInteger(FessConfig.LDAP_MAX_USERNAME_LENGTH); + } + public String getLdapMemberofAttribute() { return get(FessConfig.LDAP_MEMBEROF_ATTRIBUTE); } diff --git a/src/main/java/org/codelibs/fess/mylasta/direction/FessProp.java b/src/main/java/org/codelibs/fess/mylasta/direction/FessProp.java index 88469f237..0cdc9e417 100644 --- a/src/main/java/org/codelibs/fess/mylasta/direction/FessProp.java +++ b/src/main/java/org/codelibs/fess/mylasta/direction/FessProp.java @@ -27,6 +27,7 @@ import java.util.stream.Collectors; import javax.naming.directory.Attribute; import javax.naming.directory.BasicAttribute; +import javax.servlet.http.HttpSession; import org.codelibs.core.exception.ClassNotFoundRuntimeException; import org.codelibs.core.lang.StringUtil; @@ -37,8 +38,10 @@ import org.codelibs.fess.mylasta.action.FessUserBean; import org.codelibs.fess.util.ComponentUtil; import org.codelibs.fess.util.StreamUtil; import org.dbflute.optional.OptionalThing; +import org.elasticsearch.action.search.SearchRequestBuilder; import org.lastaflute.job.LaJob; import org.lastaflute.job.subsidiary.ConcurrentExec; +import org.lastaflute.web.util.LaRequestUtil; public interface FessProp { @@ -401,8 +404,19 @@ public interface FessProp { setSystemProperty(Constants.LDAP_SECURITY_PRINCIPAL, value); } + Integer getLdapMaxUsernameLengthAsInteger(); + public default String getLdapSecurityPrincipal(final String username) { - return String.format(getSystemProperty(Constants.LDAP_SECURITY_PRINCIPAL, StringUtil.EMPTY), username); + final String value; + final int maxLength = getLdapMaxUsernameLengthAsInteger().intValue(); + if (username == null) { + value = StringUtil.EMPTY; + } else if (maxLength >= 0 && username.length() > maxLength) { + value = username.substring(0, maxLength); + } else { + value = username; + } + return String.format(getSystemProperty(Constants.LDAP_SECURITY_PRINCIPAL, StringUtil.EMPTY), value); } public default String getLdapSecurityPrincipal() { @@ -830,4 +844,23 @@ public interface FessProp { return StreamUtil.of(getCrawlerFileProtocolsAsArray()).anyMatch(s -> url.startsWith(s)); } + public default void processSearchPreference(SearchRequestBuilder searchRequestBuilder, OptionalThing userBean) { + userBean.map(user -> { + if (user.hasRoles(getAuthenticationAdminRolesAsArray())) { + return Constants.SEARCH_PREFERENCE_PRIMARY; + } + return user.getUserId(); + }).ifPresent(p -> searchRequestBuilder.setPreference(p)).orElse(() -> LaRequestUtil.getOptionalRequest().map(r -> { + HttpSession session = r.getSession(false); + if (session != null) { + return session.getId(); + } + final String preference = r.getParameter("preference"); + if (preference != null) { + return Integer.toString(preference.hashCode()); + } + return null; + }).ifPresent(p -> searchRequestBuilder.setPreference(p))); + } + } diff --git a/src/main/java/org/codelibs/fess/util/ComponentUtil.java b/src/main/java/org/codelibs/fess/util/ComponentUtil.java index dc385a022..82a55ace8 100644 --- a/src/main/java/org/codelibs/fess/util/ComponentUtil.java +++ b/src/main/java/org/codelibs/fess/util/ComponentUtil.java @@ -15,18 +15,22 @@ */ package org.codelibs.fess.util; +import org.apache.lucene.queryparser.classic.QueryParser; import org.codelibs.core.crypto.CachedCipher; import org.codelibs.core.misc.DynamicProperties; import org.codelibs.fess.api.WebApiManagerFactory; +import org.codelibs.fess.crawler.client.CrawlerClientFactory; import org.codelibs.fess.crawler.entity.EsAccessResult; import org.codelibs.fess.crawler.extractor.ExtractorFactory; import org.codelibs.fess.crawler.service.DataService; import org.codelibs.fess.dict.DictionaryManager; import org.codelibs.fess.ds.DataStoreFactory; import org.codelibs.fess.es.client.FessEsClient; +import org.codelibs.fess.exception.ContainerNotAvailableException; import org.codelibs.fess.helper.ActivityHelper; import org.codelibs.fess.helper.CrawlingConfigHelper; import org.codelibs.fess.helper.CrawlingInfoHelper; +import org.codelibs.fess.helper.DocumentHelper; import org.codelibs.fess.helper.DuplicateHostHelper; import org.codelibs.fess.helper.FileTypeHelper; import org.codelibs.fess.helper.IndexingHelper; @@ -52,10 +56,16 @@ import org.codelibs.fess.mylasta.direction.FessConfig; import org.lastaflute.core.message.MessageManager; import org.lastaflute.di.core.SingletonLaContainer; import org.lastaflute.di.core.factory.SingletonLaContainerFactory; +import org.lastaflute.di.core.smart.hot.HotdeployUtil; import org.lastaflute.job.JobManager; import org.lastaflute.web.servlet.request.RequestManager; public final class ComponentUtil { + + private static final String QUERY_PARSER = "queryParser"; + + private static final String DOCUMENT_HELPER = "documentHelper"; + private static final String ACTIVITY_HELPER = "activityHelper"; private static final String LDAP_MANAGER = "ldapManager"; @@ -128,175 +138,223 @@ public final class ComponentUtil { private static final String ELASTICSEARCH_CLIENT = FESS_ES_CLIENT; + private static IndexingHelper indexingHelper; + + private static CrawlingConfigHelper crawlingConfigHelper; + + private static SystemHelper systemHelper; + private ComponentUtil() { } public static CachedCipher getCipher(final String cipherName) { - return SingletonLaContainer.getComponent(cipherName); + return getComponent(cipherName); } public static QueryResponseList getQueryResponseList() { - return SingletonLaContainer.getComponent(QUERY_RESPONSE_LIST); + return getComponent(QUERY_RESPONSE_LIST); } public static DynamicProperties getSolrGroupProperties(final String groupName) { - return SingletonLaContainer.getComponent(groupName + PROPERTIES_SUFFIX); + return getComponent(groupName + PROPERTIES_SUFFIX); } public static DynamicProperties getSystemProperties() { - return SingletonLaContainer.getComponent(CRAWLER_PROPERTIES); + return getComponent(CRAWLER_PROPERTIES); } public static SystemHelper getSystemHelper() { - return SingletonLaContainer.getComponent(SYSTEM_HELPER); + if (systemHelper == null || HotdeployUtil.isHotdeploy()) { + systemHelper = getComponent(SYSTEM_HELPER); + } + return systemHelper; } public static ViewHelper getViewHelper() { - return SingletonLaContainer.getComponent(VIEW_HELPER); + return getComponent(VIEW_HELPER); } public static SambaHelper getSambaHelper() { - return SingletonLaContainer.getComponent(SAMBA_HELPER); + return getComponent(SAMBA_HELPER); } public static QueryHelper getQueryHelper() { - return SingletonLaContainer.getComponent(QUERY_HELPER); + return getComponent(QUERY_HELPER); } public static LabelTypeHelper getLabelTypeHelper() { - return SingletonLaContainer.getComponent(LABEL_TYPE_HELPER); + return getComponent(LABEL_TYPE_HELPER); } public static SearchLogHelper getSearchLogHelper() { - return SingletonLaContainer.getComponent(SEARCH_LOG_HELPER); + return getComponent(SEARCH_LOG_HELPER); } public static CrawlingConfigHelper getCrawlingConfigHelper() { - return SingletonLaContainer.getComponent(CRAWLING_CONFIG_HELPER); + if (crawlingConfigHelper == null || HotdeployUtil.isHotdeploy()) { + crawlingConfigHelper = getComponent(CRAWLING_CONFIG_HELPER); + } + return crawlingConfigHelper; } public static CrawlingInfoHelper getCrawlingInfoHelper() { - return SingletonLaContainer.getComponent(CRAWLING_INFO_HELPER); + return getComponent(CRAWLING_INFO_HELPER); } public static PopularWordHelper getPopularWordHelper() { - return SingletonLaContainer.getComponent(POPULAR_WORD_HELPER); + return getComponent(POPULAR_WORD_HELPER); } public static PathMappingHelper getPathMappingHelper() { - return SingletonLaContainer.getComponent(PATH_MAPPING_HELPER); + return getComponent(PATH_MAPPING_HELPER); } public static DuplicateHostHelper getDuplicateHostHelper() { - return SingletonLaContainer.getComponent(DUPLICATE_HOST_HELPER); + return getComponent(DUPLICATE_HOST_HELPER); } public static ProcessHelper getJobHelper() { - return SingletonLaContainer.getComponent(JOB_HELPER); + return getComponent(JOB_HELPER); } public static WebApiManagerFactory getWebApiManagerFactory() { - return SingletonLaContainer.getComponent(WEB_API_MANAGER_FACTORY); + return getComponent(WEB_API_MANAGER_FACTORY); } public static UserAgentHelper getUserAgentHelper() { - return SingletonLaContainer.getComponent(USER_AGENT_HELPER); + return getComponent(USER_AGENT_HELPER); } public static DataStoreFactory getDataStoreFactory() { - return SingletonLaContainer.getComponent(DATA_STORE_FACTORY); + return getComponent(DATA_STORE_FACTORY); } public static IntervalControlHelper getIntervalControlHelper() { - return SingletonLaContainer.getComponent(INTERVAL_CONTROL_HELPER); + return getComponent(INTERVAL_CONTROL_HELPER); } public static ExtractorFactory getExtractorFactory() { - return SingletonLaContainer.getComponent(EXTRACTOR_FACTORY); + return getComponent(EXTRACTOR_FACTORY); } public static JobExecutor getJobExecutor(final String name) { - return SingletonLaContainer.getComponent(name + JOB_EXECUTOR_SUFFIX); + return getComponent(name + JOB_EXECUTOR_SUFFIX); } public static FileTypeHelper getFileTypeHelper() { - return SingletonLaContainer.getComponent(FILE_TYPE_HELPER); + return getComponent(FILE_TYPE_HELPER); } public static IndexUpdater getIndexUpdater() { - return SingletonLaContainer.getComponent(INDEX_UPDATER); + return getComponent(INDEX_UPDATER); } public static String getUserAgentName() { - return SingletonLaContainer.getComponent(USER_AGENT_NAME); + return getComponent(USER_AGENT_NAME); } public static KeyMatchHelper getKeyMatchHelper() { - return SingletonLaContainer.getComponent(KEY_MATCH_HELPER); + return getComponent(KEY_MATCH_HELPER); } public static IndexingHelper getIndexingHelper() { - return SingletonLaContainer.getComponent(INDEXING_HELPER); + if (indexingHelper == null || HotdeployUtil.isHotdeploy()) { + indexingHelper = getComponent(INDEXING_HELPER); + } + return indexingHelper; } public static UserInfoHelper getUserInfoHelper() { - return SingletonLaContainer.getComponent(USER_INFO_HELPER); + return getComponent(USER_INFO_HELPER); } public static FessEsClient getElasticsearchClient() { - return SingletonLaContainer.getComponent(ELASTICSEARCH_CLIENT); + return getComponent(ELASTICSEARCH_CLIENT); } public static MessageManager getMessageManager() { - return SingletonLaContainer.getComponent(MESSAGE_MANAGER); + return getComponent(MESSAGE_MANAGER); } public static DictionaryManager getDictionaryManager() { - return SingletonLaContainer.getComponent(DICTIONARY_MANAGER); + return getComponent(DICTIONARY_MANAGER); } public static DataService getDataService() { - return SingletonLaContainer.getComponent(DATA_SERVICE); + return getComponent(DATA_SERVICE); } public static FessEsClient getFessEsClient() { - return SingletonLaContainer.getComponent(FESS_ES_CLIENT); + return getComponent(FESS_ES_CLIENT); } public static FessConfig getFessConfig() { - return SingletonLaContainer.getComponent(FessConfig.class); + return getComponent(FessConfig.class); } public static SuggestHelper getSuggestHelper() { - return SingletonLaContainer.getComponent(SUGGEST_HELPER); + return getComponent(SUGGEST_HELPER); } public static RoleQueryHelper getRoleQueryHelper() { - return SingletonLaContainer.getComponent(ROLE_QUERY_HELPER); + return getComponent(ROLE_QUERY_HELPER); } public static LdapManager getLdapManager() { - return SingletonLaContainer.getComponent(LDAP_MANAGER); + return getComponent(LDAP_MANAGER); } public static ActivityHelper getActivityHelper() { - return SingletonLaContainer.getComponent(ACTIVITY_HELPER); + return getComponent(ACTIVITY_HELPER); } public static RequestManager getRequestManager() { - return SingletonLaContainer.getComponent(RequestManager.class); + return getComponent(RequestManager.class); } public static JobManager getJobManager() { - return SingletonLaContainer.getComponent(JobManager.class); + return getComponent(JobManager.class); + } + + public static DocumentHelper getDocumentHelper() { + return getComponent(DOCUMENT_HELPER); + } + + public static QueryParser getQueryParser() { + return getComponent(QUERY_PARSER); + } + + public static CrawlerClientFactory getCrawlerClientFactory() { + return getComponent(CrawlerClientFactory.class); } public static T getComponent(final Class clazz) { - return SingletonLaContainer.getComponent(clazz); + try { + return SingletonLaContainer.getComponent(clazz); + } catch (NullPointerException e) { + throw new ContainerNotAvailableException(e); + } + } + + public static T getComponent(final String componentName) { + try { + return SingletonLaContainer.getComponent(componentName); + } catch (NullPointerException e) { + throw new ContainerNotAvailableException(e); + } } public static boolean hasQueryHelper() { return SingletonLaContainerFactory.getContainer().hasComponentDef(QUERY_HELPER); } + public static boolean available() { + try { + return SingletonLaContainer.getComponent(SYSTEM_HELPER) != null; + } catch (Exception e) { + // ignore + } + return false; + } + } diff --git a/src/main/java/org/codelibs/fess/util/DocList.java b/src/main/java/org/codelibs/fess/util/DocList.java new file mode 100644 index 000000000..0ebfa6551 --- /dev/null +++ b/src/main/java/org/codelibs/fess/util/DocList.java @@ -0,0 +1,58 @@ +/* + * Copyright 2012-2016 CodeLibs Project and the Others. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language + * governing permissions and limitations under the License. + */ +package org.codelibs.fess.util; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Map; + +public class DocList extends ArrayList> { + + private static final long serialVersionUID = 1L; + + private long contentSize = 0; + + private long processingTime = 0; + + public void clear() { + super.clear(); + contentSize = 0; + processingTime = 0; + } + + public long getContentSize() { + return contentSize; + } + + public void addContentSize(long contentSize) { + this.contentSize += contentSize; + } + + public long getProcessingTime() { + return processingTime; + } + + public void addProcessingTime(long processingTime) { + this.processingTime += processingTime; + } + + @Override + public String toString() { + return "DocList [contentSize=" + contentSize + ", processingTime=" + processingTime + ", elementData=" + + Arrays.toString(toArray(new Map[size()])) + "]"; + } + +} diff --git a/src/main/java/org/codelibs/fess/util/InputStreamThread.java b/src/main/java/org/codelibs/fess/util/InputStreamThread.java index 21b47ca1d..52438e799 100644 --- a/src/main/java/org/codelibs/fess/util/InputStreamThread.java +++ b/src/main/java/org/codelibs/fess/util/InputStreamThread.java @@ -70,7 +70,7 @@ public class InputStreamThread extends Thread { } public String getOutput() { - final StringBuilder buf = new StringBuilder(); + final StringBuilder buf = new StringBuilder(100); for (final String value : list) { buf.append(value).append("\n"); } diff --git a/src/main/java/org/codelibs/fess/util/MemoryUtil.java b/src/main/java/org/codelibs/fess/util/MemoryUtil.java new file mode 100644 index 000000000..f97ae293a --- /dev/null +++ b/src/main/java/org/codelibs/fess/util/MemoryUtil.java @@ -0,0 +1,38 @@ +/* + * Copyright 2012-2016 CodeLibs Project and the Others. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language + * governing permissions and limitations under the License. + */ +package org.codelibs.fess.util; + +import org.apache.commons.io.FileUtils; +import org.codelibs.core.lang.StringUtil; + +public final class MemoryUtil { + private MemoryUtil() { + } + + public static String getMemoryUsageLog() { + final Runtime runtime = Runtime.getRuntime(); + final long freeBytes = runtime.freeMemory(); + final long maxBytes = runtime.maxMemory(); + final long totalBytes = runtime.totalMemory(); + final long usedBytes = totalBytes - freeBytes; + return "Mem:{used " + byteCountToDisplaySize(usedBytes) + ", heap " + byteCountToDisplaySize(totalBytes) + ", max " + + byteCountToDisplaySize(maxBytes) + "}"; + } + + public static String byteCountToDisplaySize(long size) { + return FileUtils.byteCountToDisplaySize(size).replace(" ", StringUtil.EMPTY); + } +} diff --git a/src/main/java/org/codelibs/fess/util/ParameterUtil.java b/src/main/java/org/codelibs/fess/util/ParameterUtil.java index 3c0c43649..c29f721b3 100644 --- a/src/main/java/org/codelibs/fess/util/ParameterUtil.java +++ b/src/main/java/org/codelibs/fess/util/ParameterUtil.java @@ -33,6 +33,8 @@ public class ParameterUtil { protected static final String CLIENT_PREFIX = "client."; + protected static final String CONFIG_PREFIX = "config."; + protected static final String FIELD_PREFIX = "field.config."; protected ParameterUtil() { @@ -72,12 +74,14 @@ public class ParameterUtil { public static Map> createConfigParameterMap(final String configParameters) { final Map> map = new HashMap<>(); + final Map configConfigMap = new HashMap<>(); final Map clientConfigMap = new HashMap<>(); final Map xpathConfigMap = new HashMap<>(); final Map metaConfigMap = new HashMap<>(); final Map valueConfigMap = new HashMap<>(); final Map scriptConfigMap = new HashMap<>(); final Map fieldConfigMap = new HashMap<>(); + map.put(ConfigName.CONFIG, configConfigMap); map.put(ConfigName.CLIENT, clientConfigMap); map.put(ConfigName.XPATH, xpathConfigMap); map.put(ConfigName.META, metaConfigMap); @@ -86,7 +90,9 @@ public class ParameterUtil { map.put(ConfigName.FIELD, fieldConfigMap); for (final Map.Entry entry : ParameterUtil.parse(configParameters).entrySet()) { final String key = entry.getKey(); - if (key.startsWith(CLIENT_PREFIX)) { + if (key.startsWith(CONFIG_PREFIX)) { + configConfigMap.put(key.substring(CONFIG_PREFIX.length()), entry.getValue()); + } else if (key.startsWith(CLIENT_PREFIX)) { clientConfigMap.put(key.substring(CLIENT_PREFIX.length()), entry.getValue()); } else if (key.startsWith(XPATH_PREFIX)) { xpathConfigMap.put(key.substring(XPATH_PREFIX.length()), entry.getValue()); diff --git a/src/main/java/org/codelibs/fess/util/ResourceUtil.java b/src/main/java/org/codelibs/fess/util/ResourceUtil.java index 12b7907b1..10d9f8e63 100644 --- a/src/main/java/org/codelibs/fess/util/ResourceUtil.java +++ b/src/main/java/org/codelibs/fess/util/ResourceUtil.java @@ -28,7 +28,6 @@ import javax.servlet.ServletContext; import org.codelibs.core.lang.StringUtil; import org.codelibs.fess.Constants; import org.codelibs.fess.mylasta.direction.FessConfig; -import org.lastaflute.di.core.SingletonLaContainer; import org.lastaflute.web.util.LaServletContextUtil; public class ResourceUtil { @@ -76,7 +75,7 @@ public class ResourceUtil { protected static Path getPath(final String base, final String... names) { try { - final ServletContext servletContext = SingletonLaContainer.getComponent(ServletContext.class); + final ServletContext servletContext = ComponentUtil.getComponent(ServletContext.class); final String webinfPath = servletContext.getRealPath("/WEB-INF/" + base); if (webinfPath != null) { if (Files.exists(Paths.get(webinfPath))) { diff --git a/src/main/java/org/codelibs/fess/util/StreamUtil.java b/src/main/java/org/codelibs/fess/util/StreamUtil.java index 06adcb122..39af27684 100644 --- a/src/main/java/org/codelibs/fess/util/StreamUtil.java +++ b/src/main/java/org/codelibs/fess/util/StreamUtil.java @@ -30,6 +30,14 @@ public class StreamUtil { } } + public static Stream splitOf(final String value, final String regex) { + if (value != null) { + return Arrays.stream(value.split(regex)); + } else { + return Collections. emptyList().stream(); + } + } + public static Stream> of(final Map map) { if (map != null) { return map.entrySet().stream(); diff --git a/src/main/resources/app.xml b/src/main/resources/app.xml index d151cb5eb..46666c1ac 100644 --- a/src/main/resources/app.xml +++ b/src/main/resources/app.xml @@ -22,7 +22,7 @@ - + org.codelibs.fess.Constants.DEFAULT_FIELD diff --git a/src/main/resources/crawler/extractor+tikaExtractor.xml b/src/main/resources/crawler/extractor+tikaExtractor.xml new file mode 100644 index 000000000..c0921c273 --- /dev/null +++ b/src/main/resources/crawler/extractor+tikaExtractor.xml @@ -0,0 +1,14 @@ + + + + + + 2 + 10000000 + 20 + 10 + false + + diff --git a/src/main/resources/fess.xml b/src/main/resources/fess.xml index a5adc13d8..f77c6da01 100644 --- a/src/main/resources/fess.xml +++ b/src/main/resources/fess.xml @@ -16,6 +16,8 @@ + + diff --git a/src/main/resources/fess_config.properties b/src/main/resources/fess_config.properties index 8112c90e0..bcb3d9f81 100644 --- a/src/main/resources/fess_config.properties +++ b/src/main/resources/fess_config.properties @@ -21,19 +21,7 @@ app.digest.algorism=sha256 jvm.crawler.options=\ -Djava.awt.headless=true\n\ -server\n\ --Xmx256m\n\ --XX:MaxMetaspaceSize=128m\n\ --XX:CompressedClassSpaceSize=32m\n\ --XX:-UseGCOverheadLimit\n\ --XX:+UseConcMarkSweepGC\n\ --XX:CMSInitiatingOccupancyFraction=75\n\ --XX:+UseParNewGC\n\ --XX:+UseTLAB\n\ --XX:+DisableExplicitGC -jvm.suggest.options=\ --Djava.awt.headless=true\n\ --server\n\ --Xmx256m\n\ +-Xmx512m\n\ -XX:MaxMetaspaceSize=128m\n\ -XX:CompressedClassSpaceSize=32m\n\ -XX:-UseGCOverheadLimit\n\ @@ -46,6 +34,19 @@ jvm.suggest.options=\ -Djcifs.smb.client.soTimeout=35000\n\ -Djcifs.smb.client.responseTimeout=30000\n\ +jvm.suggest.options=\ +-Djava.awt.headless=true\n\ +-server\n\ +-Xmx256m\n\ +-XX:MaxMetaspaceSize=128m\n\ +-XX:CompressedClassSpaceSize=32m\n\ +-XX:-UseGCOverheadLimit\n\ +-XX:+UseConcMarkSweepGC\n\ +-XX:CMSInitiatingOccupancyFraction=75\n\ +-XX:+UseParNewGC\n\ +-XX:+UseTLAB\n\ +-XX:+DisableExplicitGC + #-Xdebug\n\ #-Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=127.0.0.1:8000\n\ @@ -77,6 +78,8 @@ crawler.document.site.encoding=UTF-8 crawler.document.unknown.hostname=unknown crawler.document.use.site.encoding.on.english=false crawler.document.append.data=true +crawler.document.max.alphanum.term.size=20 +crawler.document.max.symbol.term.size=10 crawler.crawling.data.encoding=UTF-8 crawler.web.protocols=http,https crawler.file.protocols=file,smb @@ -96,7 +99,6 @@ crawler.document.html.max.digest.length=200 # file crawler.document.file.name.encoding= crawler.document.file.no.title.label=No title. -crawler.document.file.abbreviation.margin.length=10 crawler.document.file.ignore.empty.content=false crawler.document.file.max.title.length=100 crawler.document.file.max.digest.length=200 @@ -116,11 +118,13 @@ indexer.thread.dump.enabled=true indexer.unprocessed.document.size=1000 indexer.click.count.enabled=true indexer.favorite.count.enabled=true -indexer.webfs.commit.margin.time=1000 -indexer.webfs.max.empty.list.conunt=60 +indexer.webfs.commit.margin.time=5000 +indexer.webfs.max.empty.list.count=360 indexer.webfs.update.interval=10000 -indexer.webfs.max.document.cache.size=5 +indexer.webfs.max.document.cache.size=100 +indexer.webfs.max.document.request.size=10485760 indexer.data.max.document.cache.size=5 +indexer.data.max.document.request.size=10485760 # field names index.field.favorite_count=favorite_count @@ -398,6 +402,7 @@ ldap.admin.group.base.dn=ou\=Group,dc\=fess,dc\=codelibs,dc\=org ldap.admin.group.object.classes=groupOfNames ldap.admin.sync.password=true +ldap.max.username.length=-1 ldap.memberof.attribute=memberOf #ldap.memberof.attribute=isMemberOf diff --git a/src/main/resources/fess_ds.xml b/src/main/resources/fess_ds.xml index 7edc1c10c..e619d59b8 100644 --- a/src/main/resources/fess_ds.xml +++ b/src/main/resources/fess_ds.xml @@ -12,8 +12,16 @@ csvDataStore - "FileListDataStore" - fileListDataStore + "CsvListDataStore" + csvListDataStore + + + "EsDataStore" + esDataStore + + + "EsListDataStore" + esListDataStore @@ -24,7 +32,11 @@ new String[] { ".csv", ".tsv" } --> - + + + + + diff --git a/src/main/resources/fess_indices/fess.json b/src/main/resources/fess_indices/fess.json index 858dad7ae..ae35fb05b 100644 --- a/src/main/resources/fess_indices/fess.json +++ b/src/main/resources/fess_indices/fess.json @@ -10,6 +10,11 @@ "mapping_ja_filter": { "type": "mapping", "mappings_path": "${fess.dictionary.path}ja/mapping.txt" + }, + "removeall_filter": { + "type": "pattern_replace", + "pattern":"(.*)", + "replacement":"" } }, "filter": { @@ -24,6 +29,72 @@ "stopword_en_filter": { "type": "stop", "stopwords": "_english_" + }, + "kuromoji_neologd_pos_filter" : { + "type" : "kuromoji_neologd_part_of_speech", + "stoptags" : [ + "その他", + "その他-間投", + "フィラー", + "感動詞", + "記号", + "記号-アルファベット", + "記号-一般", + "記号-括弧開", + "記号-括弧閉", + "記号-句点", + "記号-空白", + "記号-読点", + "形容詞", + "形容詞-自立", + "形容詞-接尾", + "形容詞-非自立", + "語断片", + "助詞", + "助詞-格助詞", + "助詞-格助詞-一般", + "助詞-格助詞-引用", + "助詞-格助詞-連語", + "助詞-間投助詞", + "助詞-係助詞", + "助詞-終助詞", + "助詞-接続助詞", + "助詞-特殊", + "助詞-副詞化", + "助詞-副助詞", + "助詞-副助詞/並立助詞/終助詞", + "助詞-並立助詞", + "助詞-連体化", + "助動詞", + "接続詞", + "接頭詞", + "接頭詞-形容詞接続", + "接頭詞-数接続", + "接頭詞-動詞接続", + "接頭詞-名詞接続", + "動詞", + "動詞-自立", + "動詞-接尾", + "動詞-非自立", + "非言語音", + "副詞", + "副詞-一般", + "副詞-助詞類接続", + "未知語", + "連体詞" + ] + }, + "truncate10_filter" : { + "type" : "truncate", + "length" : 10 + }, + "truncate20_filter" : { + "type" : "truncate", + "length" : 20 + }, + "alphanum_word_filter" : { + "type" : "alphanum_word", + "max_token_length" : 20 } }, "tokenizer": { @@ -40,6 +111,13 @@ "synonyms_path": "${fess.dictionary.path}synonym.txt", "dynamic_reload":true, "reload_interval":"1m" + }, + "bigram_synonym_tokenizer": { + "type": "ngram_synonym", + "n": "2", + "synonyms_path": "${fess.dictionary.path}synonym.txt", + "dynamic_reload":true, + "reload_interval":"1m" } }, "analyzer": { @@ -51,9 +129,10 @@ ], "tokenizer": "kuromoji_neologd_tokenizer", "filter": [ + "truncate10_filter", "kuromoji_neologd_baseform", "kuromoji_neologd_stemmer", - "kuromoji_neologd_part_of_speech", + "kuromoji_neologd_pos_filter", "lowercase" ] }, @@ -61,10 +140,18 @@ "type": "custom", "tokenizer": "standard", "filter": [ + "truncate20_filter", "lowercase", "possessive_stemmer_en_filter" ] }, + "empty_analyzer": { + "type": "custom", + "tokenizer": "standard", + "char_filter": [ + "removeall_filter" + ] + }, "standard_analyzer": { "type": "custom", "char_filter": [ @@ -72,7 +159,7 @@ ], "tokenizer": "unigram_synonym_tokenizer", "filter": [ - "alphanum_word", + "alphanum_word_filter", "cjk_bigram", "stopword_en_filter", "lowercase", diff --git a/src/main/resources/fess_indices/fess/doc.json b/src/main/resources/fess_indices/fess/doc.json index 10c845651..77bd7d806 100644 --- a/src/main/resources/fess_indices/fess/doc.json +++ b/src/main/resources/fess_indices/fess/doc.json @@ -12,7 +12,7 @@ "match": "*_ar", "mapping": { "type": "string", - "analyzer": "standard_analyzer" + "analyzer": "empty_analyzer" } } }, @@ -21,7 +21,7 @@ "match": "*_bg", "mapping": { "type": "string", - "analyzer": "standard_analyzer" + "analyzer": "empty_analyzer" } } }, @@ -30,7 +30,7 @@ "match": "*_bn", "mapping": { "type": "string", - "analyzer": "standard_analyzer" + "analyzer": "empty_analyzer" } } }, @@ -39,7 +39,7 @@ "match": "*_ca", "mapping": { "type": "string", - "analyzer": "standard_analyzer" + "analyzer": "empty_analyzer" } } }, @@ -48,7 +48,7 @@ "match": "*_cs", "mapping": { "type": "string", - "analyzer": "standard_analyzer" + "analyzer": "empty_analyzer" } } }, @@ -57,7 +57,7 @@ "match": "*_da", "mapping": { "type": "string", - "analyzer": "standard_analyzer" + "analyzer": "empty_analyzer" } } }, @@ -66,7 +66,7 @@ "match": "*_de", "mapping": { "type": "string", - "analyzer": "standard_analyzer" + "analyzer": "empty_analyzer" } } }, @@ -75,7 +75,7 @@ "match": "*_el", "mapping": { "type": "string", - "analyzer": "standard_analyzer" + "analyzer": "empty_analyzer" } } }, @@ -93,7 +93,7 @@ "match": "*_es", "mapping": { "type": "string", - "analyzer": "standard_analyzer" + "analyzer": "empty_analyzer" } } }, @@ -102,7 +102,7 @@ "match": "*_et", "mapping": { "type": "string", - "analyzer": "standard_analyzer" + "analyzer": "empty_analyzer" } } }, @@ -111,7 +111,7 @@ "match": "*_fa", "mapping": { "type": "string", - "analyzer": "standard_analyzer" + "analyzer": "empty_analyzer" } } }, @@ -120,7 +120,7 @@ "match": "*_fi", "mapping": { "type": "string", - "analyzer": "standard_analyzer" + "analyzer": "empty_analyzer" } } }, @@ -129,7 +129,7 @@ "match": "*_fr", "mapping": { "type": "string", - "analyzer": "standard_analyzer" + "analyzer": "empty_analyzer" } } }, @@ -138,7 +138,7 @@ "match": "*_gu", "mapping": { "type": "string", - "analyzer": "standard_analyzer" + "analyzer": "empty_analyzer" } } }, @@ -147,7 +147,7 @@ "match": "*_he", "mapping": { "type": "string", - "analyzer": "standard_analyzer" + "analyzer": "empty_analyzer" } } }, @@ -156,7 +156,7 @@ "match": "*_hi", "mapping": { "type": "string", - "analyzer": "standard_analyzer" + "analyzer": "empty_analyzer" } } }, @@ -165,7 +165,7 @@ "match": "*_hr", "mapping": { "type": "string", - "analyzer": "standard_analyzer" + "analyzer": "empty_analyzer" } } }, @@ -174,7 +174,7 @@ "match": "*_hu", "mapping": { "type": "string", - "analyzer": "standard_analyzer" + "analyzer": "empty_analyzer" } } }, @@ -183,7 +183,7 @@ "match": "*_id", "mapping": { "type": "string", - "analyzer": "standard_analyzer" + "analyzer": "empty_analyzer" } } }, @@ -192,7 +192,7 @@ "match": "*_it", "mapping": { "type": "string", - "analyzer": "standard_analyzer" + "analyzer": "empty_analyzer" } } }, @@ -210,7 +210,7 @@ "match": "*_ko", "mapping": { "type": "string", - "analyzer": "standard_analyzer" + "analyzer": "empty_analyzer" } } }, @@ -219,7 +219,7 @@ "match": "*_lt", "mapping": { "type": "string", - "analyzer": "standard_analyzer" + "analyzer": "empty_analyzer" } } }, @@ -228,7 +228,7 @@ "match": "*_lv", "mapping": { "type": "string", - "analyzer": "standard_analyzer" + "analyzer": "empty_analyzer" } } }, @@ -237,7 +237,7 @@ "match": "*_mk", "mapping": { "type": "string", - "analyzer": "standard_analyzer" + "analyzer": "empty_analyzer" } } }, @@ -246,7 +246,7 @@ "match": "*_ml", "mapping": { "type": "string", - "analyzer": "standard_analyzer" + "analyzer": "empty_analyzer" } } }, @@ -255,7 +255,7 @@ "match": "*_nl", "mapping": { "type": "string", - "analyzer": "standard_analyzer" + "analyzer": "empty_analyzer" } } }, @@ -264,7 +264,7 @@ "match": "*_no", "mapping": { "type": "string", - "analyzer": "standard_analyzer" + "analyzer": "empty_analyzer" } } }, @@ -273,7 +273,7 @@ "match": "*_pa", "mapping": { "type": "string", - "analyzer": "standard_analyzer" + "analyzer": "empty_analyzer" } } }, @@ -282,7 +282,7 @@ "match": "*_pl", "mapping": { "type": "string", - "analyzer": "standard_analyzer" + "analyzer": "empty_analyzer" } } }, @@ -291,7 +291,7 @@ "match": "*_pt", "mapping": { "type": "string", - "analyzer": "standard_analyzer" + "analyzer": "empty_analyzer" } } }, @@ -300,7 +300,7 @@ "match": "*_ro", "mapping": { "type": "string", - "analyzer": "standard_analyzer" + "analyzer": "empty_analyzer" } } }, @@ -309,7 +309,7 @@ "match": "*_ru", "mapping": { "type": "string", - "analyzer": "standard_analyzer" + "analyzer": "empty_analyzer" } } }, @@ -318,7 +318,7 @@ "match": "*_si", "mapping": { "type": "string", - "analyzer": "standard_analyzer" + "analyzer": "empty_analyzer" } } }, @@ -327,7 +327,7 @@ "match": "*_sq", "mapping": { "type": "string", - "analyzer": "standard_analyzer" + "analyzer": "empty_analyzer" } } }, @@ -336,7 +336,7 @@ "match": "*_sv", "mapping": { "type": "string", - "analyzer": "standard_analyzer" + "analyzer": "empty_analyzer" } } }, @@ -345,7 +345,7 @@ "match": "*_ta", "mapping": { "type": "string", - "analyzer": "standard_analyzer" + "analyzer": "empty_analyzer" } } }, @@ -354,7 +354,7 @@ "match": "*_te", "mapping": { "type": "string", - "analyzer": "standard_analyzer" + "analyzer": "empty_analyzer" } } }, @@ -363,7 +363,7 @@ "match": "*_th", "mapping": { "type": "string", - "analyzer": "standard_analyzer" + "analyzer": "empty_analyzer" } } }, @@ -372,7 +372,7 @@ "match": "*_tl", "mapping": { "type": "string", - "analyzer": "standard_analyzer" + "analyzer": "empty_analyzer" } } }, @@ -381,7 +381,7 @@ "match": "*_tr", "mapping": { "type": "string", - "analyzer": "standard_analyzer" + "analyzer": "empty_analyzer" } } }, @@ -390,7 +390,7 @@ "match": "*_uk", "mapping": { "type": "string", - "analyzer": "standard_analyzer" + "analyzer": "empty_analyzer" } } }, @@ -399,7 +399,7 @@ "match": "*_ur", "mapping": { "type": "string", - "analyzer": "standard_analyzer" + "analyzer": "empty_analyzer" } } }, @@ -408,7 +408,7 @@ "match": "*_vi", "mapping": { "type": "string", - "analyzer": "standard_analyzer" + "analyzer": "empty_analyzer" } } }, @@ -417,7 +417,7 @@ "match": "*_zh-cn", "mapping": { "type": "string", - "analyzer": "standard_analyzer" + "analyzer": "empty_analyzer" } } }, @@ -426,7 +426,7 @@ "match": "*_zh-tw", "mapping": { "type": "string", - "analyzer": "standard_analyzer" + "analyzer": "empty_analyzer" } } } diff --git a/src/main/resources/fess_indices/fess/ja/mapping.txt b/src/main/resources/fess_indices/fess/ja/mapping.txt index 9276761bf..3c0703deb 100644 --- a/src/main/resources/fess_indices/fess/ja/mapping.txt +++ b/src/main/resources/fess_indices/fess/ja/mapping.txt @@ -1,189 +1,189 @@ # Half-width Katakana => Full-width Katakana -"ア" => "ア" -"イ" => "イ" -"ウ" => "ウ" -"エ" => "エ" -"オ" => "オ" -"カ" => "カ" -"キ" => "キ" -"ク" => "ク" -"ケ" => "ケ" -"コ" => "コ" -"サ" => "サ" -"シ" => "シ" -"ス" => "ス" -"セ" => "セ" -"ソ" => "ソ" -"タ" => "タ" -"チ" => "チ" -"ツ" => "ツ" -"テ" => "テ" -"ト" => "ト" -"ナ" => "ナ" -"ニ" => "ニ" -"ヌ" => "ヌ" -"ネ" => "ネ" -"ノ" => "ノ" -"ハ" => "ハ" -"ヒ" => "ヒ" -"フ" => "フ" -"ヘ" => "ヘ" -"ホ" => "ホ" -"マ" => "マ" -"ミ" => "ミ" -"ム" => "ム" -"メ" => "メ" -"モ" => "モ" -"ヤ" => "ヤ" -"ユ" => "ユ" -"ヨ" => "ヨ" -"ラ" => "ラ" -"リ" => "リ" -"ル" => "ル" -"レ" => "レ" -"ロ" => "ロ" -"ワ" => "ワ" -"ヲ" => "ヲ" -"ン" => "ン" -"ッ" => "ッ" -"ァ" => "ァ" -"ィ" => "ィ" -"ゥ" => "ゥ" -"ェ" => "ェ" -"ォ" => "ォ" -"ャ" => "ャ" -"ュ" => "ュ" -"ョ" => "ョ" -"ガ" => "ガ" -"ギ" => "ギ" -"グ" => "グ" -"ゲ" => "ゲ" -"ゴ" => "ゴ" -"ザ" => "ザ" -"ジ" => "ジ" -"ズ" => "ズ" -"ゼ" => "ゼ" -"ゾ" => "ゾ" -"ダ" => "ダ" -"ヂ" => "ヂ" -"ヅ" => "ヅ" -"デ" => "デ" -"ド" => "ド" -"バ" => "バ" -"ビ" => "ビ" -"ブ" => "ブ" -"ベ" => "ベ" -"ボ" => "ボ" -"パ" => "パ" -"ピ" => "ピ" -"プ" => "プ" -"ペ" => "ペ" -"ポ" => "ポ" +ア => ア +イ => イ +ウ => ウ +エ => エ +オ => オ +カ => カ +キ => キ +ク => ク +ケ => ケ +コ => コ +サ => サ +シ => シ +ス => ス +セ => セ +ソ => ソ +タ => タ +チ => チ +ツ => ツ +テ => テ +ト => ト +ナ => ナ +ニ => ニ +ヌ => ヌ +ネ => ネ +ノ => ノ +ハ => ハ +ヒ => ヒ +フ => フ +ヘ => ヘ +ホ => ホ +マ => マ +ミ => ミ +ム => ム +メ => メ +モ => モ +ヤ => ヤ +ユ => ユ +ヨ => ヨ +ラ => ラ +リ => リ +ル => ル +レ => レ +ロ => ロ +ワ => ワ +ヲ => ヲ +ン => ン +ッ => ッ +ァ => ァ +ィ => ィ +ゥ => ゥ +ェ => ェ +ォ => ォ +ャ => ャ +ュ => ュ +ョ => ョ +ガ => ガ +ギ => ギ +グ => グ +ゲ => ゲ +ゴ => ゴ +ザ => ザ +ジ => ジ +ズ => ズ +ゼ => ゼ +ゾ => ゾ +ダ => ダ +ヂ => ヂ +ヅ => ヅ +デ => デ +ド => ド +バ => バ +ビ => ビ +ブ => ブ +ベ => ベ +ボ => ボ +パ => パ +ピ => ピ +プ => プ +ペ => ペ +ポ => ポ # Full-width alpha-numeric => Half-width alpha-numeric -"0" => "0" -"1" => "1" -"2" => "2" -"3" => "3" -"4" => "4" -"5" => "5" -"6" => "6" -"7" => "7" -"8" => "8" -"9" => "9" +0 => 0 +1 => 1 +2 => 2 +3 => 3 +4 => 4 +5 => 5 +6 => 6 +7 => 7 +8 => 8 +9 => 9 -"A" => "A" -"B" => "B" -"C" => "C" -"D" => "D" -"E" => "E" -"F" => "F" -"G" => "G" -"H" => "H" -"I" => "I" -"J" => "J" -"K" => "K" -"L" => "L" -"M" => "M" -"N" => "N" -"O" => "O" -"P" => "P" -"Q" => "Q" -"R" => "R" -"S" => "S" -"T" => "T" -"U" => "U" -"V" => "V" -"W" => "W" -"X" => "X" -"Y" => "Y" -"Z" => "Z" +A => A +B => B +C => C +D => D +E => E +F => F +G => G +H => H +I => I +J => J +K => K +L => L +M => M +N => N +O => O +P => P +Q => Q +R => R +S => S +T => T +U => U +V => V +W => W +X => X +Y => Y +Z => Z -"a" => "a" -"b" => "b" -"c" => "c" -"d" => "d" -"e" => "e" -"f" => "f" -"g" => "g" -"h" => "h" -"i" => "i" -"j" => "j" -"k" => "k" -"l" => "l" -"m" => "m" -"n" => "n" -"o" => "o" -"p" => "p" -"q" => "q" -"r" => "r" -"s" => "s" -"t" => "t" -"u" => "u" -"v" => "v" -"w" => "w" -"x" => "x" -"y" => "y" -"z" => "z" +a => a +b => b +c => c +d => d +e => e +f => f +g => g +h => h +i => i +j => j +k => k +l => l +m => m +n => n +o => o +p => p +q => q +r => r +s => s +t => t +u => u +v => v +w => w +x => x +y => y +z => z -# !"#$%&'()=|-^\@[`{;:]+*}/_<>? -"!" => "!" -"”" => "\"" -"#" => "#" -"$" => "$" -"%" => "%" -"&" => "&" -"’" => "'" -"(" => "(" -")" => ")" -"=" => "=" -"|" => "|" -"-" => "-" -"^" => "^" -"¥" => "\\" -"@" => "@" -"[" => "[" -"`" => "`" -"{" => "{" -";" => ";" -":" => ":" -"]" => "]" -"+" => "+" -"*" => "*" -"}" => "}" -"/" => "/" -"_" => "_" -"<" => "<" -">" => ">" -"?" => "?" -"‘" => "`" +# !#$%&'()=|-^\@[`{;:]+*}/_<>? +! => ! +” => " +# => # +$ => $ +% => % +& => & +’ => ' +( => ( +) => ) += => = +| => | +- => - +^ => ^ +¥ => \\ +@ => @ +[ => [ +` => ` +{ => { +; => ; +: => : +] => ] ++ => + +* => * +} => } +/ => / +_ => _ +< => < +> => > +? => ? +‘ => ` -"①" => "1" -"②" => "2" -"③" => "3" -"④" => "4" -"⑤" => "5" -"⑥" => "6" -"⑦" => "7" -"⑧" => "8" -"⑨" => "9" +① => 1 +② => 2 +③ => 3 +④ => 4 +⑤ => 5 +⑥ => 6 +⑦ => 7 +⑧ => 8 +⑨ => 9 diff --git a/src/main/webapp/WEB-INF/view/error/header.jsp b/src/main/webapp/WEB-INF/view/error/header.jsp index 72ee69bbe..95d025f32 100755 --- a/src/main/webapp/WEB-INF/view/error/header.jsp +++ b/src/main/webapp/WEB-INF/view/error/header.jsp @@ -12,12 +12,12 @@ ${fe:facetForm()}${fe:geoForm()} styleClass="form-control" autocomplete="off" /> diff --git a/src/main/webapp/WEB-INF/view/searchResults.jsp b/src/main/webapp/WEB-INF/view/searchResults.jsp index 939acb71a..3a5a7dfff 100644 --- a/src/main/webapp/WEB-INF/view/searchResults.jsp +++ b/src/main/webapp/WEB-INF/view/searchResults.jsp @@ -58,8 +58,8 @@ - - - + + @@ -68,24 +68,24 @@ - - - + + - - - + + - - - + +
  • class="hidden-phone" + class="hidden-xs" class="active"> ${f:h(pageNumber)} diff --git a/src/main/webapp/css/style.css b/src/main/webapp/css/style.css index ec69f3c93..d0cab84de 100644 --- a/src/main/webapp/css/style.css +++ b/src/main/webapp/css/style.css @@ -242,30 +242,33 @@ body.search #searchOptions.active, body.help #searchOptions.active { /* Large desktop */ @media ( min-width : 74.9em) { - .visible-phone { + .visible-xs { display: none !important; } } /* Large devices (desktops, less than 75em) */ @media ( max-width : 74.9em) { - .visible-phone { + .visible-xs { display: none !important; } - .br-tablet { - display: block; - } } /* Medium devices (tablets, less than 62em) */ @media ( max-width : 61.9em) { + .visible-xs { + display: none !important; + } } /* Small devices (landscape phones, less than 48em) */ @media ( max-width : 47.9em) { - .br-phone { + .br-xs { display: block; } + .hidden-xs { + display: none !important; + } .mainLogo img { width: 200px; } @@ -281,18 +284,18 @@ body.search #searchOptions.active, body.help #searchOptions.active { #result .info { display: none; } - body.search #searchOptions { + body.search #searchOptions, body.help #searchOptions { width: 100%; right: -100%; } - body.search #searchOptions active { - left: 0; + body.search #searchOptions.active, body.help #searchOptions.active { + right: 0; } } /* Extra small devices (portrait phones, less than 34em) */ @media ( max-width : 360px) { - .br-phone { + .br-xs { display: block; } .mainLogo img { diff --git a/src/test/java/org/codelibs/fess/crawler/transformer/FessXpathTransformerTest.java b/src/test/java/org/codelibs/fess/crawler/transformer/FessXpathTransformerTest.java index 5ab8df4a3..192b167f3 100644 --- a/src/test/java/org/codelibs/fess/crawler/transformer/FessXpathTransformerTest.java +++ b/src/test/java/org/codelibs/fess/crawler/transformer/FessXpathTransformerTest.java @@ -218,15 +218,6 @@ public class FessXpathTransformerTest extends UnitFessTestCase { } - public void test_normalizeContent() { - assertEquals("", fessXpathTransformer.normalizeContent("")); - assertEquals(" ", fessXpathTransformer.normalizeContent(" ")); - assertEquals(" ", fessXpathTransformer.normalizeContent(" ")); - assertEquals(" ", fessXpathTransformer.normalizeContent("\t")); - assertEquals(" ", fessXpathTransformer.normalizeContent("\t\t")); - assertEquals(" ", fessXpathTransformer.normalizeContent("\t \t")); - } - public void test_removeCommentTag() { assertEquals("", fessXpathTransformer.removeCommentTag("")); assertEquals(" ", fessXpathTransformer.removeCommentTag("")); diff --git a/src/test/java/org/codelibs/fess/helper/DocumentHelperTest.java b/src/test/java/org/codelibs/fess/helper/DocumentHelperTest.java new file mode 100644 index 000000000..18bf891e5 --- /dev/null +++ b/src/test/java/org/codelibs/fess/helper/DocumentHelperTest.java @@ -0,0 +1,123 @@ +/* + * Copyright 2012-2016 CodeLibs Project and the Others. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language + * governing permissions and limitations under the License. + */ +package org.codelibs.fess.helper; + +import java.util.HashMap; +import java.util.Map; + +import org.codelibs.fess.crawler.entity.ResponseData; +import org.codelibs.fess.unit.UnitFessTestCase; + +public class DocumentHelperTest extends UnitFessTestCase { + + @Override + public void setUp() throws Exception { + super.setUp(); + } + + public void test_getContent() { + DocumentHelper documentHelper = new DocumentHelper(); + + ResponseData responseData = new ResponseData(); + Map dataMap = new HashMap<>(); + assertEquals("", documentHelper.getContent(responseData, null, dataMap)); + assertEquals("", documentHelper.getContent(responseData, "", dataMap)); + assertEquals("", documentHelper.getContent(responseData, " ", dataMap)); + assertEquals("", documentHelper.getContent(responseData, " ", dataMap)); + assertEquals("", documentHelper.getContent(responseData, "\t", dataMap)); + assertEquals("", documentHelper.getContent(responseData, "\t\t", dataMap)); + assertEquals("", documentHelper.getContent(responseData, "\t \t", dataMap)); + assertEquals("123 abc", documentHelper.getContent(responseData, " 123 abc ", dataMap)); + assertEquals("123 あいう", documentHelper.getContent(responseData, " 123 あいう ", dataMap)); + assertEquals("123 abc", documentHelper.getContent(responseData, " 123\nabc ", dataMap)); + } + + public void test_getContent_maxAlphanum() { + DocumentHelper documentHelper = new DocumentHelper() { + protected int getMaxAlphanumTermSize() { + return 2; + } + }; + + ResponseData responseData = new ResponseData(); + Map dataMap = new HashMap<>(); + assertEquals("", documentHelper.getContent(responseData, null, dataMap)); + assertEquals("", documentHelper.getContent(responseData, "", dataMap)); + assertEquals("", documentHelper.getContent(responseData, " ", dataMap)); + assertEquals("", documentHelper.getContent(responseData, " ", dataMap)); + assertEquals("", documentHelper.getContent(responseData, "\t", dataMap)); + assertEquals("", documentHelper.getContent(responseData, "\t\t", dataMap)); + assertEquals("", documentHelper.getContent(responseData, "\t \t", dataMap)); + assertEquals("12 ab", documentHelper.getContent(responseData, " 123 abc ", dataMap)); + assertEquals("123 あいう", documentHelper.getContent(responseData, " 123 あいう ", dataMap)); + assertEquals("12 ab", documentHelper.getContent(responseData, " 123\nabc ", dataMap)); + assertEquals("12", documentHelper.getContent(responseData, " 123abc ", dataMap)); + } + + public void test_getContent_maxSymbol() { + DocumentHelper documentHelper = new DocumentHelper() { + protected int getMaxSymbolTermSize() { + return 2; + } + }; + + ResponseData responseData = new ResponseData(); + Map dataMap = new HashMap<>(); + assertEquals("", documentHelper.getContent(responseData, null, dataMap)); + assertEquals("", documentHelper.getContent(responseData, "", dataMap)); + assertEquals("", documentHelper.getContent(responseData, " ", dataMap)); + assertEquals("", documentHelper.getContent(responseData, " ", dataMap)); + assertEquals("", documentHelper.getContent(responseData, "\t", dataMap)); + assertEquals("", documentHelper.getContent(responseData, "\t\t", dataMap)); + assertEquals("", documentHelper.getContent(responseData, "\t \t", dataMap)); + assertEquals("123 abc", documentHelper.getContent(responseData, " 123 abc ", dataMap)); + assertEquals("123 あいう", documentHelper.getContent(responseData, " 123 あいう ", dataMap)); + assertEquals("123 abc", documentHelper.getContent(responseData, " 123\nabc ", dataMap)); + assertEquals("123abc", documentHelper.getContent(responseData, " 123abc ", dataMap)); + + assertEquals("!!", documentHelper.getContent(responseData, "!!!", dataMap)); + assertEquals("//", documentHelper.getContent(responseData, "///", dataMap)); + assertEquals("::", documentHelper.getContent(responseData, ":::", dataMap)); + assertEquals("@@", documentHelper.getContent(responseData, "@@@", dataMap)); + assertEquals("[[", documentHelper.getContent(responseData, "[[[", dataMap)); + assertEquals("``", documentHelper.getContent(responseData, "```", dataMap)); + assertEquals("{{", documentHelper.getContent(responseData, "{{{", dataMap)); + assertEquals("~~", documentHelper.getContent(responseData, "~~~", dataMap)); + assertEquals("!\"", documentHelper.getContent(responseData, "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", dataMap)); + } + + public void test_getDigest() { + DocumentHelper documentHelper = new DocumentHelper(); + + ResponseData responseData = new ResponseData(); + Map dataMap = new HashMap<>(); + assertEquals("1234567...", documentHelper.getDigest(responseData, " 1234567890 1234567890 1234567890 ", dataMap, 10)); + assertEquals("1234567...", documentHelper.getDigest(responseData, "123456789012345678901234567890", dataMap, 10)); + assertEquals("1234567...", documentHelper.getDigest(responseData, "123456789012345678901", dataMap, 10)); + assertEquals("1234567...", documentHelper.getDigest(responseData, "12345678901234567890", dataMap, 10)); + assertEquals("1234567...", documentHelper.getDigest(responseData, "1234567890123456789", dataMap, 10)); + assertEquals("1234567...", documentHelper.getDigest(responseData, "12345678901", dataMap, 10)); + assertEquals("1234567890", documentHelper.getDigest(responseData, "1234567890", dataMap, 10)); + assertEquals("123456789", documentHelper.getDigest(responseData, "123456789", dataMap, 10)); + assertEquals("1234567", documentHelper.getDigest(responseData, "1234567", dataMap, 10)); + assertEquals("1", documentHelper.getDigest(responseData, "1", dataMap, 10)); + assertEquals("", documentHelper.getDigest(responseData, "", dataMap, 10)); + assertEquals("", documentHelper.getDigest(responseData, " ", dataMap, 10)); + assertEquals("", documentHelper.getDigest(responseData, null, dataMap, 10)); + assertEquals("1234567...", documentHelper.getDigest(responseData, " 1234567890 1234567890 1234567890 ", dataMap, 10)); + assertEquals("1234567...", documentHelper.getDigest(responseData, "12345678901234567890", dataMap, 10)); + } +} diff --git a/src/test/java/org/codelibs/fess/helper/QueryHelperTest.java b/src/test/java/org/codelibs/fess/helper/QueryHelperTest.java index a898fcb57..38a95459c 100644 --- a/src/test/java/org/codelibs/fess/helper/QueryHelperTest.java +++ b/src/test/java/org/codelibs/fess/helper/QueryHelperTest.java @@ -17,6 +17,7 @@ package org.codelibs.fess.helper; import org.apache.lucene.analysis.core.WhitespaceAnalyzer; import org.apache.lucene.queryparser.classic.ParseException; +import org.apache.lucene.queryparser.classic.QueryParser; import org.apache.lucene.queryparser.ext.ExtendableQueryParser; import org.codelibs.fess.Constants; import org.codelibs.fess.mylasta.direction.FessConfig; @@ -30,12 +31,15 @@ public class QueryHelperTest extends UnitFessTestCase { @Override public void setUp() throws Exception { super.setUp(); - queryHelper = new QueryHelper(); + queryHelper = new QueryHelper() { + protected QueryParser getQueryParser() { + return new ExtendableQueryParser(Constants.DEFAULT_FIELD, new WhitespaceAnalyzer()); + } + }; final FessConfig fessConfig = ComponentUtil.getFessConfig(); registerMockInstance(fessConfig); registerMockInstance(new SystemHelper()); inject(queryHelper); - queryHelper.queryParser = new ExtendableQueryParser(Constants.DEFAULT_FIELD, new WhitespaceAnalyzer()); queryHelper.init(); } diff --git a/src/test/java/org/codelibs/fess/helper/ViewHelperTest.java b/src/test/java/org/codelibs/fess/helper/ViewHelperTest.java index b8a172f9d..be94c3475 100644 --- a/src/test/java/org/codelibs/fess/helper/ViewHelperTest.java +++ b/src/test/java/org/codelibs/fess/helper/ViewHelperTest.java @@ -118,6 +118,11 @@ public class ViewHelperTest extends UnitFessTestCase { docMap.put("urlLink", urlLink); assertEquals(sitePath, viewHelper.getSitePath(docMap)); + urlLink = "://www.qwerty.jp"; + sitePath = "www.qwerty.jp"; + docMap.put("urlLink", urlLink); + assertEquals(sitePath, viewHelper.getSitePath(docMap)); + urlLink = "www.google.com"; sitePath = "www.google.com"; docMap.put("urlLink", urlLink); @@ -128,8 +133,23 @@ public class ViewHelperTest extends UnitFessTestCase { docMap.put("urlLink", urlLink); assertEquals(sitePath, viewHelper.getSitePath(docMap)); + urlLink = "file:/home/user/"; + sitePath = "/home/user/"; + docMap.put("urlLink", urlLink); + assertEquals(sitePath, viewHelper.getSitePath(docMap)); + urlLink = "file://home/user/"; - sitePath = "home/user/"; + sitePath = "/home/user/"; + docMap.put("urlLink", urlLink); + assertEquals(sitePath, viewHelper.getSitePath(docMap)); + + urlLink = "file://c:/home/user/"; + sitePath = "c:/home/user/"; + docMap.put("urlLink", urlLink); + assertEquals(sitePath, viewHelper.getSitePath(docMap)); + + urlLink = "file://1.2.3.4/user/"; + sitePath = "1.2.3.4/user/"; docMap.put("urlLink", urlLink); assertEquals(sitePath, viewHelper.getSitePath(docMap)); } diff --git a/src/test/java/org/codelibs/fess/mylasta/direction/FessPropTest.java b/src/test/java/org/codelibs/fess/mylasta/direction/FessPropTest.java new file mode 100644 index 000000000..fcbe5d8b9 --- /dev/null +++ b/src/test/java/org/codelibs/fess/mylasta/direction/FessPropTest.java @@ -0,0 +1,74 @@ +/* + * Copyright 2012-2016 CodeLibs Project and the Others. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language + * governing permissions and limitations under the License. + */ +package org.codelibs.fess.mylasta.direction; + +import java.io.File; +import java.io.IOException; + +import org.codelibs.core.io.FileUtil; +import org.codelibs.core.misc.DynamicProperties; +import org.codelibs.fess.unit.UnitFessTestCase; +import org.lastaflute.di.core.factory.SingletonLaContainerFactory; + +public class FessPropTest extends UnitFessTestCase { + + @Override + protected boolean isUseOneTimeContainer() { + return true; + } + + public void test_maxUsernameLength() throws IOException { + FessConfig fessConfig = new FessConfig.SimpleImpl() { + @Override + public Integer getLdapMaxUsernameLengthAsInteger() { + return Integer.valueOf(-1); + } + }; + File file = File.createTempFile("test", ".properties"); + file.deleteOnExit(); + FileUtil.writeBytes(file.getAbsolutePath(), "ldap.security.principal=%s@fess.codelibs.local".getBytes("UTF-8")); + DynamicProperties systemProps = new DynamicProperties(file); + SingletonLaContainerFactory.getContainer().register(systemProps, "systemProperties"); + + assertEquals("@fess.codelibs.local", fessConfig.getLdapSecurityPrincipal(null)); + assertEquals("@fess.codelibs.local", fessConfig.getLdapSecurityPrincipal("")); + assertEquals("123456789@fess.codelibs.local", fessConfig.getLdapSecurityPrincipal("123456789")); + assertEquals("1234567890@fess.codelibs.local", fessConfig.getLdapSecurityPrincipal("1234567890")); + assertEquals("12345678901@fess.codelibs.local", fessConfig.getLdapSecurityPrincipal("12345678901")); + } + + public void test_maxUsernameLength10() throws IOException { + FessConfig fessConfig = new FessConfig.SimpleImpl() { + @Override + public Integer getLdapMaxUsernameLengthAsInteger() { + return Integer.valueOf(10); + } + }; + + File file = File.createTempFile("test", ".properties"); + file.deleteOnExit(); + FileUtil.writeBytes(file.getAbsolutePath(), "ldap.security.principal=%s@fess.codelibs.local".getBytes("UTF-8")); + DynamicProperties systemProps = new DynamicProperties(file); + SingletonLaContainerFactory.getContainer().register(systemProps, "systemProperties"); + + assertEquals("@fess.codelibs.local", fessConfig.getLdapSecurityPrincipal(null)); + assertEquals("@fess.codelibs.local", fessConfig.getLdapSecurityPrincipal("")); + assertEquals("123456789@fess.codelibs.local", fessConfig.getLdapSecurityPrincipal("123456789")); + assertEquals("1234567890@fess.codelibs.local", fessConfig.getLdapSecurityPrincipal("1234567890")); + assertEquals("1234567890@fess.codelibs.local", fessConfig.getLdapSecurityPrincipal("12345678901")); + } + +} diff --git a/src/test/java/org/codelibs/fess/util/StreamUtilTest.java b/src/test/java/org/codelibs/fess/util/StreamUtilTest.java new file mode 100644 index 000000000..69b81627d --- /dev/null +++ b/src/test/java/org/codelibs/fess/util/StreamUtilTest.java @@ -0,0 +1,51 @@ +/* + * Copyright 2012-2016 CodeLibs Project and the Others. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language + * governing permissions and limitations under the License. + */ +package org.codelibs.fess.util; + +import java.util.HashMap; +import java.util.Map; +import java.util.stream.Stream; + +import org.codelibs.fess.unit.UnitFessTestCase; + +public class StreamUtilTest extends UnitFessTestCase { + + public void test_ofValues() { + String[] values = { "value1", "value2" }; + Stream stream = StreamUtil.of(values[0], values[1]); + Object[] array = stream.toArray(); + for (int i = 0; i < 2; i++) { + assertEquals(values[i], array[i]); + } + } + + public void test_ofNull() { + assertEquals(0, StreamUtil.of().toArray().length); + Object[] o = {}; + assertEquals(0, StreamUtil.of(o).toArray().length); + Map map = new HashMap(); + assertEquals(0, StreamUtil.of(map).toArray().length); + } + + public void test_ofMap() { + Map map = new HashMap(); + map.put("key1", "value1"); + map.put("key2", "value2"); + Stream> stream = StreamUtil.of(map); + stream.forEach(m -> assertEquals(map.get(m.getKey()), m.getValue())); + } + +}