diff --git a/src/main/java/org/codelibs/fess/app/web/go/GoAction.java b/src/main/java/org/codelibs/fess/app/web/go/GoAction.java index 6365f5119..4f5ea9a95 100644 --- a/src/main/java/org/codelibs/fess/app/web/go/GoAction.java +++ b/src/main/java/org/codelibs/fess/app/web/go/GoAction.java @@ -32,6 +32,7 @@ import org.codelibs.fess.app.web.base.FessSearchAction; import org.codelibs.fess.es.exentity.ClickLog; import org.codelibs.fess.helper.CrawlingConfigHelper; import org.codelibs.fess.helper.SearchLogHelper; +import org.codelibs.fess.helper.ViewHelper; import org.codelibs.fess.util.ComponentUtil; import org.codelibs.robot.util.CharUtil; import org.elasticsearch.index.query.QueryBuilders; @@ -131,9 +132,9 @@ public class GoAction extends FessSearchAction { if (isFileSystemPath(url)) { if (Constants.TRUE.equals(crawlerProperties.getProperty(Constants.SEARCH_FILE_PROXY_PROPERTY, Constants.TRUE))) { - final CrawlingConfigHelper crawlingConfigHelper = ComponentUtil.getCrawlingConfigHelper(); + final ViewHelper viewHelper = ComponentUtil.getViewHelper(); try { - crawlingConfigHelper.writeContent(doc); + viewHelper.writeContent(doc); return null; } catch (final Exception e) { logger.error("Failed to load: " + doc, e); diff --git a/src/main/java/org/codelibs/fess/exec/Crawler.java b/src/main/java/org/codelibs/fess/exec/Crawler.java index f03c04e5d..c5f03138f 100644 --- a/src/main/java/org/codelibs/fess/exec/Crawler.java +++ b/src/main/java/org/codelibs/fess/exec/Crawler.java @@ -78,9 +78,6 @@ public class Crawler implements Serializable { @Resource protected FessEsClient fessEsClient; - @Resource - protected ScreenShotManager screenShotManager; - @Resource protected WebFsIndexHelper webFsIndexHelper; diff --git a/src/main/java/org/codelibs/fess/helper/CrawlingConfigHelper.java b/src/main/java/org/codelibs/fess/helper/CrawlingConfigHelper.java index 21b5c5f4f..8a681ba42 100644 --- a/src/main/java/org/codelibs/fess/helper/CrawlingConfigHelper.java +++ b/src/main/java/org/codelibs/fess/helper/CrawlingConfigHelper.java @@ -16,37 +16,16 @@ package org.codelibs.fess.helper; -import java.io.BufferedInputStream; -import java.io.BufferedOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; import java.io.Serializable; -import java.net.URLDecoder; -import java.net.URLEncoder; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; -import javax.servlet.http.HttpServletResponse; - -import org.apache.commons.io.IOUtils; -import org.codelibs.core.io.CopyUtil; -import org.codelibs.core.misc.Base64Util; -import org.codelibs.fess.Constants; -import org.codelibs.fess.FessSystemException; import org.codelibs.fess.app.service.DataConfigService; import org.codelibs.fess.app.service.FileConfigService; import org.codelibs.fess.app.service.WebConfigService; import org.codelibs.fess.es.exentity.CrawlingConfig; import org.codelibs.fess.es.exentity.CrawlingConfig.ConfigType; -import org.codelibs.fess.helper.UserAgentHelper.UserAgentType; -import org.codelibs.fess.util.ComponentUtil; -import org.codelibs.robot.builder.RequestDataBuilder; -import org.codelibs.robot.client.S2RobotClient; -import org.codelibs.robot.client.S2RobotClientFactory; -import org.codelibs.robot.entity.ResponseData; import org.lastaflute.di.core.SingletonLaContainer; -import org.lastaflute.web.util.LaResponseUtil; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -121,135 +100,4 @@ public class CrawlingConfigHelper implements Serializable { return crawlingConfigMap.get(sessionId); } - public void writeContent(final Map doc) { - if (logger.isDebugEnabled()) { - logger.debug("writing the content of: " + doc); - } - final FieldHelper fieldHelper = ComponentUtil.getFieldHelper(); - final Object configIdObj = doc.get(fieldHelper.configIdField); - if (configIdObj == null) { - throw new FessSystemException("configId is null."); - } - final String configId = configIdObj.toString(); - if (configId.length() < 2) { - throw new FessSystemException("Invalid configId: " + configIdObj); - } - final ConfigType configType = getConfigType(configId); - CrawlingConfig config = null; - if (logger.isDebugEnabled()) { - logger.debug("configType: " + configType + ", configId: " + configId); - } - if (ConfigType.WEB == configType) { - final WebConfigService webConfigService = SingletonLaContainer.getComponent(WebConfigService.class); - config = webConfigService.getWebConfig(getId(configId)); - } else if (ConfigType.FILE == configType) { - final FileConfigService fileConfigService = SingletonLaContainer.getComponent(FileConfigService.class); - config = fileConfigService.getFileConfig(getId(configId)); - } else if (ConfigType.DATA == configType) { - final DataConfigService dataConfigService = SingletonLaContainer.getComponent(DataConfigService.class); - config = dataConfigService.getDataConfig(getId(configId)); - } - if (config == null) { - throw new FessSystemException("No crawlingConfig: " + configIdObj); - } - final String url = (String) doc.get(fieldHelper.urlField); - final S2RobotClientFactory robotClientFactory = SingletonLaContainer.getComponent(S2RobotClientFactory.class); - config.initializeClientFactory(robotClientFactory); - final S2RobotClient client = robotClientFactory.getClient(url); - if (client == null) { - throw new FessSystemException("No S2RobotClient: " + configIdObj + ", url: " + url); - } - final ResponseData responseData = client.execute(RequestDataBuilder.newRequestData().get().url(url).build()); - final HttpServletResponse response = LaResponseUtil.getResponse(); - writeFileName(response, responseData); - writeContentType(response, responseData); - writeNoCache(response, responseData); - InputStream is = null; - OutputStream os = null; - try { - is = new BufferedInputStream(responseData.getResponseBody()); - os = new BufferedOutputStream(response.getOutputStream()); - CopyUtil.copy(is, os); - os.flush(); - } catch (final IOException e) { - if (!"ClientAbortException".equals(e.getClass().getSimpleName())) { - throw new FessSystemException("Failed to write a content. configId: " + configIdObj + ", url: " + url, e); - } - } finally { - IOUtils.closeQuietly(is); - IOUtils.closeQuietly(os); - } - if (logger.isDebugEnabled()) { - logger.debug("Finished to write " + url); - } - } - - protected void writeNoCache(final HttpServletResponse response, final ResponseData responseData) { - response.setHeader("Pragma", "no-cache"); - response.setHeader("Cache-Control", "no-cache"); - response.setHeader("Expires", "Thu, 01 Dec 1994 16:00:00 GMT"); - } - - protected void writeFileName(final HttpServletResponse response, final ResponseData responseData) { - final UserAgentHelper userAgentHelper = ComponentUtil.getUserAgentHelper(); - final UserAgentType userAgentType = userAgentHelper.getUserAgentType(); - String charset = responseData.getCharSet(); - if (charset == null) { - charset = Constants.UTF_8; - } - final String name; - final String url = responseData.getUrl(); - final int pos = url.lastIndexOf('/'); - try { - if (pos >= 0 && pos + 1 < url.length()) { - name = URLDecoder.decode(url.substring(pos + 1), charset); - } else { - name = URLDecoder.decode(url, charset); - } - - if (logger.isDebugEnabled()) { - logger.debug("userAgentType: " + userAgentType + ", charset: " + charset + ", name: " + name); - } - - switch (userAgentType) { - case IE: - response.setHeader("Content-Disposition", "attachment; filename=\"" + URLEncoder.encode(name, Constants.UTF_8) + "\""); - break; - case OPERA: - response.setHeader("Content-Disposition", "attachment; filename*=utf-8'ja'" + URLEncoder.encode(name, Constants.UTF_8)); - break; - case SAFARI: - response.setHeader("Content-Disposition", "attachment; filename=\"" + name + "\""); - break; - case CHROME: - case FIREFOX: - case OTHER: - default: - response.setHeader("Content-Disposition", - "attachment; filename=\"=?utf-8?B?" + Base64Util.encode(name.getBytes(Constants.UTF_8)) + "?=\""); - break; - } - } catch (final Exception e) { - logger.warn("Failed to write a filename: " + responseData, e); - } - } - - protected void writeContentType(final HttpServletResponse response, final ResponseData responseData) { - final String mimeType = responseData.getMimeType(); - if (logger.isDebugEnabled()) { - logger.debug("mimeType: " + mimeType); - } - if (mimeType == null) { - return; - } - if (mimeType.startsWith("text/")) { - final String charset = response.getCharacterEncoding(); - if (charset != null) { - response.setContentType(mimeType + "; charset=" + charset); - return; - } - } - response.setContentType(mimeType); - } - } diff --git a/src/main/java/org/codelibs/fess/helper/ViewHelper.java b/src/main/java/org/codelibs/fess/helper/ViewHelper.java index 60414b855..9c68bcb4d 100644 --- a/src/main/java/org/codelibs/fess/helper/ViewHelper.java +++ b/src/main/java/org/codelibs/fess/helper/ViewHelper.java @@ -16,7 +16,12 @@ package org.codelibs.fess.helper; +import java.io.BufferedInputStream; +import java.io.BufferedOutputStream; import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; import java.io.Serializable; import java.io.UnsupportedEncodingException; import java.net.URLDecoder; @@ -34,21 +39,36 @@ import java.util.regex.Pattern; import javax.annotation.PostConstruct; import javax.annotation.Resource; import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; +import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.codelibs.core.CoreLibConstants; +import org.codelibs.core.io.CopyUtil; import org.codelibs.core.lang.StringUtil; +import org.codelibs.core.misc.Base64Util; import org.codelibs.core.misc.DynamicProperties; import org.codelibs.core.net.URLUtil; import org.codelibs.fess.Constants; import org.codelibs.fess.FessSystemException; +import org.codelibs.fess.app.service.DataConfigService; +import org.codelibs.fess.app.service.FileConfigService; +import org.codelibs.fess.app.service.WebConfigService; import org.codelibs.fess.entity.FacetQueryView; +import org.codelibs.fess.es.exentity.CrawlingConfig; +import org.codelibs.fess.es.exentity.CrawlingConfig.ConfigType; import org.codelibs.fess.helper.UserAgentHelper.UserAgentType; import org.codelibs.fess.util.ComponentUtil; import org.codelibs.fess.util.ResourceUtil; +import org.codelibs.robot.builder.RequestDataBuilder; +import org.codelibs.robot.client.S2RobotClient; +import org.codelibs.robot.client.S2RobotClientFactory; +import org.codelibs.robot.entity.ResponseData; import org.codelibs.robot.util.CharUtil; +import org.lastaflute.di.core.SingletonLaContainer; import org.lastaflute.taglib.function.LaFunctions; import org.lastaflute.web.util.LaRequestUtil; +import org.lastaflute.web.util.LaResponseUtil; import org.lastaflute.web.util.LaServletContextUtil; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -456,6 +476,138 @@ public class ViewHelper implements Serializable { return null; } + public void writeContent(final Map doc) { + if (logger.isDebugEnabled()) { + logger.debug("writing the content of: " + doc); + } + final FieldHelper fieldHelper = ComponentUtil.getFieldHelper(); + final CrawlingConfigHelper crawlingConfigHelper = ComponentUtil.getCrawlingConfigHelper(); + final Object configIdObj = doc.get(fieldHelper.configIdField); + if (configIdObj == null) { + throw new FessSystemException("configId is null."); + } + final String configId = configIdObj.toString(); + if (configId.length() < 2) { + throw new FessSystemException("Invalid configId: " + configIdObj); + } + final ConfigType configType = crawlingConfigHelper.getConfigType(configId); + CrawlingConfig config = null; + if (logger.isDebugEnabled()) { + logger.debug("configType: " + configType + ", configId: " + configId); + } + if (ConfigType.WEB == configType) { + final WebConfigService webConfigService = SingletonLaContainer.getComponent(WebConfigService.class); + config = webConfigService.getWebConfig(crawlingConfigHelper.getId(configId)); + } else if (ConfigType.FILE == configType) { + final FileConfigService fileConfigService = SingletonLaContainer.getComponent(FileConfigService.class); + config = fileConfigService.getFileConfig(crawlingConfigHelper.getId(configId)); + } else if (ConfigType.DATA == configType) { + final DataConfigService dataConfigService = SingletonLaContainer.getComponent(DataConfigService.class); + config = dataConfigService.getDataConfig(crawlingConfigHelper.getId(configId)); + } + if (config == null) { + throw new FessSystemException("No crawlingConfig: " + configIdObj); + } + final String url = (String) doc.get(fieldHelper.urlField); + final S2RobotClientFactory robotClientFactory = SingletonLaContainer.getComponent(S2RobotClientFactory.class); + config.initializeClientFactory(robotClientFactory); + final S2RobotClient client = robotClientFactory.getClient(url); + if (client == null) { + throw new FessSystemException("No S2RobotClient: " + configIdObj + ", url: " + url); + } + final ResponseData responseData = client.execute(RequestDataBuilder.newRequestData().get().url(url).build()); + final HttpServletResponse response = LaResponseUtil.getResponse(); + writeFileName(response, responseData); + writeContentType(response, responseData); + writeNoCache(response, responseData); + InputStream is = null; + OutputStream os = null; + try { + is = new BufferedInputStream(responseData.getResponseBody()); + os = new BufferedOutputStream(response.getOutputStream()); + CopyUtil.copy(is, os); + os.flush(); + } catch (final IOException e) { + if (!"ClientAbortException".equals(e.getClass().getSimpleName())) { + throw new FessSystemException("Failed to write a content. configId: " + configIdObj + ", url: " + url, e); + } + } finally { + IOUtils.closeQuietly(is); + IOUtils.closeQuietly(os); + } + if (logger.isDebugEnabled()) { + logger.debug("Finished to write " + url); + } + } + + protected void writeNoCache(final HttpServletResponse response, final ResponseData responseData) { + response.setHeader("Pragma", "no-cache"); + response.setHeader("Cache-Control", "no-cache"); + response.setHeader("Expires", "Thu, 01 Dec 1994 16:00:00 GMT"); + } + + protected void writeFileName(final HttpServletResponse response, final ResponseData responseData) { + final UserAgentHelper userAgentHelper = ComponentUtil.getUserAgentHelper(); + final UserAgentType userAgentType = userAgentHelper.getUserAgentType(); + String charset = responseData.getCharSet(); + if (charset == null) { + charset = Constants.UTF_8; + } + final String name; + final String url = responseData.getUrl(); + final int pos = url.lastIndexOf('/'); + try { + if (pos >= 0 && pos + 1 < url.length()) { + name = URLDecoder.decode(url.substring(pos + 1), charset); + } else { + name = URLDecoder.decode(url, charset); + } + + if (logger.isDebugEnabled()) { + logger.debug("userAgentType: " + userAgentType + ", charset: " + charset + ", name: " + name); + } + + switch (userAgentType) { + case IE: + response.setHeader("Content-Disposition", "attachment; filename=\"" + URLEncoder.encode(name, Constants.UTF_8) + "\""); + break; + case OPERA: + response.setHeader("Content-Disposition", "attachment; filename*=utf-8'ja'" + URLEncoder.encode(name, Constants.UTF_8)); + break; + case SAFARI: + response.setHeader("Content-Disposition", "attachment; filename=\"" + name + "\""); + break; + case CHROME: + case FIREFOX: + case OTHER: + default: + response.setHeader("Content-Disposition", + "attachment; filename=\"=?utf-8?B?" + Base64Util.encode(name.getBytes(Constants.UTF_8)) + "?=\""); + break; + } + } catch (final Exception e) { + logger.warn("Failed to write a filename: " + responseData, e); + } + } + + protected void writeContentType(final HttpServletResponse response, final ResponseData responseData) { + final String mimeType = responseData.getMimeType(); + if (logger.isDebugEnabled()) { + logger.debug("mimeType: " + mimeType); + } + if (mimeType == null) { + return; + } + if (mimeType.startsWith("text/")) { + final String charset = response.getCharacterEncoding(); + if (charset != null) { + response.setContentType(mimeType + "; charset=" + charset); + return; + } + } + response.setContentType(mimeType); + } + public boolean isUseSession() { return useSession; } diff --git a/src/main/java/org/codelibs/fess/util/ResourceUtil.java b/src/main/java/org/codelibs/fess/util/ResourceUtil.java index 093ae3fb9..b60375a5f 100644 --- a/src/main/java/org/codelibs/fess/util/ResourceUtil.java +++ b/src/main/java/org/codelibs/fess/util/ResourceUtil.java @@ -64,7 +64,7 @@ public class ResourceUtil { if (servletContext != null) { path = servletContext.getRealPath("/" + baseName + name); } - } catch (final Exception e) { // NOSONAR + } catch (final Throwable e) { // NOSONAR // ignore } if (path == null) { diff --git a/src/main/resources/app.xml b/src/main/resources/app.xml index 1759f7bbb..ea8dc3070 100644 --- a/src/main/resources/app.xml +++ b/src/main/resources/app.xml @@ -297,4 +297,46 @@ + + + + + + diff --git a/src/main/resources/fess.xml b/src/main/resources/fess.xml index 06c7f57b5..4a1871c1f 100644 --- a/src/main/resources/fess.xml +++ b/src/main/resources/fess.xml @@ -156,45 +156,4 @@ "," --> - - - - - diff --git a/src/main/resources/s2robot/contentlength.xml b/src/main/resources/s2robot/contentlength.xml index 5c3fe0223..7c314d1d6 100644 --- a/src/main/resources/s2robot/contentlength.xml +++ b/src/main/resources/s2robot/contentlength.xml @@ -6,10 +6,10 @@ - 10485760L + 10485760 "text/html" - 2621440L + 2621440 diff --git a/src/main/resources/s2robot/transformer.xml b/src/main/resources/s2robot/transformer.xml index 7dbda0eac..324b77f9d 100644 --- a/src/main/resources/s2robot/transformer.xml +++ b/src/main/resources/s2robot/transformer.xml @@ -14,7 +14,7 @@ @java.util.regex.Pattern@compile("^\\s*javascript:|^\\s*mailto:|^\\s*irc:|^\\s*skype:|^\\s*callto:",@java.util.regex.Pattern@CASE_INSENSITIVE) --> - #{"feed:" : "http:"} + {"feed:" : "http:"} @@ -36,53 +35,53 @@ 5 100 false - + "url.matches(\".*fess.*\")" "1000.0" - - + + "FieldName" "VALUE" - + --> - + "text/html" "html" - - + + "application/msword" "word" - - + + "application/vnd.openxmlformats-officedocument.wordprocessingml.document" "word" - - + + "application/vnd.ms-excel" "excel" - - + + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" "excel" - - + + "application/vnd.ms-powerpoint" "powerpoint" - - + + "application/vnd.openxmlformats-officedocument.presentationml.presentation" "powerpoint" - - + + "application/pdf" "pdf" - +