modify xml files for crawler
This commit is contained in:
parent
af13210d13
commit
bc46f10861
11 changed files with 225 additions and 227 deletions
|
@ -32,6 +32,7 @@ import org.codelibs.fess.app.web.base.FessSearchAction;
|
|||
import org.codelibs.fess.es.exentity.ClickLog;
|
||||
import org.codelibs.fess.helper.CrawlingConfigHelper;
|
||||
import org.codelibs.fess.helper.SearchLogHelper;
|
||||
import org.codelibs.fess.helper.ViewHelper;
|
||||
import org.codelibs.fess.util.ComponentUtil;
|
||||
import org.codelibs.robot.util.CharUtil;
|
||||
import org.elasticsearch.index.query.QueryBuilders;
|
||||
|
@ -131,9 +132,9 @@ public class GoAction extends FessSearchAction {
|
|||
|
||||
if (isFileSystemPath(url)) {
|
||||
if (Constants.TRUE.equals(crawlerProperties.getProperty(Constants.SEARCH_FILE_PROXY_PROPERTY, Constants.TRUE))) {
|
||||
final CrawlingConfigHelper crawlingConfigHelper = ComponentUtil.getCrawlingConfigHelper();
|
||||
final ViewHelper viewHelper = ComponentUtil.getViewHelper();
|
||||
try {
|
||||
crawlingConfigHelper.writeContent(doc);
|
||||
viewHelper.writeContent(doc);
|
||||
return null;
|
||||
} catch (final Exception e) {
|
||||
logger.error("Failed to load: " + doc, e);
|
||||
|
|
|
@ -78,9 +78,6 @@ public class Crawler implements Serializable {
|
|||
@Resource
|
||||
protected FessEsClient fessEsClient;
|
||||
|
||||
@Resource
|
||||
protected ScreenShotManager screenShotManager;
|
||||
|
||||
@Resource
|
||||
protected WebFsIndexHelper webFsIndexHelper;
|
||||
|
||||
|
|
|
@ -16,37 +16,16 @@
|
|||
|
||||
package org.codelibs.fess.helper;
|
||||
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.BufferedOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.io.Serializable;
|
||||
import java.net.URLDecoder;
|
||||
import java.net.URLEncoder;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
|
||||
import javax.servlet.http.HttpServletResponse;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.codelibs.core.io.CopyUtil;
|
||||
import org.codelibs.core.misc.Base64Util;
|
||||
import org.codelibs.fess.Constants;
|
||||
import org.codelibs.fess.FessSystemException;
|
||||
import org.codelibs.fess.app.service.DataConfigService;
|
||||
import org.codelibs.fess.app.service.FileConfigService;
|
||||
import org.codelibs.fess.app.service.WebConfigService;
|
||||
import org.codelibs.fess.es.exentity.CrawlingConfig;
|
||||
import org.codelibs.fess.es.exentity.CrawlingConfig.ConfigType;
|
||||
import org.codelibs.fess.helper.UserAgentHelper.UserAgentType;
|
||||
import org.codelibs.fess.util.ComponentUtil;
|
||||
import org.codelibs.robot.builder.RequestDataBuilder;
|
||||
import org.codelibs.robot.client.S2RobotClient;
|
||||
import org.codelibs.robot.client.S2RobotClientFactory;
|
||||
import org.codelibs.robot.entity.ResponseData;
|
||||
import org.lastaflute.di.core.SingletonLaContainer;
|
||||
import org.lastaflute.web.util.LaResponseUtil;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
|
@ -121,135 +100,4 @@ public class CrawlingConfigHelper implements Serializable {
|
|||
return crawlingConfigMap.get(sessionId);
|
||||
}
|
||||
|
||||
public void writeContent(final Map<String, Object> doc) {
|
||||
if (logger.isDebugEnabled()) {
|
||||
logger.debug("writing the content of: " + doc);
|
||||
}
|
||||
final FieldHelper fieldHelper = ComponentUtil.getFieldHelper();
|
||||
final Object configIdObj = doc.get(fieldHelper.configIdField);
|
||||
if (configIdObj == null) {
|
||||
throw new FessSystemException("configId is null.");
|
||||
}
|
||||
final String configId = configIdObj.toString();
|
||||
if (configId.length() < 2) {
|
||||
throw new FessSystemException("Invalid configId: " + configIdObj);
|
||||
}
|
||||
final ConfigType configType = getConfigType(configId);
|
||||
CrawlingConfig config = null;
|
||||
if (logger.isDebugEnabled()) {
|
||||
logger.debug("configType: " + configType + ", configId: " + configId);
|
||||
}
|
||||
if (ConfigType.WEB == configType) {
|
||||
final WebConfigService webConfigService = SingletonLaContainer.getComponent(WebConfigService.class);
|
||||
config = webConfigService.getWebConfig(getId(configId));
|
||||
} else if (ConfigType.FILE == configType) {
|
||||
final FileConfigService fileConfigService = SingletonLaContainer.getComponent(FileConfigService.class);
|
||||
config = fileConfigService.getFileConfig(getId(configId));
|
||||
} else if (ConfigType.DATA == configType) {
|
||||
final DataConfigService dataConfigService = SingletonLaContainer.getComponent(DataConfigService.class);
|
||||
config = dataConfigService.getDataConfig(getId(configId));
|
||||
}
|
||||
if (config == null) {
|
||||
throw new FessSystemException("No crawlingConfig: " + configIdObj);
|
||||
}
|
||||
final String url = (String) doc.get(fieldHelper.urlField);
|
||||
final S2RobotClientFactory robotClientFactory = SingletonLaContainer.getComponent(S2RobotClientFactory.class);
|
||||
config.initializeClientFactory(robotClientFactory);
|
||||
final S2RobotClient client = robotClientFactory.getClient(url);
|
||||
if (client == null) {
|
||||
throw new FessSystemException("No S2RobotClient: " + configIdObj + ", url: " + url);
|
||||
}
|
||||
final ResponseData responseData = client.execute(RequestDataBuilder.newRequestData().get().url(url).build());
|
||||
final HttpServletResponse response = LaResponseUtil.getResponse();
|
||||
writeFileName(response, responseData);
|
||||
writeContentType(response, responseData);
|
||||
writeNoCache(response, responseData);
|
||||
InputStream is = null;
|
||||
OutputStream os = null;
|
||||
try {
|
||||
is = new BufferedInputStream(responseData.getResponseBody());
|
||||
os = new BufferedOutputStream(response.getOutputStream());
|
||||
CopyUtil.copy(is, os);
|
||||
os.flush();
|
||||
} catch (final IOException e) {
|
||||
if (!"ClientAbortException".equals(e.getClass().getSimpleName())) {
|
||||
throw new FessSystemException("Failed to write a content. configId: " + configIdObj + ", url: " + url, e);
|
||||
}
|
||||
} finally {
|
||||
IOUtils.closeQuietly(is);
|
||||
IOUtils.closeQuietly(os);
|
||||
}
|
||||
if (logger.isDebugEnabled()) {
|
||||
logger.debug("Finished to write " + url);
|
||||
}
|
||||
}
|
||||
|
||||
protected void writeNoCache(final HttpServletResponse response, final ResponseData responseData) {
|
||||
response.setHeader("Pragma", "no-cache");
|
||||
response.setHeader("Cache-Control", "no-cache");
|
||||
response.setHeader("Expires", "Thu, 01 Dec 1994 16:00:00 GMT");
|
||||
}
|
||||
|
||||
protected void writeFileName(final HttpServletResponse response, final ResponseData responseData) {
|
||||
final UserAgentHelper userAgentHelper = ComponentUtil.getUserAgentHelper();
|
||||
final UserAgentType userAgentType = userAgentHelper.getUserAgentType();
|
||||
String charset = responseData.getCharSet();
|
||||
if (charset == null) {
|
||||
charset = Constants.UTF_8;
|
||||
}
|
||||
final String name;
|
||||
final String url = responseData.getUrl();
|
||||
final int pos = url.lastIndexOf('/');
|
||||
try {
|
||||
if (pos >= 0 && pos + 1 < url.length()) {
|
||||
name = URLDecoder.decode(url.substring(pos + 1), charset);
|
||||
} else {
|
||||
name = URLDecoder.decode(url, charset);
|
||||
}
|
||||
|
||||
if (logger.isDebugEnabled()) {
|
||||
logger.debug("userAgentType: " + userAgentType + ", charset: " + charset + ", name: " + name);
|
||||
}
|
||||
|
||||
switch (userAgentType) {
|
||||
case IE:
|
||||
response.setHeader("Content-Disposition", "attachment; filename=\"" + URLEncoder.encode(name, Constants.UTF_8) + "\"");
|
||||
break;
|
||||
case OPERA:
|
||||
response.setHeader("Content-Disposition", "attachment; filename*=utf-8'ja'" + URLEncoder.encode(name, Constants.UTF_8));
|
||||
break;
|
||||
case SAFARI:
|
||||
response.setHeader("Content-Disposition", "attachment; filename=\"" + name + "\"");
|
||||
break;
|
||||
case CHROME:
|
||||
case FIREFOX:
|
||||
case OTHER:
|
||||
default:
|
||||
response.setHeader("Content-Disposition",
|
||||
"attachment; filename=\"=?utf-8?B?" + Base64Util.encode(name.getBytes(Constants.UTF_8)) + "?=\"");
|
||||
break;
|
||||
}
|
||||
} catch (final Exception e) {
|
||||
logger.warn("Failed to write a filename: " + responseData, e);
|
||||
}
|
||||
}
|
||||
|
||||
protected void writeContentType(final HttpServletResponse response, final ResponseData responseData) {
|
||||
final String mimeType = responseData.getMimeType();
|
||||
if (logger.isDebugEnabled()) {
|
||||
logger.debug("mimeType: " + mimeType);
|
||||
}
|
||||
if (mimeType == null) {
|
||||
return;
|
||||
}
|
||||
if (mimeType.startsWith("text/")) {
|
||||
final String charset = response.getCharacterEncoding();
|
||||
if (charset != null) {
|
||||
response.setContentType(mimeType + "; charset=" + charset);
|
||||
return;
|
||||
}
|
||||
}
|
||||
response.setContentType(mimeType);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -16,7 +16,12 @@
|
|||
|
||||
package org.codelibs.fess.helper;
|
||||
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.BufferedOutputStream;
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.io.Serializable;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.net.URLDecoder;
|
||||
|
@ -34,21 +39,36 @@ import java.util.regex.Pattern;
|
|||
import javax.annotation.PostConstruct;
|
||||
import javax.annotation.Resource;
|
||||
import javax.servlet.http.HttpServletRequest;
|
||||
import javax.servlet.http.HttpServletResponse;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.codelibs.core.CoreLibConstants;
|
||||
import org.codelibs.core.io.CopyUtil;
|
||||
import org.codelibs.core.lang.StringUtil;
|
||||
import org.codelibs.core.misc.Base64Util;
|
||||
import org.codelibs.core.misc.DynamicProperties;
|
||||
import org.codelibs.core.net.URLUtil;
|
||||
import org.codelibs.fess.Constants;
|
||||
import org.codelibs.fess.FessSystemException;
|
||||
import org.codelibs.fess.app.service.DataConfigService;
|
||||
import org.codelibs.fess.app.service.FileConfigService;
|
||||
import org.codelibs.fess.app.service.WebConfigService;
|
||||
import org.codelibs.fess.entity.FacetQueryView;
|
||||
import org.codelibs.fess.es.exentity.CrawlingConfig;
|
||||
import org.codelibs.fess.es.exentity.CrawlingConfig.ConfigType;
|
||||
import org.codelibs.fess.helper.UserAgentHelper.UserAgentType;
|
||||
import org.codelibs.fess.util.ComponentUtil;
|
||||
import org.codelibs.fess.util.ResourceUtil;
|
||||
import org.codelibs.robot.builder.RequestDataBuilder;
|
||||
import org.codelibs.robot.client.S2RobotClient;
|
||||
import org.codelibs.robot.client.S2RobotClientFactory;
|
||||
import org.codelibs.robot.entity.ResponseData;
|
||||
import org.codelibs.robot.util.CharUtil;
|
||||
import org.lastaflute.di.core.SingletonLaContainer;
|
||||
import org.lastaflute.taglib.function.LaFunctions;
|
||||
import org.lastaflute.web.util.LaRequestUtil;
|
||||
import org.lastaflute.web.util.LaResponseUtil;
|
||||
import org.lastaflute.web.util.LaServletContextUtil;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
@ -456,6 +476,138 @@ public class ViewHelper implements Serializable {
|
|||
return null;
|
||||
}
|
||||
|
||||
public void writeContent(final Map<String, Object> doc) {
|
||||
if (logger.isDebugEnabled()) {
|
||||
logger.debug("writing the content of: " + doc);
|
||||
}
|
||||
final FieldHelper fieldHelper = ComponentUtil.getFieldHelper();
|
||||
final CrawlingConfigHelper crawlingConfigHelper = ComponentUtil.getCrawlingConfigHelper();
|
||||
final Object configIdObj = doc.get(fieldHelper.configIdField);
|
||||
if (configIdObj == null) {
|
||||
throw new FessSystemException("configId is null.");
|
||||
}
|
||||
final String configId = configIdObj.toString();
|
||||
if (configId.length() < 2) {
|
||||
throw new FessSystemException("Invalid configId: " + configIdObj);
|
||||
}
|
||||
final ConfigType configType = crawlingConfigHelper.getConfigType(configId);
|
||||
CrawlingConfig config = null;
|
||||
if (logger.isDebugEnabled()) {
|
||||
logger.debug("configType: " + configType + ", configId: " + configId);
|
||||
}
|
||||
if (ConfigType.WEB == configType) {
|
||||
final WebConfigService webConfigService = SingletonLaContainer.getComponent(WebConfigService.class);
|
||||
config = webConfigService.getWebConfig(crawlingConfigHelper.getId(configId));
|
||||
} else if (ConfigType.FILE == configType) {
|
||||
final FileConfigService fileConfigService = SingletonLaContainer.getComponent(FileConfigService.class);
|
||||
config = fileConfigService.getFileConfig(crawlingConfigHelper.getId(configId));
|
||||
} else if (ConfigType.DATA == configType) {
|
||||
final DataConfigService dataConfigService = SingletonLaContainer.getComponent(DataConfigService.class);
|
||||
config = dataConfigService.getDataConfig(crawlingConfigHelper.getId(configId));
|
||||
}
|
||||
if (config == null) {
|
||||
throw new FessSystemException("No crawlingConfig: " + configIdObj);
|
||||
}
|
||||
final String url = (String) doc.get(fieldHelper.urlField);
|
||||
final S2RobotClientFactory robotClientFactory = SingletonLaContainer.getComponent(S2RobotClientFactory.class);
|
||||
config.initializeClientFactory(robotClientFactory);
|
||||
final S2RobotClient client = robotClientFactory.getClient(url);
|
||||
if (client == null) {
|
||||
throw new FessSystemException("No S2RobotClient: " + configIdObj + ", url: " + url);
|
||||
}
|
||||
final ResponseData responseData = client.execute(RequestDataBuilder.newRequestData().get().url(url).build());
|
||||
final HttpServletResponse response = LaResponseUtil.getResponse();
|
||||
writeFileName(response, responseData);
|
||||
writeContentType(response, responseData);
|
||||
writeNoCache(response, responseData);
|
||||
InputStream is = null;
|
||||
OutputStream os = null;
|
||||
try {
|
||||
is = new BufferedInputStream(responseData.getResponseBody());
|
||||
os = new BufferedOutputStream(response.getOutputStream());
|
||||
CopyUtil.copy(is, os);
|
||||
os.flush();
|
||||
} catch (final IOException e) {
|
||||
if (!"ClientAbortException".equals(e.getClass().getSimpleName())) {
|
||||
throw new FessSystemException("Failed to write a content. configId: " + configIdObj + ", url: " + url, e);
|
||||
}
|
||||
} finally {
|
||||
IOUtils.closeQuietly(is);
|
||||
IOUtils.closeQuietly(os);
|
||||
}
|
||||
if (logger.isDebugEnabled()) {
|
||||
logger.debug("Finished to write " + url);
|
||||
}
|
||||
}
|
||||
|
||||
protected void writeNoCache(final HttpServletResponse response, final ResponseData responseData) {
|
||||
response.setHeader("Pragma", "no-cache");
|
||||
response.setHeader("Cache-Control", "no-cache");
|
||||
response.setHeader("Expires", "Thu, 01 Dec 1994 16:00:00 GMT");
|
||||
}
|
||||
|
||||
protected void writeFileName(final HttpServletResponse response, final ResponseData responseData) {
|
||||
final UserAgentHelper userAgentHelper = ComponentUtil.getUserAgentHelper();
|
||||
final UserAgentType userAgentType = userAgentHelper.getUserAgentType();
|
||||
String charset = responseData.getCharSet();
|
||||
if (charset == null) {
|
||||
charset = Constants.UTF_8;
|
||||
}
|
||||
final String name;
|
||||
final String url = responseData.getUrl();
|
||||
final int pos = url.lastIndexOf('/');
|
||||
try {
|
||||
if (pos >= 0 && pos + 1 < url.length()) {
|
||||
name = URLDecoder.decode(url.substring(pos + 1), charset);
|
||||
} else {
|
||||
name = URLDecoder.decode(url, charset);
|
||||
}
|
||||
|
||||
if (logger.isDebugEnabled()) {
|
||||
logger.debug("userAgentType: " + userAgentType + ", charset: " + charset + ", name: " + name);
|
||||
}
|
||||
|
||||
switch (userAgentType) {
|
||||
case IE:
|
||||
response.setHeader("Content-Disposition", "attachment; filename=\"" + URLEncoder.encode(name, Constants.UTF_8) + "\"");
|
||||
break;
|
||||
case OPERA:
|
||||
response.setHeader("Content-Disposition", "attachment; filename*=utf-8'ja'" + URLEncoder.encode(name, Constants.UTF_8));
|
||||
break;
|
||||
case SAFARI:
|
||||
response.setHeader("Content-Disposition", "attachment; filename=\"" + name + "\"");
|
||||
break;
|
||||
case CHROME:
|
||||
case FIREFOX:
|
||||
case OTHER:
|
||||
default:
|
||||
response.setHeader("Content-Disposition",
|
||||
"attachment; filename=\"=?utf-8?B?" + Base64Util.encode(name.getBytes(Constants.UTF_8)) + "?=\"");
|
||||
break;
|
||||
}
|
||||
} catch (final Exception e) {
|
||||
logger.warn("Failed to write a filename: " + responseData, e);
|
||||
}
|
||||
}
|
||||
|
||||
protected void writeContentType(final HttpServletResponse response, final ResponseData responseData) {
|
||||
final String mimeType = responseData.getMimeType();
|
||||
if (logger.isDebugEnabled()) {
|
||||
logger.debug("mimeType: " + mimeType);
|
||||
}
|
||||
if (mimeType == null) {
|
||||
return;
|
||||
}
|
||||
if (mimeType.startsWith("text/")) {
|
||||
final String charset = response.getCharacterEncoding();
|
||||
if (charset != null) {
|
||||
response.setContentType(mimeType + "; charset=" + charset);
|
||||
return;
|
||||
}
|
||||
}
|
||||
response.setContentType(mimeType);
|
||||
}
|
||||
|
||||
public boolean isUseSession() {
|
||||
return useSession;
|
||||
}
|
||||
|
|
|
@ -64,7 +64,7 @@ public class ResourceUtil {
|
|||
if (servletContext != null) {
|
||||
path = servletContext.getRealPath("/" + baseName + name);
|
||||
}
|
||||
} catch (final Exception e) { // NOSONAR
|
||||
} catch (final Throwable e) { // NOSONAR
|
||||
// ignore
|
||||
}
|
||||
if (path == null) {
|
||||
|
|
|
@ -297,4 +297,46 @@
|
|||
</postConstruct>
|
||||
</component>
|
||||
|
||||
<component name="screenShotManager" class="org.codelibs.fess.screenshot.ScreenShotManager">
|
||||
<!--
|
||||
<postConstruct name="add">
|
||||
<arg>htmlScreenShotGenerator</arg>
|
||||
</postConstruct>
|
||||
-->
|
||||
</component>
|
||||
<!--
|
||||
<component name="webDriver" class="org.openqa.selenium.phantomjs.PhantomJSDriver">
|
||||
<arg>
|
||||
<component class="org.openqa.selenium.remote.DesiredCapabilities">
|
||||
<postConstruct name="setCapability">
|
||||
<arg>"phantomjs.binary.path"</arg>
|
||||
<arg>"/usr/bin/phantomjs"</arg>
|
||||
</postConstruct>
|
||||
</component>
|
||||
</arg>
|
||||
<preDestroy name="quit"></preDestroy>
|
||||
</component>
|
||||
<component name="htmlScreenShotGenerator" class="org.codelibs.fess.screenshot.impl.WebDriverGenerator">
|
||||
<property name="webDriver">webDriver</property>
|
||||
<postConstruct name="addCondition">
|
||||
<arg>"mimetype"</arg>
|
||||
<arg>"text/html"</arg>
|
||||
</postConstruct>
|
||||
</component>
|
||||
-->
|
||||
<!--
|
||||
<component name="htmlScreenShotGenerator" class="org.codelibs.fess.screenshot.impl.CommandGenerator">
|
||||
<property name="commandList">
|
||||
{"bash",
|
||||
"/opt/fess/bin/html-screenshot.sh",
|
||||
"${url}",
|
||||
"${outputFile}"}
|
||||
</property>
|
||||
<postConstruct name="addCondition">
|
||||
<arg>"mimetype"</arg>
|
||||
<arg>"text/html"</arg>
|
||||
</postConstruct>
|
||||
</component>
|
||||
-->
|
||||
|
||||
</components>
|
||||
|
|
|
@ -156,45 +156,4 @@
|
|||
<property name="roleSeparator">","</property>
|
||||
-->
|
||||
</component>
|
||||
<component name="screenShotManager" class="org.codelibs.fess.screenshot.ScreenShotManager">
|
||||
<!--
|
||||
<postConstruct name="add">
|
||||
<arg>htmlScreenShotGenerator</arg>
|
||||
</postConstruct>
|
||||
-->
|
||||
</component>
|
||||
<!--
|
||||
<component name="webDriver" class="org.openqa.selenium.phantomjs.PhantomJSDriver">
|
||||
<arg>
|
||||
<component class="org.openqa.selenium.remote.DesiredCapabilities">
|
||||
<postConstruct name="setCapability">
|
||||
<arg>"phantomjs.binary.path"</arg>
|
||||
<arg>"/usr/bin/phantomjs"</arg>
|
||||
</postConstruct>
|
||||
</component>
|
||||
</arg>
|
||||
<preDestroy name="quit"></preDestroy>
|
||||
</component>
|
||||
<component name="htmlScreenShotGenerator" class="org.codelibs.fess.screenshot.impl.WebDriverGenerator">
|
||||
<property name="webDriver">webDriver</property>
|
||||
<postConstruct name="addCondition">
|
||||
<arg>"mimetype"</arg>
|
||||
<arg>"text/html"</arg>
|
||||
</postConstruct>
|
||||
</component>
|
||||
-->
|
||||
<!--
|
||||
<component name="htmlScreenShotGenerator" class="org.codelibs.fess.screenshot.impl.CommandGenerator">
|
||||
<property name="commandList">
|
||||
{"bash",
|
||||
"/opt/fess/bin/html-screenshot.sh",
|
||||
"${url}",
|
||||
"${outputFile}"}
|
||||
</property>
|
||||
<postConstruct name="addCondition">
|
||||
<arg>"mimetype"</arg>
|
||||
<arg>"text/html"</arg>
|
||||
</postConstruct>
|
||||
</component>
|
||||
-->
|
||||
</components>
|
||||
|
|
|
@ -6,10 +6,10 @@
|
|||
|
||||
<component name="contentLengthHelper"
|
||||
class="org.codelibs.robot.helper.ContentLengthHelper" instance="singleton">
|
||||
<property name="defaultMaxLength">10485760L</property><!-- 10M -->
|
||||
<property name="defaultMaxLength">10485760</property><!-- 10M -->
|
||||
<postConstruct name="addMaxLength">
|
||||
<arg>"text/html"</arg>
|
||||
<arg>2621440L</arg><!-- 2.5M -->
|
||||
<arg>2621440</arg><!-- 2.5M -->
|
||||
</postConstruct>
|
||||
</component>
|
||||
</components>
|
||||
|
|
|
@ -14,7 +14,7 @@
|
|||
<property name="invalidUrlPattern">@java.util.regex.Pattern@compile("^\\s*javascript:|^\\s*mailto:|^\\s*irc:|^\\s*skype:|^\\s*callto:",@java.util.regex.Pattern@CASE_INSENSITIVE)</property>
|
||||
-->
|
||||
<property name="convertUrlMap">
|
||||
#{"feed:" : "http:"}
|
||||
{"feed:" : "http:"}
|
||||
</property>
|
||||
<!--
|
||||
<property name="cacheXpath">"//BODY"</property>
|
||||
|
|
Binary file not shown.
|
@ -1,9 +1,8 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE components PUBLIC "-//SEASAR//DTD S2Container 2.4//EN"
|
||||
"http://www.seasar.org/dtd/components24.dtd">
|
||||
<!DOCTYPE components PUBLIC "-//DBFLUTE//DTD LastaDi 1.0//EN"
|
||||
"http://dbflute.org/meta/lastadi10.dtd">
|
||||
<components>
|
||||
<include path="convention.xml" />
|
||||
<include path="lastaflute.xml"/>
|
||||
<include path="fess.xml" />
|
||||
|
||||
<include path="s2robot_es.xml" />
|
||||
|
@ -20,12 +19,12 @@
|
|||
</component>
|
||||
<component name="intervalControlHelper" class="org.codelibs.fess.helper.IntervalControlHelper">
|
||||
<!--
|
||||
<initMethod name="addIntervalRule">
|
||||
<postConstruct name="addIntervalRule">
|
||||
<arg>"5:00"</arg>
|
||||
<arg>"10:00"</arg>
|
||||
<arg>"2,3,4,5,6"</arg>
|
||||
<arg>3600000</arg>
|
||||
</initMethod>
|
||||
</postConstruct>
|
||||
-->
|
||||
</component>
|
||||
<component name="sambaHelper" class="org.codelibs.fess.helper.SambaHelper">
|
||||
|
@ -36,53 +35,53 @@
|
|||
<property name="maxDocumentCacheSize">5</property>
|
||||
<property name="unprocessedDocumentSize">100</property>
|
||||
<property name="threadDump">false</property>
|
||||
<initMethod name="addBoostDocumentRule">
|
||||
<postConstruct name="addBoostDocumentRule">
|
||||
<arg>
|
||||
<component class="org.codelibs.fess.solr.BoostDocumentRule">
|
||||
<property name="matchExpression">"url.matches(\".*fess.*\")"</property>
|
||||
<property name="boostExpression">"1000.0"</property>
|
||||
</component>
|
||||
</arg>
|
||||
</initMethod>
|
||||
<initMethod name="addDefaultDocValue">
|
||||
</postConstruct>
|
||||
<postConstruct name="addDefaultDocValue">
|
||||
<arg>"FieldName"</arg>
|
||||
<arg>"VALUE"</arg>
|
||||
</initMethod>
|
||||
</postConstruct>
|
||||
-->
|
||||
</component>
|
||||
<component name="fileTypeHelper" class="org.codelibs.fess.helper.FileTypeHelper">
|
||||
<initMethod name="add">
|
||||
<postConstruct name="add">
|
||||
<arg>"text/html"</arg>
|
||||
<arg>"html"</arg>
|
||||
</initMethod>
|
||||
<initMethod name="add">
|
||||
</postConstruct>
|
||||
<postConstruct name="add">
|
||||
<arg>"application/msword"</arg>
|
||||
<arg>"word"</arg>
|
||||
</initMethod>
|
||||
<initMethod name="add">
|
||||
</postConstruct>
|
||||
<postConstruct name="add">
|
||||
<arg>"application/vnd.openxmlformats-officedocument.wordprocessingml.document"</arg>
|
||||
<arg>"word"</arg>
|
||||
</initMethod>
|
||||
<initMethod name="add">
|
||||
</postConstruct>
|
||||
<postConstruct name="add">
|
||||
<arg>"application/vnd.ms-excel"</arg>
|
||||
<arg>"excel"</arg>
|
||||
</initMethod>
|
||||
<initMethod name="add">
|
||||
</postConstruct>
|
||||
<postConstruct name="add">
|
||||
<arg>"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"</arg>
|
||||
<arg>"excel"</arg>
|
||||
</initMethod>
|
||||
<initMethod name="add">
|
||||
</postConstruct>
|
||||
<postConstruct name="add">
|
||||
<arg>"application/vnd.ms-powerpoint"</arg>
|
||||
<arg>"powerpoint"</arg>
|
||||
</initMethod>
|
||||
<initMethod name="add">
|
||||
</postConstruct>
|
||||
<postConstruct name="add">
|
||||
<arg>"application/vnd.openxmlformats-officedocument.presentationml.presentation"</arg>
|
||||
<arg>"powerpoint"</arg>
|
||||
</initMethod>
|
||||
<initMethod name="add">
|
||||
</postConstruct>
|
||||
<postConstruct name="add">
|
||||
<arg>"application/pdf"</arg>
|
||||
<arg>"pdf"</arg>
|
||||
</initMethod>
|
||||
</postConstruct>
|
||||
</component>
|
||||
<component name="fessCrawler" class="org.codelibs.fess.exec.Crawler"
|
||||
instance="prototype">
|
||||
|
|
Loading…
Add table
Reference in a new issue