This commit is contained in:
Shinsuke Sugaya 2013-11-09 00:15:37 +09:00
parent 568f345a5a
commit b08edbe040
19 changed files with 309 additions and 60 deletions

View file

@ -155,6 +155,8 @@ public class Constants extends CoreLibConstants {
public static final String PURGE_BY_BOTS_PROPERTY = "purge.by.bots";
public static final String SEARCH_FILE_PROXY_PROPERTY = "search.file.proxy";
public static final String SEARCH_DESKTOP_PROPERTY = "search.desktop";
public static final String SEARCH_FILE_LAUNCHER_PROPERTY = "search.file.launcher";
@ -316,7 +318,7 @@ public class Constants extends CoreLibConstants {
public static final int EXIT_FAIL = 1;
public static final String DATE_CONFIG_ID_PREFIX = "D";
public static final String DATA_CONFIG_ID_PREFIX = "D";
public static final String FILE_CONFIG_ID_PREFIX = "F";

View file

@ -53,6 +53,7 @@ import jp.sf.fess.entity.SuggestResponse;
import jp.sf.fess.entity.SuggestResponse.SuggestResponseList;
import jp.sf.fess.form.IndexForm;
import jp.sf.fess.helper.BrowserTypeHelper;
import jp.sf.fess.helper.CrawlingConfigHelper;
import jp.sf.fess.helper.HotSearchWordHelper;
import jp.sf.fess.helper.HotSearchWordHelper.Range;
import jp.sf.fess.helper.LabelTypeHelper;
@ -315,8 +316,23 @@ public class IndexAction {
searchLogHelper.addClickLog(clickLog);
}
}
if (url.startsWith("file:")) {
if (Constants.TRUE.equals(crawlerProperties.getProperty(
Constants.SEARCH_FILE_PROXY_PROPERTY, Constants.TRUE))) {
final CrawlingConfigHelper crawlingConfigHelper = SingletonS2Container
.getComponent(CrawlingConfigHelper.class);
try {
crawlingConfigHelper.writeContent(doc);
return null;
} catch (final Exception e) {
logger.error("Failed to load: " + doc, e);
errorMessage = MessageResourcesUtil.getMessage(RequestUtil
.getRequest().getLocale(),
"errors.not_load_from_server", url);
return "error.jsp";
}
} else if (Constants.TRUE.equals(crawlerProperties.getProperty(
Constants.SEARCH_DESKTOP_PROPERTY, Constants.FALSE))) {
final String path = url.replaceFirst("file:/+", "//");
final File file = new File(path);
@ -432,9 +448,8 @@ public class IndexAction {
final int pageStart = Integer.parseInt(indexForm.start);
final int pageNum = Integer.parseInt(indexForm.num);
try {
documentItems = searchService.getDocumentList(query,
indexForm.facet, pageStart, pageNum, indexForm.geo,
indexForm.mlt);
documentItems = searchService.getDocumentList(query, pageStart,
pageNum, indexForm.facet, indexForm.geo, indexForm.mlt);
} catch (final SolrLibQueryException e) {
if (logger.isDebugEnabled()) {
logger.debug(e.getMessage(), e);
@ -692,10 +707,10 @@ public class IndexAction {
buf.append("<doc>");
for (final Map.Entry<String, Object> entry : document
.entrySet()) {
if (StringUtil.isNotBlank(entry.getKey())
&& entry.getValue() != null) {
final String tagName = StringUtil
.decamelize(entry.getKey())
final String name = entry.getKey();
if (StringUtil.isNotBlank(name) && entry.getValue() != null
&& queryHelper.isApiResponseField(name)) {
final String tagName = StringUtil.decamelize(name)
.replaceAll("_", "-").toLowerCase();
buf.append('<');
buf.append(tagName);
@ -1106,15 +1121,17 @@ public class IndexAction {
boolean first2 = true;
for (final Map.Entry<String, Object> entry : document
.entrySet()) {
if (StringUtil.isNotBlank(entry.getKey())
&& entry.getValue() != null) {
final String name = entry.getKey();
if (StringUtil.isNotBlank(name)
&& entry.getValue() != null
&& queryHelper.isApiResponseField(name)) {
if (!first2) {
buf.append(',');
} else {
first2 = false;
}
buf.append('\"');
buf.append(escapeJsonString(entry.getKey()));
buf.append(escapeJsonString(name));
buf.append("\":\"");
buf.append(escapeJsonString(entry.getValue()
.toString()));

View file

@ -144,8 +144,8 @@ public class MobileAction {
final int pageNum = Integer.parseInt(mobileForm.num);
// TODO add GeoInfo if needed...
try {
documentItems = searchService.getDocumentList(mobileForm.query, null,
pageStart, pageNum, null, null);
documentItems = searchService.getDocumentList(mobileForm.query,
pageStart, pageNum, null, null, null);
} catch (final InvalidQueryException e) {
if (logger.isDebugEnabled()) {
logger.debug(e.getMessage(), e);

View file

@ -150,8 +150,8 @@ public class SearchListAction implements Serializable {
final int offset = Integer.parseInt(searchListForm.start);
final int size = Integer.parseInt(searchListForm.num);
try {
documentItems = searchService.getDocumentList(query, null, offset, size,
null, null, false);
documentItems = searchService.getDocumentList(query, offset, size,
null, null, null, false);
} catch (final InvalidQueryException e) {
if (logger.isDebugEnabled()) {
logger.debug(e.getMessage(), e);

View file

@ -191,7 +191,7 @@ public class DataCrawlingConfig extends BsDataCrawlingConfig implements
/**
 * Returns the identifier of this data crawling config as stored with
 * indexed documents: the data-config prefix followed by the numeric id.
 *
 * @return the prefixed config id, or {@code null} when this entity has no
 *         id yet
 */
@Override
public String getConfigId() {
    if (getId() != null) {
        // Only the correctly spelled DATA_ constant is used; a second
        // return statement after this one would be unreachable.
        return Constants.DATA_CONFIG_ID_PREFIX + getId().toString();
    }
    return null;
}

View file

@ -16,22 +16,61 @@
package jp.sf.fess.helper;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.Serializable;
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import javax.servlet.http.HttpServletResponse;
import jp.sf.fess.Constants;
import jp.sf.fess.FessSystemException;
import jp.sf.fess.db.exentity.CrawlingConfig;
import jp.sf.fess.helper.UserAgentHelper.UserAgentType;
import jp.sf.fess.service.DataCrawlingConfigService;
import jp.sf.fess.service.FileCrawlingConfigService;
import jp.sf.fess.service.WebCrawlingConfigService;
import org.apache.commons.io.IOUtils;
import org.seasar.framework.container.SingletonS2Container;
import org.seasar.framework.util.Base64Util;
import org.seasar.robot.client.S2RobotClient;
import org.seasar.robot.client.S2RobotClientFactory;
import org.seasar.robot.entity.ResponseData;
import org.seasar.robot.util.StreamUtil;
import org.seasar.struts.util.ResponseUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class CrawlingConfigHelper implements Serializable {
private static final long serialVersionUID = 1L;
private static final Logger logger = LoggerFactory
.getLogger(CrawlingConfigHelper.class);
protected final Map<String, CrawlingConfig> crawlingConfigMap = new ConcurrentHashMap<String, CrawlingConfig>();
protected int count = 1;
protected String configIdField = "cid_s_s";
protected String urlField = "url";
/**
 * @return the name of the document field that {@code writeContent} reads
 *         the URL from
 */
public String getUrlField() {
    return this.urlField;
}
/**
 * @param urlField the name of the document field that holds the URL
 */
public void setUrlField(final String urlField) {
this.urlField = urlField;
}
public synchronized String store(final String sessionId,
final CrawlingConfig crawlingConfig) {
final String sessionCountId = sessionId + "-" + count;
@ -56,4 +95,129 @@ public class CrawlingConfigHelper implements Serializable {
this.configIdField = configIdField;
}
/**
 * Fetches the content behind an indexed document and streams it to the
 * current HTTP response.
 * <p>
 * The crawling config is resolved from the document's config id field
 * (one prefix character followed by the numeric id), the robot client
 * factory is initialized from that config, the document URL is fetched
 * with {@code doGet}, and the response body is copied to the servlet
 * output stream after the file name and content type headers are set.
 *
 * @param doc the indexed document; must contain the config id field and
 *            the URL field
 * @throws FessSystemException if the config id is missing or malformed,
 *             no crawling config or client is found, the URL is missing,
 *             or copying the content fails
 */
public void writeContent(final Map<String, Object> doc) {
    final Object configIdObj = doc.get(configIdField);
    final String configId = configIdObj == null ? null : configIdObj
            .toString();
    if (configId == null || configId.length() < 2) {
        throw new FessSystemException("Invalid configId: " + configIdObj);
    }

    final CrawlingConfig config = findCrawlingConfig(configId);
    if (config == null) {
        throw new FessSystemException("No crawlingConfig: " + configIdObj);
    }

    // Fail with a descriptive error instead of handing null to the
    // client factory.
    final Object urlObj = doc.get(urlField);
    if (urlObj == null) {
        throw new FessSystemException("No url in the document. configId: "
                + configIdObj);
    }
    final String url = urlObj.toString();

    final S2RobotClientFactory robotClientFactory = SingletonS2Container
            .getComponent(S2RobotClientFactory.class);
    config.initializeClientFactory(robotClientFactory);
    final S2RobotClient client = robotClientFactory.getClient(url);
    if (client == null) {
        throw new FessSystemException("No S2RobotClient: " + configIdObj
                + ", url: " + url);
    }

    final ResponseData responseData = client.doGet(url);
    final HttpServletResponse response = ResponseUtil.getResponse();
    // Headers must be set before the body is written.
    writeFileName(response, responseData);
    writeContentType(response, responseData);

    InputStream is = null;
    OutputStream os = null;
    try {
        is = new BufferedInputStream(responseData.getResponseBody());
        os = new BufferedOutputStream(response.getOutputStream());
        StreamUtil.drain(is, os);
        os.flush();
    } catch (final IOException e) {
        throw new FessSystemException(
                "Failed to write a content. configId: " + configIdObj
                        + ", url: " + url, e);
    } finally {
        IOUtils.closeQuietly(is);
        IOUtils.closeQuietly(os);
    }
}

/**
 * Resolves the crawling config addressed by a prefixed config id.
 *
 * @param configId the config id: one prefix character plus a numeric id
 * @return the matching config, or {@code null} when the prefix is unknown
 *         or the service finds no entry
 * @throws FessSystemException if the part after the prefix is not a number
 */
private CrawlingConfig findCrawlingConfig(final String configId) {
    final String configType = configId.substring(0, 1);
    final long id;
    try {
        id = Long.parseLong(configId.substring(1));
    } catch (final NumberFormatException e) {
        throw new FessSystemException("Invalid configId: " + configId, e);
    }

    if (Constants.WEB_CONFIG_ID_PREFIX.equals(configType)) {
        return SingletonS2Container.getComponent(
                WebCrawlingConfigService.class).getWebCrawlingConfig(id);
    }
    if (Constants.FILE_CONFIG_ID_PREFIX.equals(configType)) {
        return SingletonS2Container.getComponent(
                FileCrawlingConfigService.class).getFileCrawlingConfig(id);
    }
    if (Constants.DATA_CONFIG_ID_PREFIX.equals(configType)) {
        return SingletonS2Container.getComponent(
                DataCrawlingConfigService.class).getDataCrawlingConfig(id);
    }
    return null;
}
/**
 * Sets the {@code Content-Disposition} header so the browser saves the
 * content under the last path segment of the fetched URL.
 * <p>
 * The header format depends on the detected user agent. This is best
 * effort: any failure is logged and the response is left without the
 * header.
 *
 * @param response the servlet response to set the header on
 * @param responseData the fetched content; its URL supplies the file name
 *            and its charset (UTF-8 when absent) is used to decode it
 */
protected void writeFileName(final HttpServletResponse response,
        final ResponseData responseData) {
    final UserAgentHelper userAgentHelper = SingletonS2Container
            .getComponent(UserAgentHelper.class);
    final UserAgentType userAgentType = userAgentHelper.getUserAgentType();
    String charset = responseData.getCharSet();
    if (charset == null) {
        charset = Constants.UTF_8;
    }
    final String url = responseData.getUrl();
    if (url == null) {
        // Without a URL there is no name to offer; keep this best effort
        // rather than failing the whole download.
        logger.warn("Failed to write a filename: " + responseData);
        return;
    }
    final int pos = url.lastIndexOf('/');
    try {
        final String name;
        if (pos >= 0 && pos + 1 < url.length()) {
            name = URLDecoder.decode(url.substring(pos + 1), charset);
        } else {
            name = URLDecoder.decode(url, charset);
        }
        switch (userAgentType) {
        case IE:
            response.setHeader("Content-Disposition",
                    "attachment; filename=\"" + encodeFileName(name)
                            + "\"");
            break;
        case OPERA:
            response.setHeader("Content-Disposition",
                    "attachment; filename*=utf-8'ja'"
                            + encodeFileName(name));
            break;
        case SAFARI:
            // The name goes into the header unencoded, so it must not be
            // able to end the quoted string or start a new header line.
            response.setHeader("Content-Disposition",
                    "attachment; filename=\"" + quoteFileName(name) + "\"");
            break;
        case CHROME:
        case FIREFOX:
        case OTHER:
            // NOTE(review): this is the RFC 2047 encoded-word form; confirm
            // the targeted browsers still accept it inside filename="...".
            response.setHeader(
                    "Content-Disposition",
                    "attachment; filename=\"=?utf-8?B?"
                            + Base64Util.encode(name
                                    .getBytes(Constants.UTF_8)) + "?=\"");
            break;
        }
    } catch (final Exception e) {
        logger.warn("Failed to write a filename: " + responseData, e);
    }
}

/**
 * Percent-encodes a file name as UTF-8. URLEncoder produces form encoding,
 * where a space becomes "+"; a header value needs "%20" instead. A literal
 * "+" in the name is already encoded as "%2B", so every remaining "+"
 * stands for a space.
 */
private static String encodeFileName(final String name)
        throws IOException {
    return URLEncoder.encode(name, Constants.UTF_8).replace("+", "%20");
}

/**
 * Makes a file name safe inside a quoted header value: control characters
 * (including CR and LF) are dropped, and double quotes and backslashes are
 * escaped with a backslash.
 */
private static String quoteFileName(final String name) {
    final StringBuilder buf = new StringBuilder(name.length());
    for (int i = 0; i < name.length(); i++) {
        final char c = name.charAt(i);
        if (c < 0x20 || c == 0x7f) {
            continue;
        }
        if (c == '"' || c == '\\') {
            buf.append('\\');
        }
        buf.append(c);
    }
    return buf.toString();
}
/**
 * Copies the MIME type of the fetched content to the servlet response.
 * <p>
 * Nothing is set when the content has no MIME type. For {@code text/*}
 * types the response's current character encoding, when available, is
 * appended as the {@code charset} parameter.
 * NOTE(review): the charset comes from the servlet response, not from
 * {@code responseData} - confirm this is intended.
 *
 * @param response the servlet response to set the content type on
 * @param responseData the fetched content that supplies the MIME type
 */
protected void writeContentType(final HttpServletResponse response,
        final ResponseData responseData) {
    final String mimeType = responseData.getMimeType();
    if (mimeType != null) {
        String contentType = mimeType;
        if (mimeType.startsWith("text/")) {
            final String encoding = response.getCharacterEncoding();
            if (encoding != null) {
                contentType = mimeType + "; charset=" + encoding;
            }
        }
        response.setContentType(contentType);
    }
}
}

View file

@ -75,4 +75,5 @@ public interface QueryHelper {
Map<String, String[]> getQueryParamMap();
boolean isApiResponseField(String field);
}

View file

@ -0,0 +1,44 @@
package jp.sf.fess.helper;
import javax.servlet.http.HttpServletRequest;
import org.seasar.struts.util.RequestUtil;
/**
 * Classifies the browser of the current request from its
 * {@code user-agent} header and caches the result as a request attribute.
 */
public class UserAgentHelper {
    private static final String USER_AGENT = "user-agent";

    private static final String USER_AGENT_TYPE = "ViewHelper.UserAgent";

    /**
     * Returns the user agent type of the current request. The type is
     * detected once per request and then read back from the request
     * attribute.
     *
     * @return the detected type, {@link UserAgentType#OTHER} when the header
     *         is missing or matches no known browser
     */
    public UserAgentType getUserAgentType() {
        final HttpServletRequest request = RequestUtil.getRequest();
        final Object cached = request.getAttribute(USER_AGENT_TYPE);
        if (cached != null) {
            return (UserAgentType) cached;
        }
        final UserAgentType detected = detect(request.getHeader(USER_AGENT));
        request.setAttribute(USER_AGENT_TYPE, detected);
        return detected;
    }

    /**
     * Maps a raw user-agent header value to a type. The checks are ordered:
     * the first matching token wins.
     */
    private static UserAgentType detect(final String userAgent) {
        if (userAgent == null) {
            return UserAgentType.OTHER;
        }
        if (userAgent.contains("MSIE") || userAgent.contains("Trident")) {
            return UserAgentType.IE;
        }
        if (userAgent.contains("Firefox")) {
            return UserAgentType.FIREFOX;
        }
        if (userAgent.contains("Chrome")) {
            return UserAgentType.CHROME;
        }
        if (userAgent.contains("Safari")) {
            return UserAgentType.SAFARI;
        }
        if (userAgent.contains("Opera")) {
            return UserAgentType.OPERA;
        }
        return UserAgentType.OTHER;
    }

    /** Browser families distinguished by this helper. */
    public enum UserAgentType {
        IE, FIREFOX, CHROME, SAFARI, OPERA, OTHER;
    }
}

View file

@ -35,6 +35,7 @@ import javax.servlet.http.HttpServletRequest;
import jp.sf.fess.Constants;
import jp.sf.fess.FessSystemException;
import jp.sf.fess.entity.FacetQueryView;
import jp.sf.fess.helper.UserAgentHelper.UserAgentType;
import org.apache.commons.lang.StringUtils;
import org.codelibs.core.util.DynamicProperties;
@ -51,14 +52,15 @@ public class ViewHelper implements Serializable {
protected static final String GOOGLE_MOBILE_TRANSCODER_LINK = "http://www.google.co.jp/gwt/n?u=";
private static final String USER_AGENT_TYPE = "ViewHelper.UserAgent";
@Resource
protected BrowserTypeHelper browserTypeHelper;
@Resource
protected PathMappingHelper pathMappingHelper;
@Resource
protected UserAgentHelper userAgentHelper;
@Resource
protected DynamicProperties crawlerProperties;
@ -196,7 +198,7 @@ public class ViewHelper implements Serializable {
final int pos = url.indexOf(':', 5);
final boolean isLocalFile = pos > 0 && pos < 12;
final UserAgentType ua = getUserAgentType();
final UserAgentType ua = userAgentHelper.getUserAgentType();
switch (ua) {
case IE:
if (isLocalFile) {
@ -335,37 +337,6 @@ public class ViewHelper implements Serializable {
return buf.toString();
}
protected UserAgentType getUserAgentType() {
final HttpServletRequest request = RequestUtil.getRequest();
UserAgentType uaType = (UserAgentType) request
.getAttribute(USER_AGENT_TYPE);
if (uaType == null) {
final String userAgent = request.getHeader("user-agent");
if (userAgent != null) {
if (userAgent.indexOf("MSIE") >= 0) {
uaType = UserAgentType.IE;
} else if (userAgent.indexOf("Firefox") >= 0) {
uaType = UserAgentType.FIREFOX;
} else if (userAgent.indexOf("Chrome") >= 0) {
uaType = UserAgentType.CHROME;
} else if (userAgent.indexOf("Safari") >= 0) {
uaType = UserAgentType.SAFARI;
} else if (userAgent.indexOf("Opera") >= 0) {
uaType = UserAgentType.OPERA;
}
}
if (uaType == null) {
uaType = UserAgentType.OTHER;
}
request.setAttribute(USER_AGENT_TYPE, uaType);
}
return uaType;
}
protected enum UserAgentType {
IE, FIREFOX, CHROME, SAFARI, OPERA, OTHER;
}
public String getPagePath(final String page) {
final Locale locale = RequestUtil.getRequest().getLocale();
final String lang = locale.getLanguage();

View file

@ -19,6 +19,7 @@ package jp.sf.fess.helper.impl;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
@ -77,10 +78,12 @@ public class QueryHelperImpl implements QueryHelper, Serializable {
@Resource
protected RoleQueryHelper roleQueryHelper;
protected Set<String> apiResponseFieldSet;
protected String[] responseFields = new String[] { "id", "docId", "score",
"boost", "contentLength", "host", "site", "lastModified",
"mimetype", "created", "title", "digest", "url", "clickCount_i",
"favoriteCount_i", "screenshot_s_s" };
"favoriteCount_i", "screenshot_s_s", "cid_s_s" };
protected String[] highlightingFields = new String[] { "content" };
@ -1058,6 +1061,21 @@ public class QueryHelperImpl implements QueryHelper, Serializable {
return buf.toString().trim();
}
/**
 * Replaces the set of field names that {@code isApiResponseField} accepts.
 *
 * @param fields the field names to accept
 */
public void setApiResponseFields(final String[] fields) {
    final Set<String> fieldSet = new HashSet<String>();
    apiResponseFieldSet = fieldSet;
    for (int i = 0; i < fields.length; i++) {
        fieldSet.add(fields[i]);
    }
}
/**
 * Tells whether a document field is accepted as an API response field.
 *
 * @param field the field name to check
 * @return {@code true} when no field set has been configured, otherwise
 *         whether the configured set contains {@code field}
 */
@Override
public boolean isApiResponseField(final String field) {
    return apiResponseFieldSet == null
            || apiResponseFieldSet.contains(field);
}
/**
* @return the responseFields
*/

View file

@ -357,4 +357,11 @@ public class DataCrawlingConfigService extends BsDataCrawlingConfigService
}
/**
 * Selects the data crawling config with the given id whose
 * {@code deletedBy} is null (presumably: not logically deleted).
 *
 * @param id the config id
 * @return the entity returned by {@code selectEntity} for that condition
 */
public DataCrawlingConfig getDataCrawlingConfig(final long id) {
    final DataCrawlingConfigCB condition = new DataCrawlingConfigCB();
    condition.query().setId_Equal(id);
    condition.query().setDeletedBy_IsNull();
    final DataCrawlingConfig entity = dataCrawlingConfigBhv
            .selectEntity(condition);
    return entity;
}
}

View file

@ -357,4 +357,11 @@ public class FileCrawlingConfigService extends BsFileCrawlingConfigService
}
/**
 * Selects the file crawling config with the given id whose
 * {@code deletedBy} is null (presumably: not logically deleted).
 *
 * @param id the config id
 * @return the entity returned by {@code selectEntity} for that condition
 */
public FileCrawlingConfig getFileCrawlingConfig(final long id) {
    final FileCrawlingConfigCB condition = new FileCrawlingConfigCB();
    condition.query().setId_Equal(id);
    condition.query().setDeletedBy_IsNull();
    final FileCrawlingConfig entity = fileCrawlingConfigBhv
            .selectEntity(condition);
    return entity;
}
}

View file

@ -61,8 +61,8 @@ public class SearchService implements Serializable {
protected QueryHelper queryHelper;
public Map<String, Object> getDocument(final String query) {
final List<Map<String, Object>> docList = getDocumentList(query, null,
0, 1, null, null);
final List<Map<String, Object>> docList = getDocumentList(query, 0, 1,
null, null, null);
if (!docList.isEmpty()) {
return docList.get(0);
}
@ -82,18 +82,18 @@ public class SearchService implements Serializable {
}
buf.append("docId:").append(docIds[i]);
}
return getDocumentList(buf.toString(), null, 0, pageSize, null, null);
return getDocumentList(buf.toString(), 0, pageSize, null, null, null);
}
public List<Map<String, Object>> getDocumentList(final String query,
final FacetInfo facetInfo, final int start, final int rows,
final int start, final int rows, final FacetInfo facetInfo,
final GeoInfo geoInfo, final MoreLikeThisInfo mltInfo) {
return getDocumentList(query, facetInfo, start, rows, geoInfo, mltInfo,
return getDocumentList(query, start, rows, facetInfo, geoInfo, mltInfo,
true);
}
public List<Map<String, Object>> getDocumentList(final String query,
final FacetInfo facetInfo, final int start, final int rows,
final int start, final int rows, final FacetInfo facetInfo,
final GeoInfo geoInfo, final MoreLikeThisInfo mltInfo,
final boolean forUser) {
if (start > queryHelper.getMaxSearchResultOffset()) {

View file

@ -355,4 +355,10 @@ public class WebCrawlingConfigService extends BsWebCrawlingConfigService
}
/**
 * Selects the web crawling config with the given id whose
 * {@code deletedBy} is null (presumably: not logically deleted).
 *
 * @param id the config id
 * @return the entity returned by {@code selectEntity} for that condition
 */
public WebCrawlingConfig getWebCrawlingConfig(final long id) {
    final WebCrawlingConfigCB condition = new WebCrawlingConfigCB();
    condition.query().setId_Equal(id);
    condition.query().setDeletedBy_IsNull();
    final WebCrawlingConfig entity = webCrawlingConfigBhv
            .selectEntity(condition);
    return entity;
}
}

View file

@ -12,6 +12,9 @@
<include path="mobylet.dicon"/>
<include path="s2robot_client.dicon" />
<include path="s2robot_mimetype.dicon" />
<component name="actionMessagesThrowsInterceptor" class="jp.sf.fess.interceptor.FessActionMessagesThrowsInterceptor"/>
<component name="authenticationCipher" class="jp.sf.fess.crypto.FessCipher">
@ -55,12 +58,17 @@
<arg>"ko"</arg>
<arg>"cjk"</arg>
</initMethod>
<initMethod name="setApiResponseFields">
<arg>new String[]{"id", "docId", "score", "boost",
"contentLength", "host", "site", "lastModified", "mimetype",
"created", "title", "digest", "url", "clickCount_i", "favoriteCount_i"}</arg>
</initMethod>
<!--
<property name="additionalGeoQuery">"location_i_i:1"</property>
<property name="responseFields">new String[]{"id", "docId", "score", "boost",
"contentLength", "host", "site", "lastModified", "mimetype",
"created", "title", "digest", "url", "clickCount_i", "favoriteCount_i",
"screenshot_s_s"}</property>
"screenshot_s_s", "cid_s_s"}</property>
<property name="highlightingFields">new String[]{"digest", "cache" }</property>
<property name="searchFields">new String[]{"url", "docId", "host",
"title", "content", "contentLength", "lastModified", "mimetype",
@ -163,6 +171,8 @@
</initMethod>
-->
</component>
<component name="userAgentHelper" class="jp.sf.fess.helper.UserAgentHelper">
</component>
<component name="webManagementHelper" class="jp.sf.fess.helper.impl.TomcatManagementHelperImpl">
<initMethod name="addSolrInstance">
<arg>

View file

@ -68,6 +68,7 @@ errors.no_launcher_applet_jar=No launcher file.
errors.unsupported_encoding={0} is not supported as encoding.
errors.docid_not_found=Not found Doc ID:{0}
errors.document_not_found=Not found URL of Doc ID:{0}
errors.not_load_from_server=Could not load from this server: {0}
errors.invalid_query_unknown=The given query is invalid.
errors.invalid_query_quoted=An invalid quote character is used.

View file

@ -68,6 +68,7 @@ errors.no_launcher_applet_jar=\u8d77\u52d5\u30d5\u30a1\u30a4\u30eb\u304c\u3042\u
errors.unsupported_encoding={0}\u306f\u30b5\u30dd\u30fc\u30c8\u3055\u308c\u3066\u3044\u306a\u3044\u30a8\u30f3\u30b3\u30fc\u30c7\u30a3\u30f3\u30b0\u3067\u3059\u3002
errors.docid_not_found=ID:{0}\u304c\u898b\u3064\u304b\u308a\u307e\u305b\u3093\u3002
errors.document_not_found=ID:{0}\u306eURL\u304c\u898b\u3064\u304b\u308a\u307e\u305b\u3093\u3002
errors.not_load_from_server=\u3053\u306e\u30b5\u30fc\u30d0\u304b\u3089\u30ed\u30fc\u30c9\u3059\u308b\u3053\u3068\u304c\u3067\u304d\u307e\u305b\u3093\u3067\u3057\u305f: {0}
errors.invalid_query_unknown=\u691c\u7d22\u30af\u30a8\u30ea\u304c\u6b63\u3057\u304f\u3042\u308a\u307e\u305b\u3093\u3002
errors.invalid_query_quoted=\u30af\u30aa\u30fc\u30c8\u6587\u5b57(")\u306e\u5229\u7528\u65b9\u6cd5\u304c\u6b63\u3057\u304f\u3042\u308a\u307e\u305b\u3093\u3002

View file

@ -11,6 +11,8 @@
</arg>
</component>
<component name="crawlingConfigHelper" class="jp.sf.fess.helper.CrawlingConfigHelper">
</component>
<component name="pathMappingHelper" class="jp.sf.fess.helper.PathMappingHelper">
</component>
<component name="systemHelper" class="jp.sf.fess.helper.SystemHelper">

View file

@ -15,8 +15,6 @@
</component>
<component name="dataIndexHelper" class="jp.sf.fess.helper.DataIndexHelper">
</component>
<component name="crawlingConfigHelper" class="jp.sf.fess.helper.CrawlingConfigHelper">
</component>
<component name="overlappingHostHelper" class="jp.sf.fess.helper.OverlappingHostHelper">
</component>
<component name="intervalControlHelper" class="jp.sf.fess.helper.IntervalControlHelper">