This commit is contained in:
parent
dcd9ef4e1a
commit
1ff431a351
11 changed files with 76 additions and 54 deletions
28
pom.xml
28
pom.xml
|
@ -69,7 +69,7 @@
|
|||
<database>h2</database>
|
||||
<databaseGroupId>com.h2database</databaseGroupId>
|
||||
<databaseArtifactId>h2</databaseArtifactId>
|
||||
<databaseVersion>1.3.172</databaseVersion>
|
||||
<databaseVersion>1.4.178</databaseVersion>
|
||||
<databaseDriver>org.h2.Driver</databaseDriver>
|
||||
<databaseUrl>jdbc:h2:file:${basedir}/src/main/webapp/WEB-INF/db/fess</databaseUrl>
|
||||
<databaseTestUrl>jdbc:h2:file:${basedir}/target/test-classes/db/fess</databaseTestUrl>
|
||||
|
@ -83,7 +83,7 @@
|
|||
<database>mysql</database>
|
||||
<databaseGroupId>mysql</databaseGroupId>
|
||||
<databaseArtifactId>mysql-connector-java</databaseArtifactId>
|
||||
<databaseVersion>5.1.30</databaseVersion>
|
||||
<databaseVersion>5.1.31</databaseVersion>
|
||||
<databaseDriver>com.mysql.jdbc.Driver</databaseDriver>
|
||||
<databaseUrl>jdbc:mysql://localhost:3306/fess_db?noDatetimeStringSync=true&zeroDateTimeBehavior=convertToNull&useUnicode=true&characterEncoding=UTF-8&autoReconnect=true</databaseUrl>
|
||||
<databaseTestUrl>jdbc:mysql://localhost:3306/fess_testdb?noDatetimeStringSync=true&zeroDateTimeBehavior=convertToNull&useUnicode=true&characterEncoding=UTF-8&autoReconnect=true</databaseTestUrl>
|
||||
|
@ -108,7 +108,7 @@
|
|||
</profiles>
|
||||
<properties>
|
||||
<dbflute.version>1.0.4K</dbflute.version>
|
||||
<s2robot.version>0.7.2</s2robot.version>
|
||||
<s2robot.version>0.8.0-SNAPSHOT</s2robot.version>
|
||||
<solr.version>4.8.1</solr.version>
|
||||
<slf4j.version>1.7.7</slf4j.version>
|
||||
<poi.version>3.10-FINAL</poi.version>
|
||||
|
@ -601,15 +601,15 @@
|
|||
<scope>provided</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>javax.servlet.jsp</groupId>
|
||||
<artifactId>jsp-api</artifactId>
|
||||
<version>2.2</version>
|
||||
<scope>provided</scope>
|
||||
<groupId>javax.servlet.jsp</groupId>
|
||||
<artifactId>jsp-api</artifactId>
|
||||
<version>2.2</version>
|
||||
<scope>provided</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>javax.mail</groupId>
|
||||
<artifactId>javax.mail-api</artifactId>
|
||||
<version>1.5.1</version>
|
||||
<version>1.5.2</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>javax.activation</groupId>
|
||||
|
@ -658,7 +658,7 @@
|
|||
<dependency>
|
||||
<groupId>args4j</groupId>
|
||||
<artifactId>args4j</artifactId>
|
||||
<version>2.0.26</version>
|
||||
<version>2.0.29</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.tika</groupId>
|
||||
|
@ -688,7 +688,7 @@
|
|||
<dependency>
|
||||
<groupId>org.apache.pdfbox</groupId>
|
||||
<artifactId>pdfbox</artifactId>
|
||||
<version>1.8.4</version>
|
||||
<version>1.8.6</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.poi</groupId>
|
||||
|
@ -714,22 +714,22 @@
|
|||
<dependency>
|
||||
<groupId>org.javassist</groupId>
|
||||
<artifactId>javassist</artifactId>
|
||||
<version>3.18.1-GA</version>
|
||||
<version>3.18.2-GA</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>net.arnx</groupId>
|
||||
<artifactId>jsonic</artifactId>
|
||||
<version>1.3.3</version>
|
||||
<version>1.3.5</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.github.jknack</groupId>
|
||||
<artifactId>handlebars</artifactId>
|
||||
<version>1.3.0</version>
|
||||
<version>1.3.1</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.codehaus.groovy</groupId>
|
||||
<artifactId>groovy-all</artifactId>
|
||||
<version>2.2.2</version>
|
||||
<version>2.3.3</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.github.detro</groupId>
|
||||
|
|
|
@ -6,7 +6,7 @@ DROP TABLE IF EXISTS CLICK_LOG;
|
|||
DROP TABLE IF EXISTS LABEL_TYPE_TO_ROLE_TYPE_MAPPING;
|
||||
DROP TABLE IF EXISTS SEARCH_LOG;
|
||||
DROP TABLE IF EXISTS USER_INFO;
|
||||
DROP TABLE IF EXISTS DATA_CONFIG_TO_BROWSER_TYPE_MAPPING
|
||||
DROP TABLE IF EXISTS DATA_CONFIG_TO_BROWSER_TYPE_MAPPING;
|
||||
DROP TABLE IF EXISTS DATA_CONFIG_TO_LABEL_TYPE_MAPPING;
|
||||
DROP TABLE IF EXISTS DATA_CONFIG_TO_ROLE_TYPE_MAPPING;
|
||||
DROP TABLE IF EXISTS DATA_CRAWLING_CONFIG;
|
||||
|
|
BIN
src/main/h2/webapp/WEB-INF/db/fess.mv.db
Normal file
BIN
src/main/h2/webapp/WEB-INF/db/fess.mv.db
Normal file
Binary file not shown.
BIN
src/main/h2/webapp/WEB-INF/db/robot.mv.db
Normal file
BIN
src/main/h2/webapp/WEB-INF/db/robot.mv.db
Normal file
Binary file not shown.
|
@ -34,6 +34,7 @@ import org.codelibs.solr.lib.SolrGroup;
|
|||
import org.seasar.framework.container.SingletonS2Container;
|
||||
import org.seasar.framework.util.SerializeUtil;
|
||||
import org.seasar.robot.RobotSystemException;
|
||||
import org.seasar.robot.builder.RequestDataBuilder;
|
||||
import org.seasar.robot.client.S2RobotClient;
|
||||
import org.seasar.robot.client.S2RobotClientFactory;
|
||||
import org.seasar.robot.entity.ResponseData;
|
||||
|
@ -196,7 +197,9 @@ public class FileListDataStoreImpl extends CsvDataStoreImpl {
|
|||
}
|
||||
|
||||
final long startTime = System.currentTimeMillis();
|
||||
final ResponseData responseData = client.doGet(url);
|
||||
final ResponseData responseData = client
|
||||
.execute(RequestDataBuilder.newRequestData().get()
|
||||
.url(url).build());
|
||||
responseData.setExecutionTime(System.currentTimeMillis()
|
||||
- startTime);
|
||||
responseData.setSessionId((String) dataMap
|
||||
|
|
|
@ -42,6 +42,7 @@ import jp.sf.fess.util.ComponentUtil;
|
|||
import org.apache.commons.io.IOUtils;
|
||||
import org.seasar.framework.container.SingletonS2Container;
|
||||
import org.seasar.framework.util.Base64Util;
|
||||
import org.seasar.robot.builder.RequestDataBuilder;
|
||||
import org.seasar.robot.client.S2RobotClient;
|
||||
import org.seasar.robot.client.S2RobotClientFactory;
|
||||
import org.seasar.robot.entity.ResponseData;
|
||||
|
@ -179,7 +180,8 @@ public class CrawlingConfigHelper implements Serializable {
|
|||
throw new FessSystemException("No S2RobotClient: " + configIdObj
|
||||
+ ", url: " + url);
|
||||
}
|
||||
final ResponseData responseData = client.doGet(url);
|
||||
final ResponseData responseData = client.execute(RequestDataBuilder
|
||||
.newRequestData().get().url(url).build());
|
||||
final HttpServletResponse response = ResponseUtil.getResponse();
|
||||
writeFileName(response, responseData);
|
||||
writeContentType(response, responseData);
|
||||
|
|
|
@ -47,8 +47,10 @@ import org.codelibs.solr.lib.SolrGroup;
|
|||
import org.codelibs.solr.lib.SolrGroupManager;
|
||||
import org.codelibs.solr.lib.policy.QueryType;
|
||||
import org.seasar.robot.S2RobotThread;
|
||||
import org.seasar.robot.builder.RequestDataBuilder;
|
||||
import org.seasar.robot.client.S2RobotClient;
|
||||
import org.seasar.robot.client.smb.SmbClient;
|
||||
import org.seasar.robot.entity.RequestData;
|
||||
import org.seasar.robot.entity.ResponseData;
|
||||
import org.seasar.robot.entity.UrlQueue;
|
||||
import org.seasar.robot.log.LogType;
|
||||
|
@ -88,7 +90,9 @@ public class FessS2RobotThread extends S2RobotThread {
|
|||
ResponseData responseData = null;
|
||||
try {
|
||||
// head method
|
||||
responseData = client.doHead(urlQueue.getUrl());
|
||||
responseData = client
|
||||
.execute(RequestDataBuilder.newRequestData().head()
|
||||
.url(urlQueue.getUrl()).build());
|
||||
if (responseData == null) {
|
||||
return true;
|
||||
}
|
||||
|
@ -217,7 +221,7 @@ public class FessS2RobotThread extends S2RobotThread {
|
|||
}
|
||||
|
||||
protected void storeChildUrlsToQueue(final UrlQueue urlQueue,
|
||||
final Set<String> childUrlSet) {
|
||||
final Set<RequestData> childUrlSet) {
|
||||
if (childUrlSet != null) {
|
||||
synchronized (robotContext.getAccessCountLock()) {
|
||||
// add an url
|
||||
|
@ -229,7 +233,7 @@ public class FessS2RobotThread extends S2RobotThread {
|
|||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
protected Set<String> getAnchorSet(final Object obj) {
|
||||
protected Set<RequestData> getAnchorSet(final Object obj) {
|
||||
List<String> anchorList;
|
||||
if (obj instanceof String) {
|
||||
anchorList = new ArrayList<String>();
|
||||
|
@ -244,9 +248,10 @@ public class FessS2RobotThread extends S2RobotThread {
|
|||
return null;
|
||||
}
|
||||
|
||||
final Set<String> childUrlSet = new LinkedHashSet<String>();
|
||||
final Set<RequestData> childUrlSet = new LinkedHashSet<>();
|
||||
for (final String anchor : anchorList) {
|
||||
childUrlSet.add(anchor);
|
||||
childUrlSet.add(RequestDataBuilder.newRequestData().get()
|
||||
.url(anchor).build());
|
||||
}
|
||||
return childUrlSet;
|
||||
}
|
||||
|
@ -294,7 +299,7 @@ public class FessS2RobotThread extends S2RobotThread {
|
|||
return null;
|
||||
}
|
||||
|
||||
protected Set<String> getChildUrlSet(final String id) {
|
||||
protected Set<RequestData> getChildUrlSet(final String id) {
|
||||
final SolrGroupManager solrGroupManager = ComponentUtil
|
||||
.getSolrGroupManager();
|
||||
final SolrGroup solrGroup = solrGroupManager
|
||||
|
@ -313,11 +318,12 @@ public class FessS2RobotThread extends S2RobotThread {
|
|||
if (logger.isDebugEnabled()) {
|
||||
logger.debug("Found solr documents: " + docList);
|
||||
}
|
||||
final Set<String> urlSet = new HashSet<String>(docList.size());
|
||||
final Set<RequestData> urlSet = new HashSet<>(docList.size());
|
||||
for (final SolrDocument doc : docList) {
|
||||
final Object obj = doc.get("url");
|
||||
if (obj != null) {
|
||||
urlSet.add(obj.toString());
|
||||
urlSet.add(RequestDataBuilder.newRequestData().get()
|
||||
.url(obj.toString()).build());
|
||||
}
|
||||
}
|
||||
return urlSet;
|
||||
|
|
|
@ -54,8 +54,10 @@ import org.seasar.framework.util.InputStreamUtil;
|
|||
import org.seasar.framework.util.SerializeUtil;
|
||||
import org.seasar.robot.RobotCrawlAccessException;
|
||||
import org.seasar.robot.RobotSystemException;
|
||||
import org.seasar.robot.builder.RequestDataBuilder;
|
||||
import org.seasar.robot.client.fs.ChildUrlsException;
|
||||
import org.seasar.robot.entity.AccessResultData;
|
||||
import org.seasar.robot.entity.RequestData;
|
||||
import org.seasar.robot.entity.ResponseData;
|
||||
import org.seasar.robot.entity.ResultData;
|
||||
import org.seasar.robot.entity.UrlQueue;
|
||||
|
@ -66,7 +68,6 @@ import org.slf4j.LoggerFactory;
|
|||
import org.w3c.dom.Document;
|
||||
import org.w3c.dom.Node;
|
||||
import org.w3c.dom.NodeList;
|
||||
import org.w3c.dom.traversal.NodeIterator;
|
||||
import org.xml.sax.InputSource;
|
||||
|
||||
public class FessXpathTransformer extends AbstractFessXpathTransformer {
|
||||
|
@ -201,8 +202,9 @@ public class FessXpathTransformer extends AbstractFessXpathTransformer {
|
|||
final String canonicalUrl = getCanonicalUrl(responseData, document);
|
||||
if (canonicalUrl != null
|
||||
&& !canonicalUrl.equals(responseData.getUrl())) {
|
||||
final Set<String> childUrlSet = new HashSet<String>();
|
||||
childUrlSet.add(canonicalUrl);
|
||||
final Set<RequestData> childUrlSet = new HashSet<>();
|
||||
childUrlSet.add(RequestDataBuilder.newRequestData().get()
|
||||
.url(canonicalUrl).build());
|
||||
throw new ChildUrlsException(childUrlSet);
|
||||
}
|
||||
}
|
||||
|
@ -498,18 +500,21 @@ public class FessXpathTransformer extends AbstractFessXpathTransformer {
|
|||
}
|
||||
}
|
||||
|
||||
protected List<String> getAnchorList(final Document document,
|
||||
protected List<RequestData> getAnchorList(final Document document,
|
||||
final ResponseData responseData) {
|
||||
List<String> anchorList = new ArrayList<String>();
|
||||
List<RequestData> anchorList = new ArrayList<>();
|
||||
final String baseHref = getBaseHref(document);
|
||||
try {
|
||||
final URL url = new URL(baseHref != null ? baseHref
|
||||
: responseData.getUrl());
|
||||
for (final Map.Entry<String, String> entry : childUrlRuleMap
|
||||
.entrySet()) {
|
||||
anchorList.addAll(getUrlFromTagAttribute(url, document,
|
||||
for (String u : getUrlFromTagAttribute(url, document,
|
||||
entry.getKey(), entry.getValue(),
|
||||
responseData.getCharSet()));
|
||||
responseData.getCharSet())) {
|
||||
anchorList.add(RequestDataBuilder.newRequestData().get()
|
||||
.url(u).build());
|
||||
}
|
||||
}
|
||||
anchorList = convertChildUrlList(anchorList);
|
||||
} catch (final Exception e) {
|
||||
|
@ -521,20 +526,19 @@ public class FessXpathTransformer extends AbstractFessXpathTransformer {
|
|||
}
|
||||
|
||||
@Override
|
||||
protected List<String> convertChildUrlList(final List<String> urlList) {
|
||||
|
||||
final List<String> newUrlList = new ArrayList<String>();
|
||||
protected List<RequestData> convertChildUrlList(
|
||||
final List<RequestData> urlList) {
|
||||
if (urlList != null) {
|
||||
for (String url : urlList) {
|
||||
for (RequestData requestData : urlList) {
|
||||
String url = requestData.getUrl();
|
||||
for (final Map.Entry<String, String> entry : convertUrlMap
|
||||
.entrySet()) {
|
||||
url = url.replaceAll(entry.getKey(), entry.getValue());
|
||||
}
|
||||
|
||||
newUrlList.add(replaceOverlappingHost(url));
|
||||
requestData.setUrl(replaceOverlappingHost(url));
|
||||
}
|
||||
}
|
||||
return newUrlList;
|
||||
return urlList;
|
||||
}
|
||||
|
||||
public void addPrunedTag(final String tagName) {
|
||||
|
|
Binary file not shown.
Binary file not shown.
|
@ -33,7 +33,9 @@ import javax.xml.transform.stream.StreamResult;
|
|||
import org.cyberneko.html.parsers.DOMParser;
|
||||
import org.seasar.extension.unit.S2TestCase;
|
||||
import org.seasar.framework.container.ComponentNotFoundRuntimeException;
|
||||
import org.seasar.robot.builder.RequestDataBuilder;
|
||||
import org.seasar.robot.client.fs.ChildUrlsException;
|
||||
import org.seasar.robot.entity.RequestData;
|
||||
import org.seasar.robot.entity.ResponseData;
|
||||
import org.w3c.dom.Document;
|
||||
import org.w3c.dom.Node;
|
||||
|
@ -178,32 +180,37 @@ public class FessXpathTransformerTest extends S2TestCase {
|
|||
}
|
||||
|
||||
public void test_convertChildUrlList() {
|
||||
List<String> urlList = new ArrayList<String>();
|
||||
List<RequestData> urlList = new ArrayList<>();
|
||||
|
||||
urlList = fessXpathTransformer.convertChildUrlList(urlList);
|
||||
assertEquals(0, urlList.size());
|
||||
|
||||
urlList.clear();
|
||||
urlList.add("http://www.example.com");
|
||||
urlList.add(RequestDataBuilder.newRequestData().get()
|
||||
.url("http://www.example.com").build());
|
||||
urlList = fessXpathTransformer.convertChildUrlList(urlList);
|
||||
assertEquals(1, urlList.size());
|
||||
assertEquals("http://www.example.com", urlList.get(0));
|
||||
assertEquals("http://www.example.com", urlList.get(0).getUrl());
|
||||
|
||||
urlList.clear();
|
||||
urlList.add("http://www.example.com");
|
||||
urlList.add("http://www.test.com");
|
||||
urlList.add(RequestDataBuilder.newRequestData().get()
|
||||
.url("http://www.example.com").build());
|
||||
urlList.add(RequestDataBuilder.newRequestData().get()
|
||||
.url("http://www.test.com").build());
|
||||
urlList = fessXpathTransformer.convertChildUrlList(urlList);
|
||||
assertEquals(2, urlList.size());
|
||||
assertEquals("http://www.example.com", urlList.get(0));
|
||||
assertEquals("http://www.test.com", urlList.get(1));
|
||||
assertEquals("http://www.example.com", urlList.get(0).getUrl());
|
||||
assertEquals("http://www.test.com", urlList.get(1).getUrl());
|
||||
|
||||
urlList.clear();
|
||||
urlList.add("feed://www.example.com");
|
||||
urlList.add("http://www.test.com");
|
||||
urlList.add(RequestDataBuilder.newRequestData().get()
|
||||
.url("feed://www.example.com").build());
|
||||
urlList.add(RequestDataBuilder.newRequestData().get()
|
||||
.url("http://www.test.com").build());
|
||||
urlList = fessXpathTransformer.convertChildUrlList(urlList);
|
||||
assertEquals(2, urlList.size());
|
||||
assertEquals("http://www.example.com", urlList.get(0));
|
||||
assertEquals("http://www.test.com", urlList.get(1));
|
||||
assertEquals("http://www.example.com", urlList.get(0).getUrl());
|
||||
assertEquals("http://www.test.com", urlList.get(1).getUrl());
|
||||
|
||||
}
|
||||
|
||||
|
@ -277,10 +284,10 @@ public class FessXpathTransformerTest extends S2TestCase {
|
|||
transformer.putAdditionalData(dataMap, responseData, document);
|
||||
fail();
|
||||
} catch (final ChildUrlsException e) {
|
||||
final Set<String> childUrlList = e.getChildUrlList();
|
||||
final Set<RequestData> childUrlList = e.getChildUrlList();
|
||||
assertEquals(1, childUrlList.size());
|
||||
assertEquals("http://example.com/hoge", childUrlList.iterator()
|
||||
.next());
|
||||
.next().getUrl());
|
||||
}
|
||||
|
||||
data = "<html><link rel=\"canonical\" href=\"http://example.com/hoge\"><body>aaa</body></html>";
|
||||
|
@ -289,10 +296,10 @@ public class FessXpathTransformerTest extends S2TestCase {
|
|||
transformer.putAdditionalData(dataMap, responseData, document);
|
||||
fail();
|
||||
} catch (final ChildUrlsException e) {
|
||||
final Set<String> childUrlList = e.getChildUrlList();
|
||||
final Set<RequestData> childUrlList = e.getChildUrlList();
|
||||
assertEquals(1, childUrlList.size());
|
||||
assertEquals("http://example.com/hoge", childUrlList.iterator()
|
||||
.next());
|
||||
.next().getUrl());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue