fix #2688 add fess-crawler-playwright
This commit is contained in:
parent
34d552f9e9
commit
0b154758bd
4 changed files with 67 additions and 1 deletions
5
pom.xml
5
pom.xml
|
@ -1391,6 +1391,11 @@
|
|||
<artifactId>fess-crawler-es</artifactId>
|
||||
<version>${crawler.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.codelibs.fess</groupId>
|
||||
<artifactId>fess-crawler-playwright</artifactId>
|
||||
<version>${crawler.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>args4j</groupId>
|
||||
<artifactId>args4j</artifactId>
|
||||
|
|
|
@ -275,7 +275,7 @@ public class FessCrawlerThread extends CrawlerThread {
|
|||
final CrawlingConfig crawlingConfig = crawlingConfigHelper.get(crawlerContext.getSessionId());
|
||||
final Map<String, String> clientConfigMap = crawlingConfig.getConfigParameterMap(ConfigName.CLIENT);
|
||||
final String value = clientConfigMap.get(CRAWLER_CLIENTS);
|
||||
return getClientRuleList(value).stream().map(e -> {
|
||||
final CrawlerClient client = getClientRuleList(value).stream().map(e -> {
|
||||
if (e.getSecond().matcher(url).matches()) {
|
||||
return e.getFirst();
|
||||
}
|
||||
|
@ -283,6 +283,10 @@ public class FessCrawlerThread extends CrawlerThread {
|
|||
}).filter(StringUtil::isNotBlank).findFirst()//
|
||||
.map(s -> clientFactory.getClient(s + ":" + url))//
|
||||
.orElseGet(() -> clientFactory.getClient(url));
|
||||
if (logger.isDebugEnabled()) {
|
||||
logger.debug("CrawlerClient: {}", client.getClass().getCanonicalName());
|
||||
}
|
||||
return client;
|
||||
}
|
||||
|
||||
protected List<Pair<String, Pattern>> getClientRuleList(final String value) {
|
||||
|
|
|
@ -420,6 +420,13 @@ public class WebFsIndexHelper {
|
|||
}
|
||||
}
|
||||
}
|
||||
crawlerList.forEach(crawler -> {
|
||||
try {
|
||||
crawler.close();
|
||||
} catch (final Exception e) {
|
||||
logger.warn("Failed to close the crawler.", e);
|
||||
}
|
||||
});
|
||||
crawlerList.clear();
|
||||
crawlerStatusList.clear();
|
||||
|
||||
|
|
|
@ -0,0 +1,50 @@
|
|||
/*
|
||||
* Copyright 2012-2022 CodeLibs Project and the Others.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
* either express or implied. See the License for the specific language
|
||||
* governing permissions and limitations under the License.
|
||||
*/
|
||||
package org.codelibs.fess.crawler;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.codelibs.core.misc.Pair;
|
||||
import org.codelibs.fess.unit.UnitFessTestCase;
|
||||
|
||||
public class FessCrawlerThreadTest extends UnitFessTestCase {
|
||||
|
||||
public void test_getClientRuleList() {
|
||||
FessCrawlerThread crawlerThread = new FessCrawlerThread();
|
||||
|
||||
List<Pair<String, Pattern>> list = crawlerThread.getClientRuleList(null);
|
||||
assertEquals(0, list.size());
|
||||
|
||||
list = crawlerThread.getClientRuleList("");
|
||||
assertEquals(0, list.size());
|
||||
|
||||
list = crawlerThread.getClientRuleList(" ");
|
||||
assertEquals(0, list.size());
|
||||
|
||||
list = crawlerThread.getClientRuleList("playwright:http://.*");
|
||||
assertEquals(1, list.size());
|
||||
assertEquals("playwright", list.get(0).getFirst());
|
||||
assertEquals("http://.*", list.get(0).getSecond().pattern());
|
||||
|
||||
list = crawlerThread.getClientRuleList("playwright:http://.*,playwright:https://.*");
|
||||
assertEquals(2, list.size());
|
||||
assertEquals("playwright", list.get(0).getFirst());
|
||||
assertEquals("http://.*", list.get(0).getSecond().pattern());
|
||||
assertEquals("playwright", list.get(1).getFirst());
|
||||
assertEquals("https://.*", list.get(1).getSecond().pattern());
|
||||
}
|
||||
}
|
Loading…
Add table
Reference in a new issue