fix #446 : add config parameters
This commit is contained in:
parent
6ff0658e4e
commit
3f3f3c5d4f
4 changed files with 85 additions and 39 deletions
|
@ -376,4 +376,10 @@ public class Constants extends CoreLibConstants {
|
|||
public static final String REQUEST_LANGUAGES = "requestLanguages";
|
||||
|
||||
public static final String SEARCH_PREFERENCE_PRIMARY = "_primary";
|
||||
|
||||
public static final String CONFIG_IGNORE_FAILURE_URLS = "ignore.failureUrls";
|
||||
|
||||
public static final String CONFIG_CLEANUP_FILTERS = "cleanup.urlFilters";
|
||||
|
||||
public static final String CONFIG_CLEANUP_ALL = "cleanup.all";
|
||||
}
|
||||
|
|
|
@ -63,6 +63,6 @@ public interface CrawlingConfig {
|
|||
}
|
||||
|
||||
public enum ConfigName {
|
||||
CLIENT, XPATH, META, VALUE, SCRIPT, FIELD;
|
||||
CLIENT, XPATH, META, VALUE, SCRIPT, FIELD, CONFIG;
|
||||
}
|
||||
}
|
|
@ -19,6 +19,7 @@ import java.io.Serializable;
|
|||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import javax.annotation.Resource;
|
||||
|
||||
|
@ -36,6 +37,7 @@ import org.codelibs.fess.crawler.interval.FessIntervalController;
|
|||
import org.codelibs.fess.crawler.service.impl.EsDataService;
|
||||
import org.codelibs.fess.crawler.service.impl.EsUrlFilterService;
|
||||
import org.codelibs.fess.crawler.service.impl.EsUrlQueueService;
|
||||
import org.codelibs.fess.es.config.exentity.CrawlingConfig.ConfigName;
|
||||
import org.codelibs.fess.es.config.exentity.FileConfig;
|
||||
import org.codelibs.fess.es.config.exentity.WebConfig;
|
||||
import org.codelibs.fess.indexer.IndexUpdater;
|
||||
|
@ -179,6 +181,18 @@ public class WebFsIndexHelper implements Serializable {
|
|||
crawlerContext.setMaxAccessCount(maxCount);
|
||||
|
||||
webConfig.initializeClientFactory(crawler.getClientFactory());
|
||||
final Map<String, String> configParamMap = webConfig.getConfigParameterMap(ConfigName.CONFIG);
|
||||
|
||||
// Optional per-config cleanup: when "cleanup.all" is true, deleteCrawlData(sid) is called;
// otherwise, when "cleanup.urlFilters" is true, only the URL filters for sid are deleted.
if (Constants.TRUE.equalsIgnoreCase(configParamMap.get(Constants.CONFIG_CLEANUP_ALL))) {
    deleteCrawlData(sid);
} else if (Constants.TRUE.equalsIgnoreCase(configParamMap.get(Constants.CONFIG_CLEANUP_FILTERS))) {
    final EsUrlFilterService urlFilterService = ComponentUtil.getComponent(EsUrlFilterService.class);
    try {
        urlFilterService.delete(sid);
    } catch (final Exception e) {
        // Best-effort: processing continues, but the cause is logged so the failure can be diagnosed.
        logger.warn("Failed to delete url filters for " + sid, e);
    }
}
|
||||
|
||||
// set urls
|
||||
final String[] urls = urlsStr.split("[\r\n]");
|
||||
|
@ -223,13 +237,15 @@ public class WebFsIndexHelper implements Serializable {
|
|||
}
|
||||
|
||||
// failure url
|
||||
final List<String> excludedUrlList = failureUrlService.getExcludedUrlList(webConfig.getConfigId());
|
||||
for (final String u : excludedUrlList) {
|
||||
if (StringUtil.isNotBlank(u)) {
|
||||
final String urlValue = u.trim();
|
||||
crawler.addExcludeFilter(urlValue);
|
||||
if (logger.isInfoEnabled()) {
|
||||
logger.info("Excluded URL from failures: " + urlValue);
|
||||
// Failure URLs are added as exclude filters unless "ignore.failureUrls" is true for this config.
if (!Constants.TRUE.equalsIgnoreCase(configParamMap.get(Constants.CONFIG_IGNORE_FAILURE_URLS))) {
    final List<String> excludedUrlList = failureUrlService.getExcludedUrlList(webConfig.getConfigId());
    // Guard against a missing list, matching the handling used for file configs in this class.
    if (excludedUrlList != null) {
        for (final String u : excludedUrlList) {
            if (StringUtil.isNotBlank(u)) {
                final String urlValue = u.trim();
                crawler.addExcludeFilter(urlValue);
                if (logger.isInfoEnabled()) {
                    logger.info("Excluded URL from failures: " + urlValue);
                }
            }
        }
    }
}
|
||||
|
@ -282,6 +298,18 @@ public class WebFsIndexHelper implements Serializable {
|
|||
crawlerContext.setMaxAccessCount(maxCount);
|
||||
|
||||
fileConfig.initializeClientFactory(crawler.getClientFactory());
|
||||
final Map<String, String> configParamMap = fileConfig.getConfigParameterMap(ConfigName.CONFIG);
|
||||
|
||||
// Optional per-config cleanup: when "cleanup.all" is true, deleteCrawlData(sid) is called;
// otherwise, when "cleanup.urlFilters" is true, only the URL filters for sid are deleted.
if (Constants.TRUE.equalsIgnoreCase(configParamMap.get(Constants.CONFIG_CLEANUP_ALL))) {
    deleteCrawlData(sid);
} else if (Constants.TRUE.equalsIgnoreCase(configParamMap.get(Constants.CONFIG_CLEANUP_FILTERS))) {
    final EsUrlFilterService urlFilterService = ComponentUtil.getComponent(EsUrlFilterService.class);
    try {
        urlFilterService.delete(sid);
    } catch (final Exception e) {
        // Best-effort: processing continues, but the cause is logged so the failure can be diagnosed.
        logger.warn("Failed to delete url filters for " + sid, e);
    }
}
|
||||
|
||||
// set paths
|
||||
final String[] paths = pathsStr.split("[\r\n]");
|
||||
|
@ -353,14 +381,16 @@ public class WebFsIndexHelper implements Serializable {
|
|||
}
|
||||
|
||||
// failure url
|
||||
final List<String> excludedUrlList = failureUrlService.getExcludedUrlList(fileConfig.getConfigId());
|
||||
if (excludedUrlList != null) {
|
||||
for (final String u : excludedUrlList) {
|
||||
if (StringUtil.isNotBlank(u)) {
|
||||
final String urlValue = u.trim();
|
||||
crawler.addExcludeFilter(urlValue);
|
||||
if (logger.isInfoEnabled()) {
|
||||
logger.info("Excluded Path from failures: " + urlValue);
|
||||
if (!Constants.TRUE.equalsIgnoreCase(configParamMap.get(Constants.CONFIG_IGNORE_FAILURE_URLS))) {
|
||||
final List<String> excludedUrlList = failureUrlService.getExcludedUrlList(fileConfig.getConfigId());
|
||||
if (excludedUrlList != null) {
|
||||
for (final String u : excludedUrlList) {
|
||||
if (StringUtil.isNotBlank(u)) {
|
||||
final String urlValue = u.trim();
|
||||
crawler.addExcludeFilter(urlValue);
|
||||
if (logger.isInfoEnabled()) {
|
||||
logger.info("Excluded Path from failures: " + urlValue);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -472,34 +502,38 @@ public class WebFsIndexHelper implements Serializable {
|
|||
return;
|
||||
}
|
||||
|
||||
final EsUrlFilterService urlFilterService = ComponentUtil.getComponent(EsUrlFilterService.class);
|
||||
final EsUrlQueueService urlQueueService = ComponentUtil.getComponent(EsUrlQueueService.class);
|
||||
final EsDataService dataService = ComponentUtil.getComponent(EsDataService.class);
|
||||
for (final String sid : sessionIdList) {
|
||||
// remove config
|
||||
crawlingConfigHelper.remove(sid);
|
||||
deleteCrawlData(sid);
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
// clear url filter
|
||||
urlFilterService.delete(sid);
|
||||
} catch (Exception e) {
|
||||
logger.warn("Failed to delete UrlFilter for " + sid, e);
|
||||
}
|
||||
protected void deleteCrawlData(final String sid) {
|
||||
final EsUrlFilterService urlFilterService = ComponentUtil.getComponent(EsUrlFilterService.class);
|
||||
final EsUrlQueueService urlQueueService = ComponentUtil.getComponent(EsUrlQueueService.class);
|
||||
final EsDataService dataService = ComponentUtil.getComponent(EsDataService.class);
|
||||
|
||||
try {
|
||||
// clear queue
|
||||
urlQueueService.clearCache();
|
||||
urlQueueService.delete(sid);
|
||||
} catch (Exception e) {
|
||||
logger.warn("Failed to delete UrlQueue for " + sid, e);
|
||||
}
|
||||
try {
|
||||
// clear url filter
|
||||
urlFilterService.delete(sid);
|
||||
} catch (Exception e) {
|
||||
logger.warn("Failed to delete UrlFilter for " + sid, e);
|
||||
}
|
||||
|
||||
try {
|
||||
// clear
|
||||
dataService.delete(sid);
|
||||
} catch (Exception e) {
|
||||
logger.warn("Failed to delete AccessResult for " + sid, e);
|
||||
}
|
||||
try {
|
||||
// clear queue
|
||||
urlQueueService.clearCache();
|
||||
urlQueueService.delete(sid);
|
||||
} catch (Exception e) {
|
||||
logger.warn("Failed to delete UrlQueue for " + sid, e);
|
||||
}
|
||||
|
||||
try {
|
||||
// clear
|
||||
dataService.delete(sid);
|
||||
} catch (Exception e) {
|
||||
logger.warn("Failed to delete AccessResult for " + sid, e);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -33,6 +33,8 @@ public class ParameterUtil {
|
|||
|
||||
protected static final String CLIENT_PREFIX = "client.";
|
||||
|
||||
protected static final String CONFIG_PREFIX = "config.";
|
||||
|
||||
protected static final String FIELD_PREFIX = "field.config.";
|
||||
|
||||
protected ParameterUtil() {
|
||||
|
@ -72,12 +74,14 @@ public class ParameterUtil {
|
|||
|
||||
public static Map<ConfigName, Map<String, String>> createConfigParameterMap(final String configParameters) {
|
||||
final Map<ConfigName, Map<String, String>> map = new HashMap<>();
|
||||
final Map<String, String> configConfigMap = new HashMap<>();
|
||||
final Map<String, String> clientConfigMap = new HashMap<>();
|
||||
final Map<String, String> xpathConfigMap = new HashMap<>();
|
||||
final Map<String, String> metaConfigMap = new HashMap<>();
|
||||
final Map<String, String> valueConfigMap = new HashMap<>();
|
||||
final Map<String, String> scriptConfigMap = new HashMap<>();
|
||||
final Map<String, String> fieldConfigMap = new HashMap<>();
|
||||
map.put(ConfigName.CONFIG, configConfigMap);
|
||||
map.put(ConfigName.CLIENT, clientConfigMap);
|
||||
map.put(ConfigName.XPATH, xpathConfigMap);
|
||||
map.put(ConfigName.META, metaConfigMap);
|
||||
|
@ -86,7 +90,9 @@ public class ParameterUtil {
|
|||
map.put(ConfigName.FIELD, fieldConfigMap);
|
||||
for (final Map.Entry<String, String> entry : ParameterUtil.parse(configParameters).entrySet()) {
|
||||
final String key = entry.getKey();
|
||||
if (key.startsWith(CLIENT_PREFIX)) {
|
||||
if (key.startsWith(CONFIG_PREFIX)) {
|
||||
configConfigMap.put(key.substring(CONFIG_PREFIX.length()), entry.getValue());
|
||||
} else if (key.startsWith(CLIENT_PREFIX)) {
|
||||
clientConfigMap.put(key.substring(CLIENT_PREFIX.length()), entry.getValue());
|
||||
} else if (key.startsWith(XPATH_PREFIX)) {
|
||||
xpathConfigMap.put(key.substring(XPATH_PREFIX.length()), entry.getValue());
|
||||
|
|
Loading…
Add table
Reference in a new issue