fix #446 : add config parameters

This commit is contained in:
Shinsuke Sugaya 2016-03-23 00:02:29 +09:00
parent 6ff0658e4e
commit 3f3f3c5d4f
4 changed files with 85 additions and 39 deletions

View file

@ -376,4 +376,10 @@ public class Constants extends CoreLibConstants {
public static final String REQUEST_LANGUAGES = "requestLanguages";
public static final String SEARCH_PREFERENCE_PRIMARY = "_primary";
public static final String CONFIG_IGNORE_FAILURE_URLS = "ignore.failureUrls";
public static final String CONFIG_CLEANUP_FILTERS = "cleanup.urlFilters";
public static final String CONFIG_CLEANUP_ALL = "cleanup.all";
}

View file

@ -63,6 +63,6 @@ public interface CrawlingConfig {
}
public enum ConfigName {
CLIENT, XPATH, META, VALUE, SCRIPT, FIELD;
CLIENT, XPATH, META, VALUE, SCRIPT, FIELD, CONFIG;
}
}

View file

@ -19,6 +19,7 @@ import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import javax.annotation.Resource;
@ -36,6 +37,7 @@ import org.codelibs.fess.crawler.interval.FessIntervalController;
import org.codelibs.fess.crawler.service.impl.EsDataService;
import org.codelibs.fess.crawler.service.impl.EsUrlFilterService;
import org.codelibs.fess.crawler.service.impl.EsUrlQueueService;
import org.codelibs.fess.es.config.exentity.CrawlingConfig.ConfigName;
import org.codelibs.fess.es.config.exentity.FileConfig;
import org.codelibs.fess.es.config.exentity.WebConfig;
import org.codelibs.fess.indexer.IndexUpdater;
@ -179,6 +181,18 @@ public class WebFsIndexHelper implements Serializable {
crawlerContext.setMaxAccessCount(maxCount);
webConfig.initializeClientFactory(crawler.getClientFactory());
// Optional per-config cleanup, evaluated before the URLs below are registered.
// "cleanup.all" removes all crawl data kept for this session id via deleteCrawlData;
// otherwise "cleanup.urlFilters" removes only the stored URL filters.
final Map<String, String> configParamMap = webConfig.getConfigParameterMap(ConfigName.CONFIG);
if (Constants.TRUE.equalsIgnoreCase(configParamMap.get(Constants.CONFIG_CLEANUP_ALL))) {
    deleteCrawlData(sid);
} else if (Constants.TRUE.equalsIgnoreCase(configParamMap.get(Constants.CONFIG_CLEANUP_FILTERS))) {
    final EsUrlFilterService urlFilterService = ComponentUtil.getComponent(EsUrlFilterService.class);
    try {
        urlFilterService.delete(sid);
    } catch (Exception e) {
        // Best effort: the crawl continues, but the cause is logged so the failure
        // can be diagnosed (the other delete failures in this class log it as well).
        logger.warn("Failed to delete url filters for " + sid, e);
    }
}
// set urls
final String[] urls = urlsStr.split("[\r\n]");
@ -223,13 +237,15 @@ public class WebFsIndexHelper implements Serializable {
}
// failure url
final List<String> excludedUrlList = failureUrlService.getExcludedUrlList(webConfig.getConfigId());
for (final String u : excludedUrlList) {
if (StringUtil.isNotBlank(u)) {
final String urlValue = u.trim();
crawler.addExcludeFilter(urlValue);
if (logger.isInfoEnabled()) {
logger.info("Excluded URL from failures: " + urlValue);
// Exclude URLs recorded as failures unless "ignore.failureUrls" is set to true.
if (!Constants.TRUE.equalsIgnoreCase(configParamMap.get(Constants.CONFIG_IGNORE_FAILURE_URLS))) {
    final List<String> excludedUrlList = failureUrlService.getExcludedUrlList(webConfig.getConfigId());
    // Guard against a null list, as the file-system crawl path does for the same call.
    if (excludedUrlList != null) {
        for (final String u : excludedUrlList) {
            if (StringUtil.isNotBlank(u)) {
                final String urlValue = u.trim();
                crawler.addExcludeFilter(urlValue);
                if (logger.isInfoEnabled()) {
                    logger.info("Excluded URL from failures: " + urlValue);
                }
            }
        }
    }
}
@ -282,6 +298,18 @@ public class WebFsIndexHelper implements Serializable {
crawlerContext.setMaxAccessCount(maxCount);
fileConfig.initializeClientFactory(crawler.getClientFactory());
// Optional per-config cleanup, evaluated before the paths below are registered.
// "cleanup.all" removes all crawl data kept for this session id via deleteCrawlData;
// otherwise "cleanup.urlFilters" removes only the stored URL filters.
final Map<String, String> configParamMap = fileConfig.getConfigParameterMap(ConfigName.CONFIG);
if (Constants.TRUE.equalsIgnoreCase(configParamMap.get(Constants.CONFIG_CLEANUP_ALL))) {
    deleteCrawlData(sid);
} else if (Constants.TRUE.equalsIgnoreCase(configParamMap.get(Constants.CONFIG_CLEANUP_FILTERS))) {
    final EsUrlFilterService urlFilterService = ComponentUtil.getComponent(EsUrlFilterService.class);
    try {
        urlFilterService.delete(sid);
    } catch (Exception e) {
        // Best effort: the crawl continues, but the cause is logged so the failure
        // can be diagnosed (the other delete failures in this class log it as well).
        logger.warn("Failed to delete url filters for " + sid, e);
    }
}
// set paths
final String[] paths = pathsStr.split("[\r\n]");
@ -353,14 +381,16 @@ public class WebFsIndexHelper implements Serializable {
}
// failure url
final List<String> excludedUrlList = failureUrlService.getExcludedUrlList(fileConfig.getConfigId());
if (excludedUrlList != null) {
for (final String u : excludedUrlList) {
if (StringUtil.isNotBlank(u)) {
final String urlValue = u.trim();
crawler.addExcludeFilter(urlValue);
if (logger.isInfoEnabled()) {
logger.info("Excluded Path from failures: " + urlValue);
if (!Constants.TRUE.equalsIgnoreCase(configParamMap.get(Constants.CONFIG_IGNORE_FAILURE_URLS))) {
final List<String> excludedUrlList = failureUrlService.getExcludedUrlList(fileConfig.getConfigId());
if (excludedUrlList != null) {
for (final String u : excludedUrlList) {
if (StringUtil.isNotBlank(u)) {
final String urlValue = u.trim();
crawler.addExcludeFilter(urlValue);
if (logger.isInfoEnabled()) {
logger.info("Excluded Path from failures: " + urlValue);
}
}
}
}
@ -472,34 +502,38 @@ public class WebFsIndexHelper implements Serializable {
return;
}
final EsUrlFilterService urlFilterService = ComponentUtil.getComponent(EsUrlFilterService.class);
final EsUrlQueueService urlQueueService = ComponentUtil.getComponent(EsUrlQueueService.class);
final EsDataService dataService = ComponentUtil.getComponent(EsDataService.class);
for (final String sid : sessionIdList) {
// remove config
crawlingConfigHelper.remove(sid);
deleteCrawlData(sid);
}
}
try {
// clear url filter
urlFilterService.delete(sid);
} catch (Exception e) {
logger.warn("Failed to delete UrlFilter for " + sid, e);
}
/**
 * Deletes the crawl data stored for one session id.
 *
 * Obtains the URL filter, URL queue and data services through ComponentUtil and
 * asks each of them to delete the entries belonging to the given id. Each deletion
 * sits in its own try/catch: a failure is logged at WARN level together with the
 * exception, and the remaining deletions are still attempted.
 *
 * NOTE(review): in this listing the "clear queue" and "clear" steps appear twice.
 * That looks like residue of a diff rendering (removed and added lines interleaved)
 * rather than intended behavior -- confirm against the actual file.
 *
 * @param sid the session id whose crawl data should be removed
 */
protected void deleteCrawlData(final String sid) {
final EsUrlFilterService urlFilterService = ComponentUtil.getComponent(EsUrlFilterService.class);
final EsUrlQueueService urlQueueService = ComponentUtil.getComponent(EsUrlQueueService.class);
final EsDataService dataService = ComponentUtil.getComponent(EsDataService.class);
try {
// clear queue: clear the service's cache first, then delete the queue entries for this id
urlQueueService.clearCache();
urlQueueService.delete(sid);
} catch (Exception e) {
logger.warn("Failed to delete UrlQueue for " + sid, e);
}
try {
// clear url filter
urlFilterService.delete(sid);
} catch (Exception e) {
logger.warn("Failed to delete UrlFilter for " + sid, e);
}
try {
// clear the data service entries for this id (the log message calls them AccessResult)
dataService.delete(sid);
} catch (Exception e) {
logger.warn("Failed to delete AccessResult for " + sid, e);
}
try {
// clear queue (repeated step -- see the review note in the method comment)
urlQueueService.clearCache();
urlQueueService.delete(sid);
} catch (Exception e) {
logger.warn("Failed to delete UrlQueue for " + sid, e);
}
try {
// clear data service entries (repeated step -- see the review note in the method comment)
dataService.delete(sid);
} catch (Exception e) {
logger.warn("Failed to delete AccessResult for " + sid, e);
}
}

View file

@ -33,6 +33,8 @@ public class ParameterUtil {
protected static final String CLIENT_PREFIX = "client.";
protected static final String CONFIG_PREFIX = "config.";
protected static final String FIELD_PREFIX = "field.config.";
protected ParameterUtil() {
@ -72,12 +74,14 @@ public class ParameterUtil {
public static Map<ConfigName, Map<String, String>> createConfigParameterMap(final String configParameters) {
final Map<ConfigName, Map<String, String>> map = new HashMap<>();
final Map<String, String> configConfigMap = new HashMap<>();
final Map<String, String> clientConfigMap = new HashMap<>();
final Map<String, String> xpathConfigMap = new HashMap<>();
final Map<String, String> metaConfigMap = new HashMap<>();
final Map<String, String> valueConfigMap = new HashMap<>();
final Map<String, String> scriptConfigMap = new HashMap<>();
final Map<String, String> fieldConfigMap = new HashMap<>();
map.put(ConfigName.CONFIG, configConfigMap);
map.put(ConfigName.CLIENT, clientConfigMap);
map.put(ConfigName.XPATH, xpathConfigMap);
map.put(ConfigName.META, metaConfigMap);
@ -86,7 +90,9 @@ public class ParameterUtil {
map.put(ConfigName.FIELD, fieldConfigMap);
for (final Map.Entry<String, String> entry : ParameterUtil.parse(configParameters).entrySet()) {
final String key = entry.getKey();
if (key.startsWith(CLIENT_PREFIX)) {
if (key.startsWith(CONFIG_PREFIX)) {
configConfigMap.put(key.substring(CONFIG_PREFIX.length()), entry.getValue());
} else if (key.startsWith(CLIENT_PREFIX)) {
clientConfigMap.put(key.substring(CLIENT_PREFIX.length()), entry.getValue());
} else if (key.startsWith(XPATH_PREFIX)) {
xpathConfigMap.put(key.substring(XPATH_PREFIX.length()), entry.getValue());