#2561 add *.default.script

This commit is contained in:
Shinsuke Sugaya 2021-04-20 07:14:38 +09:00
parent 999097a7b0
commit 5d232e2db0
16 changed files with 137 additions and 27 deletions

View file

@ -59,6 +59,14 @@
<param name="jar.version" value="13.12.0" />
<param name="file.version" value="13.12.0" />
</antcall>
<!-- fess-script-groovy: invokes the install.plugin.jar target with the snapshot
     repository URL and the Maven coordinates of the fess-script-groovy plugin. -->
<!-- NOTE(review): jar.version is a SNAPSHOT and file.version pins one timestamped
     snapshot build (13.12.0-20210419.212149-1); presumably this should move to a
     release repository and version once one is published. Confirm before release. -->
<antcall target="install.plugin.jar">
<param name="repo.url" value="${maven.snapshot.repo.url}" />
<param name="jar.groupId" value="org/codelibs/fess" />
<param name="jar.artifactId" value="fess-script-groovy" />
<param name="jar.version" value="13.12.0-SNAPSHOT" />
<param name="file.version" value="13.12.0-20210419.212149-1" />
</antcall>
</target>
<target name="install.env.jar">

View file

@ -317,19 +317,20 @@ public abstract class AbstractFessFileTransformer extends AbstractTransformer im
putResultDataBody(dataMap, fessConfig.getIndexFieldThumbnail(), responseData.getUrl());
// from config
final String scriptType = crawlingConfig.getScriptType();
final Map<String, String> scriptConfigMap = crawlingConfig.getConfigParameterMap(ConfigName.SCRIPT);
final Map<String, String> metaConfigMap = crawlingConfig.getConfigParameterMap(ConfigName.META);
for (final Map.Entry<String, String> entry : metaConfigMap.entrySet()) {
final String key = entry.getKey();
final String[] values = entry.getValue().split(",");
for (final String value : values) {
putResultDataWithTemplate(dataMap, key, metaDataMap.get(value), scriptConfigMap.get(key));
putResultDataWithTemplate(dataMap, key, metaDataMap.get(value), scriptConfigMap.get(key), scriptType);
}
}
final Map<String, String> valueConfigMap = crawlingConfig.getConfigParameterMap(ConfigName.VALUE);
for (final Map.Entry<String, String> entry : valueConfigMap.entrySet()) {
final String key = entry.getKey();
putResultDataWithTemplate(dataMap, key, entry.getValue(), scriptConfigMap.get(key));
putResultDataWithTemplate(dataMap, key, entry.getValue(), scriptConfigMap.get(key), scriptType);
}
return dataMap;

View file

@ -136,7 +136,13 @@ public interface FessTransformer {
}
}
/**
 * Delegates to the five-argument overload, passing {@code Constants.DEFAULT_SCRIPT}
 * as the script type. All other arguments are forwarded unchanged.
 *
 * @param dataMap the result map forwarded to the five-argument overload
 * @param key the field name forwarded to the five-argument overload
 * @param value the raw value forwarded to the five-argument overload
 * @param template the script template forwarded to the five-argument overload
 * @deprecated use {@link #putResultDataWithTemplate(Map, String, Object, String, String)}
 *             and pass the script type explicitly.
 */
@Deprecated
default void putResultDataWithTemplate(final Map<String, Object> dataMap, final String key, final Object value, final String template) {
putResultDataWithTemplate(dataMap, key, value, template, Constants.DEFAULT_SCRIPT);
}
default void putResultDataWithTemplate(final Map<String, Object> dataMap, final String key, final Object value, final String template,
final String scriptType) {
Object target = value;
if (template != null) {
final Map<String, Object> contextMap = new HashMap<>();
@ -145,19 +151,24 @@ public interface FessTransformer {
paramMap.putAll(dataMap);
paramMap.put("value", target);
paramMap.put("context", contextMap);
target = evaluateValue(template, paramMap);
target = evaluateValue(scriptType, template, paramMap);
}
if (key != null && target != null) {
putResultDataBody(dataMap, key, target);
}
}
default Object evaluateValue(final String template, final Map<String, Object> paramMap) {
/**
 * Delegates to the three-argument overload, passing {@code Constants.DEFAULT_SCRIPT}
 * as the script type.
 *
 * @param template the script template forwarded to the three-argument overload
 * @param paramMap the script parameters forwarded to the three-argument overload
 * @return whatever the three-argument overload returns for the same template and parameters
 * @deprecated use {@link #evaluateValue(String, String, Map)} and pass the script type explicitly.
 */
@Deprecated
default Object evaluateValue(String template, final Map<String, Object> paramMap) {
return evaluateValue(Constants.DEFAULT_SCRIPT, template, paramMap);
}
default Object evaluateValue(final String scriptType, String template, final Map<String, Object> paramMap) {
if (StringUtil.isEmpty(template)) {
return StringUtil.EMPTY;
}
return ComponentUtil.getScriptEngineFactory().getScriptEngine(Constants.DEFAULT_SCRIPT).evaluate(template, paramMap);
return ComponentUtil.getScriptEngineFactory().getScriptEngine(scriptType).evaluate(template, paramMap);
}
default int getMaxSiteLength() {

View file

@ -493,16 +493,17 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf
}
// from config
final String scriptType = crawlingConfig.getScriptType();
final Map<String, String> scriptConfigMap = crawlingConfig.getConfigParameterMap(ConfigName.SCRIPT);
xpathConfigMap.entrySet().stream().filter(e -> !e.getKey().startsWith("default.")).forEach(e -> {
final String key = e.getKey();
final String value = getSingleNodeValue(document, e.getValue(), true);
putResultDataWithTemplate(dataMap, key, value, scriptConfigMap.get(key));
putResultDataWithTemplate(dataMap, key, value, scriptConfigMap.get(key), scriptType);
});
crawlingConfig.getConfigParameterMap(ConfigName.VALUE).entrySet().stream().forEach(e -> {
final String key = e.getKey();
final String value = e.getValue();
putResultDataWithTemplate(dataMap, key, value, scriptConfigMap.get(key));
putResultDataWithTemplate(dataMap, key, value, scriptConfigMap.get(key), scriptType);
});
}

View file

@ -120,7 +120,12 @@ public abstract class AbstractDataStore implements DataStore {
}
/**
 * Delegates to the three-argument overload, using the value returned by
 * {@code FessConfig#getCrawlerDefaultScript()} (the {@code crawler.default.script}
 * property) as the script type.
 *
 * @param template the template forwarded to the three-argument overload
 * @param paramMap the parameters forwarded to the three-argument overload
 * @return whatever the three-argument overload returns for the same template and parameters
 * @deprecated use {@link #convertValue(String, String, Map)} and pass the script type explicitly.
 */
@Deprecated
protected Object convertValue(final String template, final Map<String, Object> paramMap) {
return convertValue(ComponentUtil.getFessConfig().getCrawlerDefaultScript(), template, paramMap);
}
protected Object convertValue(final String scriptType, final String template, final Map<String, Object> paramMap) {
if (StringUtil.isEmpty(template)) {
return StringUtil.EMPTY;
}
@ -129,7 +134,7 @@ public abstract class AbstractDataStore implements DataStore {
return paramMap.get(template);
}
return ComponentUtil.getScriptEngineFactory().getScriptEngine(Constants.DEFAULT_SCRIPT).evaluate(template, paramMap);
return ComponentUtil.getScriptEngineFactory().getScriptEngine(scriptType).evaluate(template, paramMap);
}
protected long getReadInterval(final Map<String, String> paramMap) {

View file

@ -20,6 +20,7 @@ import java.util.function.Supplier;
import org.apache.http.auth.UsernamePasswordCredentials;
import org.codelibs.core.lang.StringUtil;
import org.codelibs.fess.Constants;
import org.codelibs.fess.crawler.client.CrawlerClientFactory;
import org.codelibs.fess.crawler.client.ftp.FtpClient;
import org.codelibs.fess.crawler.client.http.HcHttpClient;
@ -65,6 +66,14 @@ public interface CrawlingConfig {
}
}
/**
 * Resolves the script type for this crawling configuration.
 *
 * <p>The value stored under {@code Param.Config.SCRIPT_TYPE} in the
 * {@code ConfigName.CONFIG} parameter map is returned when
 * {@code StringUtil.isNotBlank} accepts it; otherwise
 * {@code Constants.DEFAULT_SCRIPT} is returned.
 *
 * @return the configured script type, or {@code Constants.DEFAULT_SCRIPT} when none is configured
 */
default String getScriptType() {
    final String configured = getConfigParameterMap(ConfigName.CONFIG).get(Param.Config.SCRIPT_TYPE);
    return StringUtil.isNotBlank(configured) ? configured : Constants.DEFAULT_SCRIPT;
}
public enum ConfigType {
WEB("W"), FILE("F"), DATA("D");
@ -121,6 +130,7 @@ public interface CrawlingConfig {
public static final String HTML_CANONICAL_XPATH = "html.canonical.xpath";
public static final String PIPELINE = "pipeline";
public static final String IGNORE_ROBOTS_TAGS = "ignore.robots.tags";
public static final String SCRIPT_TYPE = "script.type";
}
// meta.*

View file

@ -27,13 +27,16 @@ public class DocBoostMatcher {
private String matchExpression;
private String scriptType;
/**
 * Creates a matcher whose script type is {@code Constants.DEFAULT_SCRIPT}.
 * This constructor assigns no other field.
 */
public DocBoostMatcher() {
// Only the script type is initialised here.
scriptType = Constants.DEFAULT_SCRIPT;
}
/**
 * Creates a matcher from a boost rule: the rule's URL expression becomes the match
 * expression and its boost expression becomes the boost expression.
 *
 * @param rule the rule whose {@code getUrlExpr()} and {@code getBoostExpr()} values are copied
 */
public DocBoostMatcher(final BoostDocumentRule rule) {
matchExpression = rule.getUrlExpr();
boostExpression = rule.getBoostExpr();
// Script type comes from FessConfig#getCrawlerDefaultScript().
// NOTE(review): the no-argument constructor uses Constants.DEFAULT_SCRIPT instead;
// confirm the two defaults are meant to differ.
scriptType = ComponentUtil.getFessConfig().getCrawlerDefaultScript();
}
public boolean match(final Map<String, Object> map) {
@ -42,8 +45,7 @@ public class DocBoostMatcher {
return false;
}
final Object value =
ComponentUtil.getScriptEngineFactory().getScriptEngine(Constants.DEFAULT_SCRIPT).evaluate(matchExpression, map);
final Object value = ComponentUtil.getScriptEngineFactory().getScriptEngine(scriptType).evaluate(matchExpression, map);
if (value instanceof Boolean) {
return ((Boolean) value);
}
@ -56,8 +58,7 @@ public class DocBoostMatcher {
return 0.0f;
}
final Object value =
ComponentUtil.getScriptEngineFactory().getScriptEngine(Constants.DEFAULT_SCRIPT).evaluate(boostExpression, map);
final Object value = ComponentUtil.getScriptEngineFactory().getScriptEngine(scriptType).evaluate(boostExpression, map);
if (value instanceof Integer) {
return ((Integer) value).floatValue();
}

View file

@ -15,10 +15,17 @@
*/
package org.codelibs.fess.job;
import org.codelibs.fess.Constants;
public abstract class JobExecutor {
protected ShutdownListener shutdownListener;
public abstract Object execute(String script);
/**
 * Delegates to {@link #execute(String, String)}, passing
 * {@code Constants.DEFAULT_SCRIPT} as the script type.
 *
 * @param script the script text forwarded to the two-argument overload
 * @return whatever the two-argument overload returns for the same script
 * @deprecated use {@link #execute(String, String)} and pass the script type explicitly.
 */
@Deprecated
public Object execute(String script) {
return execute(Constants.DEFAULT_SCRIPT, script);
}
/**
 * Executes the given script; subclasses supply the implementation.
 *
 * @param scriptType identifies the script type; presumably the name of the script engine
 *        to use (TODO confirm against implementations)
 * @param script the script text to execute
 * @return the result produced by the implementation
 */
public abstract Object execute(String scriptType, String script);
public void shutdown() {
shutdownListener.onShutdown();

View file

@ -18,18 +18,18 @@ package org.codelibs.fess.job.impl;
import java.util.HashMap;
import java.util.Map;
import org.codelibs.fess.Constants;
import org.codelibs.fess.job.JobExecutor;
import org.codelibs.fess.util.ComponentUtil;
@Deprecated
public class GroovyExecutor extends JobExecutor {
@Override
public Object execute(final String script) {
public Object execute(final String scriptType, final String script) {
final Map<String, Object> params = new HashMap<>();
params.put("executor", this);
return ComponentUtil.getScriptEngineFactory().getScriptEngine(Constants.DEFAULT_SCRIPT).evaluate(script, params);
return ComponentUtil.getScriptEngineFactory().getScriptEngine(scriptType).evaluate(script, params);
}
}

View file

@ -0,0 +1,34 @@
/*
* Copyright 2012-2021 CodeLibs Project and the Others.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied. See the License for the specific language
* governing permissions and limitations under the License.
*/
package org.codelibs.fess.job.impl;
import java.util.HashMap;
import java.util.Map;
import org.codelibs.fess.job.JobExecutor;
import org.codelibs.fess.util.ComponentUtil;
/**
 * {@link JobExecutor} that evaluates a job script with the script engine
 * selected by the requested script type.
 */
public class ScriptExecutor extends JobExecutor {

    /**
     * Evaluates {@code script} with the engine obtained from the script engine
     * factory for {@code scriptType}.
     *
     * @param scriptType the script type passed to the script engine factory
     * @param script the script text to evaluate
     * @return the value returned by the engine's evaluation
     */
    @Override
    public Object execute(final String scriptType, final String script) {
        final Map<String, Object> bindings = new HashMap<>();
        // Make this executor available to the script under the "executor" key.
        bindings.put("executor", this);
        return ComponentUtil.getScriptEngineFactory() //
                .getScriptEngine(scriptType) //
                .evaluate(script, bindings);
    }
}

View file

@ -181,6 +181,9 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
/** The key of the configuration. e.g. 0 */
String JOB_MAX_CRAWLER_PROCESSES = "job.max.crawler.processes";
/** The key of the configuration. e.g. groovy */
String JOB_DEFAULT_SCRIPT = "job.default.script";
/** The key of the configuration. e.g. 0 */
String PROCESSORS = "processors";
@ -283,6 +286,9 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
/** The key of the configuration. e.g. 262144 */
String HTTP_FILEUPLOAD_THRESHOLD_SIZE = "http.fileupload.threshold.size";
/** The key of the configuration. e.g. groovy */
String CRAWLER_DEFAULT_SCRIPT = "crawler.default.script";
/** The key of the configuration. e.g. 50 */
String CRAWLER_DOCUMENT_MAX_SITE_LENGTH = "crawler.document.max.site.length";
@ -1967,6 +1973,13 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
*/
Integer getJobMaxCrawlerProcessesAsInteger();
/**
* Get the value for the key 'job.default.script'. <br>
* The value is, e.g. groovy <br>
* @return The value of found property. (NotNull: if not found, exception but basically no way)
*/
String getJobDefaultScript();
/**
* Get the value for the key 'processors'. <br>
* The value is, e.g. 0 <br>
@ -2376,10 +2389,17 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
*/
Integer getHttpFileuploadThresholdSizeAsInteger();
/**
* Get the value for the key 'crawler.default.script'. <br>
* The value is, e.g. groovy <br>
* comment: common
* @return The value of found property. (NotNull: if not found, exception but basically no way)
*/
String getCrawlerDefaultScript();
/**
* Get the value for the key 'crawler.document.max.site.length'. <br>
* The value is, e.g. 50 <br>
* comment: common
* @return The value of found property. (NotNull: if not found, exception but basically no way)
*/
String getCrawlerDocumentMaxSiteLength();
@ -2387,7 +2407,6 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
/**
* Get the value for the key 'crawler.document.max.site.length' as {@link Integer}. <br>
* The value is, e.g. 50 <br>
* comment: common
* @return The value of found property. (NotNull: if not found, exception but basically no way)
* @throws NumberFormatException When the property is not integer.
*/
@ -7099,6 +7118,10 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
return getAsInteger(FessConfig.JOB_MAX_CRAWLER_PROCESSES);
}
public String getJobDefaultScript() {
return get(FessConfig.JOB_DEFAULT_SCRIPT);
}
public String getProcessors() {
return get(FessConfig.PROCESSORS);
}
@ -7323,6 +7346,10 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
return getAsInteger(FessConfig.HTTP_FILEUPLOAD_THRESHOLD_SIZE);
}
public String getCrawlerDefaultScript() {
return get(FessConfig.CRAWLER_DEFAULT_SCRIPT);
}
public String getCrawlerDocumentMaxSiteLength() {
return get(FessConfig.CRAWLER_DOCUMENT_MAX_SITE_LENGTH);
}
@ -9809,6 +9836,7 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
defaultMap.put(FessConfig.JOB_TEMPLATE_SCRIPT,
"return container.getComponent(\"crawlJob\").logLevel(\"info\").sessionId(\"{3}\").webConfigIds([{0}] as String[]).fileConfigIds([{1}] as String[]).dataConfigIds([{2}] as String[]).jobExecutor(executor).execute();");
defaultMap.put(FessConfig.JOB_MAX_CRAWLER_PROCESSES, "0");
defaultMap.put(FessConfig.JOB_DEFAULT_SCRIPT, "groovy");
defaultMap.put(FessConfig.PROCESSORS, "0");
defaultMap.put(FessConfig.JAVA_COMMAND_PATH, "java");
defaultMap.put(FessConfig.PYTHON_COMMAND_PATH, "python");
@ -9844,6 +9872,7 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
defaultMap.put(FessConfig.HTTP_PROXY_PASSWORD, "");
defaultMap.put(FessConfig.HTTP_FILEUPLOAD_MAX_SIZE, "262144000");
defaultMap.put(FessConfig.HTTP_FILEUPLOAD_THRESHOLD_SIZE, "262144");
defaultMap.put(FessConfig.CRAWLER_DEFAULT_SCRIPT, "groovy");
defaultMap.put(FessConfig.CRAWLER_DOCUMENT_MAX_SITE_LENGTH, "50");
defaultMap.put(FessConfig.CRAWLER_DOCUMENT_SITE_ENCODING, "UTF-8");
defaultMap.put(FessConfig.CRAWLER_DOCUMENT_UNKNOWN_HOSTNAME, "unknown");

View file

@ -322,7 +322,10 @@ public final class ComponentUtil {
}
public static JobExecutor getJobExecutor(final String name) {
return getComponent(name + JOB_EXECUTOR_SUFFIX);
if (name.endsWith(JOB_EXECUTOR_SUFFIX)) {
return getComponent(name);
}
return getComponent("script" + JOB_EXECUTOR_SUFFIX);
}
public static FileTypeHelper getFileTypeHelper() {

View file

@ -144,6 +144,7 @@ job.template.title.file=File Crawler - {0}
job.template.title.data=Data Crawler - {0}
job.template.script=return container.getComponent("crawlJob").logLevel("info").sessionId("{3}").webConfigIds([{0}] as String[]).fileConfigIds([{1}] as String[]).dataConfigIds([{2}] as String[]).jobExecutor(executor).execute();
job.max.crawler.processes=0
job.default.script=groovy
processors=0
java.command.path=java
@ -189,6 +190,7 @@ http.fileupload.threshold.size=262144
# ====
# common
crawler.default.script=groovy
crawler.document.max.site.length=50
crawler.document.site.encoding=UTF-8
crawler.document.unknown.hostname=unknown

View file

@ -2,7 +2,7 @@
<!DOCTYPE components PUBLIC "-//DBFLUTE//DTD LastaDi 1.0//EN"
"http://dbflute.org/meta/lastadi10.dtd">
<components>
<component name="groovyJobExecutor" class="org.codelibs.fess.job.impl.GroovyExecutor" instance="prototype">
<component name="scriptJobExecutor" class="org.codelibs.fess.job.impl.ScriptExecutor" instance="prototype">
</component>
<!-- Jobs -->

View file

@ -22,16 +22,16 @@ import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.queryparser.ext.ExtendableQueryParser;
import org.codelibs.core.io.FileUtil;
import org.codelibs.core.misc.DynamicProperties;
import org.codelibs.fess.Constants;
import org.codelibs.fess.entity.SearchRequestParams.SearchRequestType;
import org.codelibs.fess.unit.UnitFessTestCase;
import org.codelibs.fess.util.ComponentUtil;
import org.codelibs.fesen.index.query.BoolQueryBuilder;
import org.codelibs.fesen.index.query.MatchPhraseQueryBuilder;
import org.codelibs.fesen.index.query.PrefixQueryBuilder;
import org.codelibs.fesen.index.query.QueryBuilder;
import org.codelibs.fesen.index.query.QueryBuilders;
import org.codelibs.fesen.index.query.functionscore.ScoreFunctionBuilders;
import org.codelibs.fess.Constants;
import org.codelibs.fess.entity.SearchRequestParams.SearchRequestType;
import org.codelibs.fess.unit.UnitFessTestCase;
import org.codelibs.fess.util.ComponentUtil;
public class QueryHelperTest extends UnitFessTestCase {

View file

@ -22,9 +22,7 @@ import javax.servlet.http.Cookie;
import javax.servlet.http.HttpServletRequest;
import org.codelibs.core.crypto.CachedCipher;
import org.codelibs.core.exception.IllegalBlockSizeRuntimeException;
import org.codelibs.fess.unit.UnitFessTestCase;
import org.codelibs.fess.util.ComponentUtil;
public class RoleQueryHelperTest extends UnitFessTestCase {
public CachedCipher cipher;