diff --git a/src/main/java/org/codelibs/fess/crawler/transformer/AbstractFessFileTransformer.java b/src/main/java/org/codelibs/fess/crawler/transformer/AbstractFessFileTransformer.java index 2432bf7b7..af0fce7f4 100644 --- a/src/main/java/org/codelibs/fess/crawler/transformer/AbstractFessFileTransformer.java +++ b/src/main/java/org/codelibs/fess/crawler/transformer/AbstractFessFileTransformer.java @@ -44,6 +44,7 @@ import org.codelibs.fess.crawler.extractor.Extractor; import org.codelibs.fess.crawler.extractor.impl.TikaExtractor; import org.codelibs.fess.crawler.transformer.impl.AbstractTransformer; import org.codelibs.fess.crawler.util.CrawlingParameterUtil; +import org.codelibs.fess.crawler.util.FieldConfigs; import org.codelibs.fess.es.config.exentity.CrawlingConfig; import org.codelibs.fess.es.config.exentity.CrawlingConfig.ConfigName; import org.codelibs.fess.es.config.exentity.CrawlingConfig.Param.Config; @@ -181,7 +182,7 @@ public abstract class AbstractFessFileTransformer extends AbstractTransformer im final String indexingTarget = crawlingConfig.getIndexingTarget(url); url = pathMappingHelper.replaceUrl(sessionId, url); - final Map fieldConfigMap = crawlingConfig.getConfigParameterMap(ConfigName.FIELD); + final FieldConfigs fieldConfigs = new FieldConfigs(crawlingConfig.getConfigParameterMap(ConfigName.FIELD)); String urlEncoding; final UrlQueue urlQueue = CrawlingParameterUtil.getUrlQueue(); @@ -221,7 +222,7 @@ public abstract class AbstractFessFileTransformer extends AbstractTransformer im responseData.addMetaData(Extractor.class.getSimpleName(), extractor); final String body = documentHelper.getContent(crawlingConfig, responseData, bodyBase, dataMap); putResultDataBody(dataMap, fessConfig.getIndexFieldContent(), body); - if ((Constants.TRUE.equalsIgnoreCase(fieldConfigMap.get(fessConfig.getIndexFieldCache())) + if ((fieldConfigs.getConfig(fessConfig.getIndexFieldCache()).map(config -> config.isCache()).orElse(false) || fessConfig.isCrawlerDocumentCacheEnabled()) && fessConfig.isSupportedDocumentCacheMimetypes(mimeType)) { if (responseData.getContentLength() > 0 && responseData.getContentLength() <= fessConfig.getCrawlerDocumentCacheMaxSizeAsInteger().longValue()) { @@ -334,7 +335,7 @@ public abstract class AbstractFessFileTransformer extends AbstractTransformer im putResultDataWithTemplate(dataMap, key, entry.getValue(), scriptConfigMap.get(key), scriptType); } - return dataMap; + return processFieldConfigs(dataMap, fieldConfigs); } protected Date getLastModified(final Map dataMap, final ResponseData responseData) { diff --git a/src/main/java/org/codelibs/fess/crawler/transformer/FessTransformer.java b/src/main/java/org/codelibs/fess/crawler/transformer/FessTransformer.java index 9f4933f87..77430ebe9 100644 --- a/src/main/java/org/codelibs/fess/crawler/transformer/FessTransformer.java +++ b/src/main/java/org/codelibs/fess/crawler/transformer/FessTransformer.java @@ -20,6 +20,7 @@ import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.HashMap; +import java.util.LinkedHashMap; import java.util.Map; import org.apache.commons.lang3.StringUtils; @@ -31,6 +32,7 @@ import org.codelibs.fess.crawler.entity.AccessResult; import org.codelibs.fess.crawler.entity.AccessResultData; import org.codelibs.fess.crawler.entity.UrlQueue; import org.codelibs.fess.crawler.util.CrawlingParameterUtil; +import org.codelibs.fess.crawler.util.FieldConfigs; import org.codelibs.fess.mylasta.direction.FessConfig; import org.codelibs.fess.util.ComponentUtil; @@ -248,4 +250,17 @@ public interface FessTransformer { } return null; } + + default Map processFieldConfigs(final Map dataMap, final FieldConfigs fieldConfigs) { + final Map newDataMap = new LinkedHashMap<>(); + for (Map.Entry e : dataMap.entrySet()) { + if (fieldConfigs.getConfig(e.getKey()).map(FieldConfigs.Config::isOverwrite).orElse(false) + && e.getValue() instanceof Object[] values && values.length > 0) { + newDataMap.put(e.getKey(), values[values.length - 1]); + } else { + newDataMap.put(e.getKey(), e.getValue()); + } + } + return newDataMap; + } } diff --git a/src/main/java/org/codelibs/fess/crawler/transformer/FessXpathTransformer.java b/src/main/java/org/codelibs/fess/crawler/transformer/FessXpathTransformer.java index 1401cfd5b..22b299477 100644 --- a/src/main/java/org/codelibs/fess/crawler/transformer/FessXpathTransformer.java +++ b/src/main/java/org/codelibs/fess/crawler/transformer/FessXpathTransformer.java @@ -56,6 +56,7 @@ import org.codelibs.fess.crawler.exception.CrawlerSystemException; import org.codelibs.fess.crawler.exception.CrawlingAccessException; import org.codelibs.fess.crawler.transformer.impl.XpathTransformer; import org.codelibs.fess.crawler.util.CrawlingParameterUtil; +import org.codelibs.fess.crawler.util.FieldConfigs; import org.codelibs.fess.es.config.exentity.CrawlingConfig; import org.codelibs.fess.es.config.exentity.CrawlingConfig.ConfigName; import org.codelibs.fess.es.config.exentity.CrawlingConfig.Param.Config; @@ -152,7 +153,7 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf processMetaRobots(responseData, resultData, document); processXRobotsTag(responseData, resultData); - final Map dataMap = new LinkedHashMap<>(); + Map dataMap = new LinkedHashMap<>(); for (final Map.Entry entry : fieldRuleMap.entrySet()) { final String path = entry.getValue(); try { @@ -184,7 +185,7 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf } } - putAdditionalData(dataMap, responseData, document); + dataMap = processAdditionalData(dataMap, responseData, document); normalizeData(responseData, dataMap); try { @@ -336,7 +337,8 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf return true; } - protected void putAdditionalData(final Map dataMap, final ResponseData responseData, final Document document) { + protected Map processAdditionalData(final Map dataMap, final ResponseData responseData, + final Document document) { // canonical final String canonicalUrl = getCanonicalUrl(responseData, document); if (canonicalUrl != null && !canonicalUrl.equals(responseData.getUrl()) && isValidUrl(canonicalUrl) @@ -362,7 +364,7 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf url = pathMappingHelper.replaceUrl(sessionId, url); final String mimeType = responseData.getMimeType(); - final Map fieldConfigMap = crawlingConfig.getConfigParameterMap(ConfigName.FIELD); + final FieldConfigs fieldConfigs = new FieldConfigs(crawlingConfig.getConfigParameterMap(ConfigName.FIELD)); final Map xpathConfigMap = crawlingConfig.getConfigParameterMap(ConfigName.XPATH); String urlEncoding; @@ -394,7 +396,7 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf prunedContent ? node -> pruneNode(node, crawlingConfig) : node -> node); final String fileName = getFileName(url, urlEncoding); putResultDataContent(dataMap, responseData, fessConfig, crawlingConfig, documentHelper, body, fileName); - if ((Constants.TRUE.equalsIgnoreCase(fieldConfigMap.get(fessConfig.getIndexFieldCache())) + if ((fieldConfigs.getConfig(fessConfig.getIndexFieldCache()).map(config -> config.isCache()).orElse(false) || fessConfig.isCrawlerDocumentCacheEnabled()) && fessConfig.isSupportedDocumentCacheMimetypes(mimeType)) { if (responseData.getContentLength() > 0 && responseData.getContentLength() <= fessConfig.getCrawlerDocumentCacheMaxSizeAsInteger().longValue()) { @@ -499,6 +501,8 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf final String value = e.getValue(); putResultDataWithTemplate(dataMap, key, value, scriptConfigMap.get(key), scriptType); }); + + return processFieldConfigs(dataMap, fieldConfigs); } protected void putResultDataContent(final Map dataMap, final ResponseData responseData, final FessConfig fessConfig, diff --git a/src/main/java/org/codelibs/fess/crawler/util/FieldConfigs.java b/src/main/java/org/codelibs/fess/crawler/util/FieldConfigs.java new file mode 100644 index 000000000..112819466 --- /dev/null +++ b/src/main/java/org/codelibs/fess/crawler/util/FieldConfigs.java @@ -0,0 +1,76 @@ +/* + * Copyright 2012-2024 CodeLibs Project and the Others. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language + * governing permissions and limitations under the License. + */ +package org.codelibs.fess.crawler.util; + +import java.util.Map; +import java.util.regex.Pattern; + +import org.codelibs.core.lang.StringUtil; +import org.codelibs.core.stream.StreamUtil; +import org.codelibs.fess.Constants; +import org.dbflute.optional.OptionalThing; + +public class FieldConfigs { + + private final Map params; + + public FieldConfigs(Map params) { + this.params = params; + } + + public OptionalThing getConfig(String fieldName) { + String value = params.get(fieldName); + if (StringUtil.isNotBlank(value)) { + return OptionalThing.of(new Config(value)); + } + return OptionalThing.empty(); + } + + public static class Config { + + private final String[] values; + + public Config(String value) { + values = StreamUtil.split(value, Pattern.quote("|")).get(stream -> stream.map(s -> s.trim()).toArray(n -> new String[n])); + } + + public boolean isCache() { + for (final String value : values) { + if ("cache".equalsIgnoreCase(value)) { + return true; + } + } + // backward compatibility + if (values.length == 1 && Constants.TRUE.equalsIgnoreCase(values[0])) { + return true; + } + return false; + } + + public boolean isOverwrite() { + for (final String value : values) { + if ("overwrite".equalsIgnoreCase(value)) { + return true; + } + } + return false; + } + + public String[] getValues() { + return values; + } + } +} diff --git a/src/test/java/org/codelibs/fess/crawler/transformer/FessFileTransformerTest.java b/src/test/java/org/codelibs/fess/crawler/transformer/FessFileTransformerTest.java index 5de72b834..8b587e195 100644 --- a/src/test/java/org/codelibs/fess/crawler/transformer/FessFileTransformerTest.java +++ b/src/test/java/org/codelibs/fess/crawler/transformer/FessFileTransformerTest.java @@ -17,8 +17,11 @@ package org.codelibs.fess.crawler.transformer; import java.io.UnsupportedEncodingException; import java.net.URLEncoder; +import java.util.Map; +import org.apache.groovy.util.Maps; import org.codelibs.fess.Constants; +import org.codelibs.fess.crawler.util.FieldConfigs; import org.codelibs.fess.exception.FessSystemException; import org.codelibs.fess.unit.UnitFessTestCase; @@ -269,6 +272,21 @@ public class FessFileTransformerTest extends UnitFessTestCase { assertEquals(exp, transformer.getSiteOnFile(url, "UTF-8")); } + public void test_processFieldConfigs() { + final FessFileTransformer transformer = createInstance(); + final Map params = Maps.of("foo", "cache", "bar", "overwrite", "baz", "cache|overwrite"); + FieldConfigs fieldConfigs = new FieldConfigs(params); + final Map dataMap = Map.of(// + "foo", new String[] { "aaa", "bbb" }, // + "bar", new String[] { "ccc", "ddd" }, // + "baz", new String[] { "eee", "fff" }); + final Map resultMap = transformer.processFieldConfigs(dataMap, fieldConfigs); + assertEquals("aaa", ((String[]) resultMap.get("foo"))[0]); + assertEquals("bbb", ((String[]) resultMap.get("foo"))[1]); + assertEquals("ddd", resultMap.get("bar")); + assertEquals("fff", resultMap.get("baz")); + } + private FessFileTransformer createInstance() { final FessFileTransformer transformer = new FessFileTransformer(); transformer.init(); diff --git a/src/test/java/org/codelibs/fess/crawler/transformer/FessXpathTransformerTest.java b/src/test/java/org/codelibs/fess/crawler/transformer/FessXpathTransformerTest.java index 6d5776adc..3fbedfab7 100644 --- a/src/test/java/org/codelibs/fess/crawler/transformer/FessXpathTransformerTest.java +++ b/src/test/java/org/codelibs/fess/crawler/transformer/FessXpathTransformerTest.java @@ -32,6 +32,7 @@ import javax.xml.transform.TransformerFactory; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; +import org.apache.groovy.util.Maps; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.codelibs.core.lang.ClassUtil; @@ -42,6 +43,7 @@ import org.codelibs.fess.crawler.entity.RequestData; import org.codelibs.fess.crawler.entity.ResponseData; import org.codelibs.fess.crawler.entity.ResultData; import org.codelibs.fess.crawler.exception.ChildUrlsException; +import org.codelibs.fess.crawler.util.FieldConfigs; import org.codelibs.fess.es.config.exentity.CrawlingConfig.ConfigName; import org.codelibs.fess.es.config.exentity.WebConfig; import org.codelibs.fess.helper.CrawlingConfigHelper; @@ -626,7 +628,7 @@ public class FessXpathTransformerTest extends UnitFessTestCase { String data = "aaa"; Document document = getDocument(data); try { - transformer.putAdditionalData(dataMap, responseData, document); + transformer.processAdditionalData(dataMap, responseData, document); fail(); } catch (final ComponentNotFoundException e) { // ignore @@ -635,7 +637,7 @@ public class FessXpathTransformerTest extends UnitFessTestCase { data = "aaa"; document = getDocument(data); try { - transformer.putAdditionalData(dataMap, responseData, document); + transformer.processAdditionalData(dataMap, responseData, document); fail(); } catch (final ComponentNotFoundException e) { // ignore @@ -644,7 +646,7 @@ public class FessXpathTransformerTest extends UnitFessTestCase { data = "aaa"; document = getDocument(data); try { - transformer.putAdditionalData(dataMap, responseData, document); + transformer.processAdditionalData(dataMap, responseData, document); fail(); } catch (final ChildUrlsException e) { final Set childUrlList = e.getChildUrlList(); @@ -655,7 +657,7 @@ public class FessXpathTransformerTest extends UnitFessTestCase { data = "aaa"; document = getDocument(data); try { - transformer.putAdditionalData(dataMap, responseData, document); + transformer.processAdditionalData(dataMap, responseData, document); fail(); } catch (final ChildUrlsException e) { final Set childUrlList = e.getChildUrlList(); @@ -904,4 +906,19 @@ public class FessXpathTransformerTest extends UnitFessTestCase { assertFalse(transformer.isValidUrl("http://")); assertFalse(transformer.isValidUrl("http://http://www.example.com")); } + + public void test_processFieldConfigs() { + final FessXpathTransformer transformer = new FessXpathTransformer(); + final Map params = Maps.of("foo", "cache", "bar", "overwrite", "baz", "cache|overwrite"); + FieldConfigs fieldConfigs = new FieldConfigs(params); + final Map dataMap = Map.of(// + "foo", new String[] { "aaa", "bbb" }, // + "bar", new String[] { "ccc", "ddd" }, // + "baz", new String[] { "eee", "fff" }); + final Map resultMap = transformer.processFieldConfigs(dataMap, fieldConfigs); + assertEquals("aaa", ((String[]) resultMap.get("foo"))[0]); + assertEquals("bbb", ((String[]) resultMap.get("foo"))[1]); + assertEquals("ddd", resultMap.get("bar")); + assertEquals("fff", resultMap.get("baz")); + } } diff --git a/src/test/java/org/codelibs/fess/crawler/util/FieldConfigsTest.java b/src/test/java/org/codelibs/fess/crawler/util/FieldConfigsTest.java new file mode 100644 index 000000000..7a6b3049f --- /dev/null +++ b/src/test/java/org/codelibs/fess/crawler/util/FieldConfigsTest.java @@ -0,0 +1,71 @@ +/* + * Copyright 2012-2024 CodeLibs Project and the Others. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language + * governing permissions and limitations under the License. + */ +package org.codelibs.fess.crawler.util; + +import java.util.Collections; +import java.util.Map; + +import org.apache.groovy.util.Maps; +import org.codelibs.fess.unit.UnitFessTestCase; + +public class FieldConfigsTest extends UnitFessTestCase { + public void test_empty() { + final FieldConfigs fieldConfigs = new FieldConfigs(Collections.emptyMap()); + assertTrue(fieldConfigs.getConfig("test").isEmpty()); + } + + public void test_values() { + final Map params = Maps.of("foo", "bar"); + FieldConfigs fieldConfigs = new FieldConfigs(params); + assertTrue(fieldConfigs.getConfig("test").isEmpty()); + assertFalse(fieldConfigs.getConfig("foo").isEmpty()); + assertFalse(fieldConfigs.getConfig("foo").map(FieldConfigs.Config::isCache).orElse(false)); + assertFalse(fieldConfigs.getConfig("foo").map(FieldConfigs.Config::isOverwrite).orElse(false)); + assertEquals("bar", fieldConfigs.getConfig("foo").map(FieldConfigs.Config::getValues).orElse(new String[0])[0]); + } + + public void test_cache_true() { + final Map params = Maps.of("foo", "true"); + FieldConfigs fieldConfigs = new FieldConfigs(params); + assertTrue(fieldConfigs.getConfig("test").isEmpty()); + assertTrue(fieldConfigs.getConfig("foo").map(FieldConfigs.Config::isCache).orElse(false)); + assertFalse(fieldConfigs.getConfig("foo").map(FieldConfigs.Config::isOverwrite).orElse(false)); + } + + public void test_cache() { + final Map params = Maps.of("foo", "cache"); + FieldConfigs fieldConfigs = new FieldConfigs(params); + assertTrue(fieldConfigs.getConfig("test").isEmpty()); + assertTrue(fieldConfigs.getConfig("foo").map(FieldConfigs.Config::isCache).orElse(false)); + assertFalse(fieldConfigs.getConfig("foo").map(FieldConfigs.Config::isOverwrite).orElse(false)); + } + + public void test_overwrite() { + final Map params = Maps.of("foo", "overwrite"); + FieldConfigs fieldConfigs = new FieldConfigs(params); + assertTrue(fieldConfigs.getConfig("test").isEmpty()); + assertFalse(fieldConfigs.getConfig("foo").map(FieldConfigs.Config::isCache).orElse(false)); + assertTrue(fieldConfigs.getConfig("foo").map(FieldConfigs.Config::isOverwrite).orElse(false)); + } + + public void test_cache_overwrite() { + final Map params = Maps.of("foo", "cache|overwrite"); + FieldConfigs fieldConfigs = new FieldConfigs(params); + assertTrue(fieldConfigs.getConfig("test").isEmpty()); + assertTrue(fieldConfigs.getConfig("foo").map(FieldConfigs.Config::isCache).orElse(false)); + assertTrue(fieldConfigs.getConfig("foo").map(FieldConfigs.Config::isOverwrite).orElse(false)); + } +}