diff --git a/pom.xml b/pom.xml index 187a0351f..54c450c02 100644 --- a/pom.xml +++ b/pom.xml @@ -1355,6 +1355,11 @@ bcprov-jdk18on ${bouncycastle.version} + + com.esotericsoftware + kryo + ${kryo.version} + diff --git a/src/main/java/org/codelibs/fess/app/web/admin/upgrade/AdminUpgradeAction.java b/src/main/java/org/codelibs/fess/app/web/admin/upgrade/AdminUpgradeAction.java index 14a438b8f..cb7adedba 100644 --- a/src/main/java/org/codelibs/fess/app/web/admin/upgrade/AdminUpgradeAction.java +++ b/src/main/java/org/codelibs/fess/app/web/admin/upgrade/AdminUpgradeAction.java @@ -18,18 +18,13 @@ package org.codelibs.fess.app.web.admin.upgrade; import java.io.IOException; import java.util.List; import java.util.Map; -import java.util.stream.Collectors; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; -import org.codelibs.core.lang.StringUtil; -import org.codelibs.core.stream.StreamUtil; import org.codelibs.curl.CurlResponse; -import org.codelibs.fess.Constants; import org.codelibs.fess.annotation.Secured; import org.codelibs.fess.app.service.ScheduledJobService; import org.codelibs.fess.app.web.base.FessAdminAction; -import org.codelibs.fess.mylasta.direction.FessConfig; import org.codelibs.fess.opensearch.client.SearchEngineClient; import org.codelibs.fess.opensearch.config.exbhv.DataConfigBhv; import org.codelibs.fess.opensearch.config.exbhv.ElevateWordBhv; @@ -39,7 +34,6 @@ import org.codelibs.fess.opensearch.config.exbhv.RoleTypeBhv; import org.codelibs.fess.opensearch.config.exbhv.WebConfigBhv; import org.codelibs.fess.opensearch.user.exbhv.RoleBhv; import org.codelibs.fess.util.ComponentUtil; -import org.codelibs.fess.util.UpgradeUtil; import org.codelibs.opensearch.runner.net.OpenSearchCurl; import org.lastaflute.web.Execute; import org.lastaflute.web.response.HtmlResponse; diff --git a/src/main/java/org/codelibs/fess/crawler/serializer/DataSerializer.java b/src/main/java/org/codelibs/fess/crawler/serializer/DataSerializer.java new file mode 100644 index 000000000..f9d718f40 --- /dev/null +++ b/src/main/java/org/codelibs/fess/crawler/serializer/DataSerializer.java @@ -0,0 +1,93 @@ +/* + * Copyright 2012-2024 CodeLibs Project and the Others. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language + * governing permissions and limitations under the License. + */ +package org.codelibs.fess.crawler.serializer; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.codelibs.core.exception.IORuntimeException; +import org.codelibs.core.io.SerializeUtil; +import org.codelibs.fess.util.ComponentUtil; + +import com.esotericsoftware.kryo.Kryo; +import com.esotericsoftware.kryo.io.Input; +import com.esotericsoftware.kryo.io.Output; + +public class DataSerializer { + + private static final Logger logger = LogManager.getLogger(DataSerializer.class); + + protected static final String JAVABIN = "javabin"; + + protected static final String KRYO = "kryo"; + + protected final ThreadLocal kryoThreadLocal; + + public DataSerializer() { + kryoThreadLocal = ThreadLocal.withInitial(() -> { + final Kryo kryo = new Kryo(); + // TODO use kryo.register + kryo.setRegistrationRequired(false); + if (logger.isDebugEnabled()) { + kryo.setWarnUnregisteredClasses(true); + } + return kryo; + }); + } + + protected String getSerializerType() { + return ComponentUtil.getFessConfig().getCrawlerDataSerializer(); + } + + public byte[] fromObjectToBinary(final Object obj) { + final String serializer = getSerializerType(); + return switch (serializer) { + case KRYO -> serializeWithKryo(obj); + case JAVABIN -> SerializeUtil.fromObjectToBinary(obj); + default -> throw new IllegalArgumentException("Unexpected value: " + serializer); + }; + } + + public Object fromBinaryToObject(final byte[] bytes) { + final String serializer = getSerializerType(); + return switch (serializer) { + case KRYO -> deserializeWithKryo(bytes); + case JAVABIN -> SerializeUtil.fromBinaryToObject(bytes); + default -> throw new IllegalArgumentException("Unexpected value: " + serializer); + }; + } + + protected byte[] serializeWithKryo(final Object obj) { + final Kryo kryo = kryoThreadLocal.get(); + try (ByteArrayOutputStream baos = new ByteArrayOutputStream(); final Output output = new Output(baos)) { + kryo.writeClassAndObject(output, obj); + output.flush(); + return baos.toByteArray(); + } catch (final IOException e) { + throw new IORuntimeException(e); + } + } + + protected Object deserializeWithKryo(final byte[] bytes) { + final Kryo kryo = kryoThreadLocal.get(); + try (final Input input = new Input(new ByteArrayInputStream(bytes))) { + return kryo.readClassAndObject(input); + } + } +} diff --git a/src/main/java/org/codelibs/fess/crawler/transformer/AbstractFessFileTransformer.java b/src/main/java/org/codelibs/fess/crawler/transformer/AbstractFessFileTransformer.java index c72df851e..879bcdcc1 100644 --- a/src/main/java/org/codelibs/fess/crawler/transformer/AbstractFessFileTransformer.java +++ b/src/main/java/org/codelibs/fess/crawler/transformer/AbstractFessFileTransformer.java @@ -29,7 +29,6 @@ import java.util.stream.Collectors; import org.apache.commons.lang3.StringUtils; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; -import org.codelibs.core.io.SerializeUtil; import org.codelibs.core.lang.StringUtil; import org.codelibs.core.misc.Tuple3; import org.codelibs.fess.Constants; @@ -42,6 +41,7 @@ import org.codelibs.fess.crawler.exception.CrawlerSystemException; import org.codelibs.fess.crawler.exception.CrawlingAccessException; import org.codelibs.fess.crawler.extractor.Extractor; import org.codelibs.fess.crawler.extractor.impl.TikaExtractor; +import org.codelibs.fess.crawler.serializer.DataSerializer; import org.codelibs.fess.crawler.transformer.impl.AbstractTransformer; import org.codelibs.fess.crawler.util.CrawlingParameterUtil; import org.codelibs.fess.crawler.util.FieldConfigs; @@ -68,6 +68,8 @@ public abstract class AbstractFessFileTransformer extends AbstractTransformer im protected FessConfig fessConfig; + protected DataSerializer dataSerializer; + protected abstract Extractor getExtractor(ResponseData responseData); @Override @@ -79,7 +81,7 @@ public abstract class AbstractFessFileTransformer extends AbstractTransformer im final ResultData resultData = new ResultData(); resultData.setTransformerName(getName()); try { - resultData.setData(SerializeUtil.fromObjectToBinary(generateData(responseData))); + resultData.setData(dataSerializer.fromObjectToBinary(generateData(responseData))); } catch (final Exception e) { throw new CrawlingAccessException("Could not serialize object", e); } @@ -485,7 +487,7 @@ public abstract class AbstractFessFileTransformer extends AbstractTransformer im final byte[] data = accessResultData.getData(); if (data != null) { try { - return SerializeUtil.fromBinaryToObject(data); + return dataSerializer.fromBinaryToObject(data); } catch (final Exception e) { throw new CrawlerSystemException("Could not create an instanced from bytes.", e); } diff --git a/src/main/java/org/codelibs/fess/crawler/transformer/FessFileTransformer.java b/src/main/java/org/codelibs/fess/crawler/transformer/FessFileTransformer.java index 5149c2147..3b83d13d7 100644 --- a/src/main/java/org/codelibs/fess/crawler/transformer/FessFileTransformer.java +++ b/src/main/java/org/codelibs/fess/crawler/transformer/FessFileTransformer.java @@ -35,6 +35,7 @@ public class FessFileTransformer extends AbstractFessFileTransformer { logger.debug("Initialize {}", this.getClass().getSimpleName()); } fessConfig = ComponentUtil.getFessConfig(); + dataSerializer = ComponentUtil.getComponent("dataSerializer"); } @Override diff --git a/src/main/java/org/codelibs/fess/crawler/transformer/FessStandardTransformer.java b/src/main/java/org/codelibs/fess/crawler/transformer/FessStandardTransformer.java index 6f0d687f0..cae88f01d 100644 --- a/src/main/java/org/codelibs/fess/crawler/transformer/FessStandardTransformer.java +++ b/src/main/java/org/codelibs/fess/crawler/transformer/FessStandardTransformer.java @@ -35,6 +35,7 @@ public class FessStandardTransformer extends AbstractFessFileTransformer { logger.debug("Initialize {}", this.getClass().getSimpleName()); } fessConfig = ComponentUtil.getFessConfig(); + dataSerializer = ComponentUtil.getComponent("dataSerializer"); } @Override diff --git a/src/main/java/org/codelibs/fess/crawler/transformer/FessTransformer.java b/src/main/java/org/codelibs/fess/crawler/transformer/FessTransformer.java index babb3c1e8..fa6e1fcfa 100644 --- a/src/main/java/org/codelibs/fess/crawler/transformer/FessTransformer.java +++ b/src/main/java/org/codelibs/fess/crawler/transformer/FessTransformer.java @@ -263,4 +263,5 @@ public interface FessTransformer { } return newDataMap; } + } diff --git a/src/main/java/org/codelibs/fess/crawler/transformer/FessXpathTransformer.java b/src/main/java/org/codelibs/fess/crawler/transformer/FessXpathTransformer.java index 1b2c85713..426804ed0 100644 --- a/src/main/java/org/codelibs/fess/crawler/transformer/FessXpathTransformer.java +++ b/src/main/java/org/codelibs/fess/crawler/transformer/FessXpathTransformer.java @@ -42,7 +42,6 @@ import javax.xml.xpath.XPathNodes; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.codelibs.core.io.InputStreamUtil; -import org.codelibs.core.io.SerializeUtil; import org.codelibs.core.lang.StringUtil; import org.codelibs.core.misc.Pair; import org.codelibs.core.misc.ValueHolder; @@ -56,6 +55,7 @@ import org.codelibs.fess.crawler.entity.UrlQueue; import org.codelibs.fess.crawler.exception.ChildUrlsException; import org.codelibs.fess.crawler.exception.CrawlerSystemException; import org.codelibs.fess.crawler.exception.CrawlingAccessException; +import org.codelibs.fess.crawler.serializer.DataSerializer; import org.codelibs.fess.crawler.transformer.impl.XpathTransformer; import org.codelibs.fess.crawler.util.CrawlingParameterUtil; import org.codelibs.fess.crawler.util.FieldConfigs; @@ -109,6 +109,8 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf protected FessConfig fessConfig; + protected DataSerializer dataSerializer; + protected boolean useGoogleOffOn = true; protected Map fieldPrunedRuleMap = new HashMap<>(); @@ -121,6 +123,7 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf logger.debug("Initialize {}", this.getClass().getSimpleName()); } fessConfig = ComponentUtil.getFessConfig(); + dataSerializer = ComponentUtil.getComponent("dataSerializer"); } @Override @@ -193,7 +196,7 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf normalizeData(responseData, dataMap); try { - resultData.setData(SerializeUtil.fromObjectToBinary(dataMap)); + resultData.setData(dataSerializer.fromObjectToBinary(dataMap)); } catch (final Exception e) { throw new CrawlingAccessException("Could not serialize object: " + responseData.getUrl(), e); } @@ -816,7 +819,7 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf final byte[] data = accessResultData.getData(); if (data != null) { try { - return SerializeUtil.fromBinaryToObject(data); + return dataSerializer.fromBinaryToObject(data); } catch (final Exception e) { throw new CrawlerSystemException("Could not create an instanced from bytes.", e); } diff --git a/src/main/java/org/codelibs/fess/mylasta/direction/FessConfig.java b/src/main/java/org/codelibs/fess/mylasta/direction/FessConfig.java index d98adb21d..f99a25649 100644 --- a/src/main/java/org/codelibs/fess/mylasta/direction/FessConfig.java +++ b/src/main/java/org/codelibs/fess/mylasta/direction/FessConfig.java @@ -319,6 +319,9 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction /** The key of the configuration. e.g. 0 */ String CRAWLER_HTTP_thread_pool_SIZE = "crawler.http.thread_pool.size"; + /** The key of the configuration. e.g. kryo */ + String CRAWLER_DATA_SERIALIZER = "crawler.data.serializer"; + /** The key of the configuration. e.g. 100 */ String CRAWLER_DOCUMENT_MAX_SITE_LENGTH = "crawler.document.max.site.length"; @@ -2687,6 +2690,13 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction */ Integer getCrawlerHttpThreadPoolSizeAsInteger(); + /** + * Get the value for the key 'crawler.data.serializer'.
+ * The value is, e.g. kryo
+ * @return The value of found property. (NotNull: if not found, exception but basically no way) + */ + String getCrawlerDataSerializer(); + /** * Get the value for the key 'crawler.document.max.site.length'.
* The value is, e.g. 100
@@ -8259,6 +8269,10 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction return getAsInteger(FessConfig.CRAWLER_HTTP_thread_pool_SIZE); } + public String getCrawlerDataSerializer() { + return get(FessConfig.CRAWLER_DATA_SERIALIZER); + } + public String getCrawlerDocumentMaxSiteLength() { return get(FessConfig.CRAWLER_DOCUMENT_MAX_SITE_LENGTH); } @@ -11095,6 +11109,7 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction defaultMap.put(FessConfig.HTTP_FILEUPLOAD_MAX_FILE_COUNT, "10"); defaultMap.put(FessConfig.CRAWLER_DEFAULT_SCRIPT, "groovy"); defaultMap.put(FessConfig.CRAWLER_HTTP_thread_pool_SIZE, "0"); + defaultMap.put(FessConfig.CRAWLER_DATA_SERIALIZER, "kryo"); defaultMap.put(FessConfig.CRAWLER_DOCUMENT_MAX_SITE_LENGTH, "100"); defaultMap.put(FessConfig.CRAWLER_DOCUMENT_SITE_ENCODING, "UTF-8"); defaultMap.put(FessConfig.CRAWLER_DOCUMENT_UNKNOWN_HOSTNAME, "unknown"); diff --git a/src/main/resources/crawler/transformer.xml b/src/main/resources/crawler/transformer.xml index bf0b890fc..95b821a57 100644 --- a/src/main/resources/crawler/transformer.xml +++ b/src/main/resources/crawler/transformer.xml @@ -35,4 +35,7 @@ "fessStandardTransformer" + + + diff --git a/src/main/resources/fess_config.properties b/src/main/resources/fess_config.properties index 21830806f..5e30571e7 100644 --- a/src/main/resources/fess_config.properties +++ b/src/main/resources/fess_config.properties @@ -204,6 +204,7 @@ http.fileupload.max.file.count=10 # common crawler.default.script=groovy crawler.http.thread_pool.size=0 +crawler.data.serializer=kryo crawler.document.max.site.length=100 crawler.document.site.encoding=UTF-8 crawler.document.unknown.hostname=unknown diff --git a/src/test/java/org/codelibs/fess/crawler/transformer/FessFileTransformerTest.java b/src/test/java/org/codelibs/fess/crawler/transformer/FessFileTransformerTest.java index 8b587e195..a482a36b9 100644 --- a/src/test/java/org/codelibs/fess/crawler/transformer/FessFileTransformerTest.java +++ b/src/test/java/org/codelibs/fess/crawler/transformer/FessFileTransformerTest.java @@ -21,12 +21,26 @@ import java.util.Map; import org.apache.groovy.util.Maps; import org.codelibs.fess.Constants; +import org.codelibs.fess.crawler.serializer.DataSerializer; import org.codelibs.fess.crawler.util.FieldConfigs; import org.codelibs.fess.exception.FessSystemException; import org.codelibs.fess.unit.UnitFessTestCase; +import org.codelibs.fess.util.ComponentUtil; public class FessFileTransformerTest extends UnitFessTestCase { + @Override + public void setUp() throws Exception { + super.setUp(); + ComponentUtil.register(new DataSerializer(), "dataSerializer"); + } + + @Override + public void tearDown() throws Exception { + ComponentUtil.setFessConfig(null); + super.tearDown(); + } + private String encodeUrl(final String url) { try { return URLEncoder.encode(url, Constants.UTF_8); @@ -292,4 +306,5 @@ public class FessFileTransformerTest extends UnitFessTestCase { transformer.init(); return transformer; } + } diff --git a/src/test/java/org/codelibs/fess/crawler/transformer/FessXpathTransformerTest.java b/src/test/java/org/codelibs/fess/crawler/transformer/FessXpathTransformerTest.java index b68ccf63f..8ac753e50 100644 --- a/src/test/java/org/codelibs/fess/crawler/transformer/FessXpathTransformerTest.java +++ b/src/test/java/org/codelibs/fess/crawler/transformer/FessXpathTransformerTest.java @@ -44,6 +44,7 @@ import org.codelibs.fess.crawler.entity.RequestData; import org.codelibs.fess.crawler.entity.ResponseData; import org.codelibs.fess.crawler.entity.ResultData; import org.codelibs.fess.crawler.exception.ChildUrlsException; +import org.codelibs.fess.crawler.serializer.DataSerializer; import org.codelibs.fess.crawler.util.FieldConfigs; import org.codelibs.fess.helper.CrawlingConfigHelper; import org.codelibs.fess.helper.CrawlingInfoHelper; @@ -70,6 +71,18 @@ import org.xml.sax.InputSource; public class FessXpathTransformerTest extends UnitFessTestCase { private static final Logger logger = LogManager.getLogger(FessXpathTransformerTest.class); + @Override + public void setUp() throws Exception { + super.setUp(); + ComponentUtil.register(new DataSerializer(), "dataSerializer"); + } + + @Override + public void tearDown() throws Exception { + ComponentUtil.setFessConfig(null); + super.tearDown(); + } + public void test_transform() throws Exception { String data = "Test

Header1

This is a pen.

";