diff --git a/src/main/java/org/codelibs/fess/Constants.java b/src/main/java/org/codelibs/fess/Constants.java index 4ef69a9d4..e3bce22b2 100644 --- a/src/main/java/org/codelibs/fess/Constants.java +++ b/src/main/java/org/codelibs/fess/Constants.java @@ -79,6 +79,10 @@ public class Constants extends CoreLibConstants { public static final long DEFAULT_CRAWLING_EXECUTION_INTERVAL = 5000L; + public static final String CRAWLING_USER_AGENT_PREFIX = "Mozilla/5.0 (compatible; Fess/"; + + public static final String CRAWLING_USER_AGENT_SUFFIX = "; +http://fess.codelibs.org/bot.html)"; + // fess properties public static final String USER_INFO_PROPERTY = "user.info"; diff --git a/src/main/java/org/codelibs/fess/es/config/exentity/WebConfig.java b/src/main/java/org/codelibs/fess/es/config/exentity/WebConfig.java index 52bb5db23..3dee1a21f 100644 --- a/src/main/java/org/codelibs/fess/es/config/exentity/WebConfig.java +++ b/src/main/java/org/codelibs/fess/es/config/exentity/WebConfig.java @@ -173,7 +173,13 @@ public class WebConfig extends BsWebConfig implements CrawlingConfig { final String userAgent = getUserAgent(); if (StringUtil.isNotBlank(userAgent)) { - paramMap.put(Client.USER_AGENT, userAgent); + if (userAgent.startsWith(Constants.CRAWLING_USER_AGENT_PREFIX) && userAgent.endsWith(Constants.CRAWLING_USER_AGENT_SUFFIX)) { + paramMap.put(Client.USER_AGENT, fessConfig.getUserAgentName()); + } else { + paramMap.put(Client.USER_AGENT, userAgent); + } + } else { + paramMap.put(Client.USER_AGENT, fessConfig.getUserAgentName()); } final List webAuthList = webAuthenticationService.getWebAuthenticationList(getId()); diff --git a/src/main/java/org/codelibs/fess/mylasta/direction/FessProp.java b/src/main/java/org/codelibs/fess/mylasta/direction/FessProp.java index 3ea727119..4ccf47429 100644 --- a/src/main/java/org/codelibs/fess/mylasta/direction/FessProp.java +++ b/src/main/java/org/codelibs/fess/mylasta/direction/FessProp.java @@ -694,8 +694,8 @@ public interface FessProp { } default String getUserAgentName() { - return getSystemProperty(Constants.CRAWLING_USER_AGENT_PROPERTY, "Mozilla/5.0 (compatible; Fess/" - + ComponentUtil.getSystemHelper().getProductVersion() + "; +http://fess.codelibs.org/bot.html)"); + return getSystemProperty(Constants.CRAWLING_USER_AGENT_PROPERTY, Constants.CRAWLING_USER_AGENT_PREFIX + + ComponentUtil.getSystemHelper().getProductVersion() + Constants.CRAWLING_USER_AGENT_SUFFIX); } default void setLtrModelName(final String value) { diff --git a/src/main/java/org/codelibs/fess/util/ComponentUtil.java b/src/main/java/org/codelibs/fess/util/ComponentUtil.java index 2afcc36e1..b5427d5f9 100644 --- a/src/main/java/org/codelibs/fess/util/ComponentUtil.java +++ b/src/main/java/org/codelibs/fess/util/ComponentUtil.java @@ -88,6 +88,7 @@ import org.codelibs.fess.thumbnail.ThumbnailManager; import org.lastaflute.core.message.MessageManager; import org.lastaflute.core.security.PrimaryCipher; import org.lastaflute.di.core.SingletonLaContainer; +import org.lastaflute.di.core.exception.AutoBindingFailureException; import org.lastaflute.di.core.exception.ComponentNotFoundException; import org.lastaflute.di.core.factory.SingletonLaContainerFactory; import org.lastaflute.di.core.smart.hot.HotdeployUtil; @@ -520,6 +521,7 @@ public final class ComponentUtil { return getComponent(RANK_FUSION_PROCESSOR); } + @SuppressWarnings("unchecked") public static T getComponent(final Class clazz) { try { return SingletonLaContainer.getComponent(clazz); @@ -528,6 +530,11 @@ public final class ComponentUtil { throw new ContainerNotAvailableException(clazz.getCanonicalName(), e); } throw new ContainerNotAvailableException(clazz.getCanonicalName()); + } catch (final ComponentNotFoundException | AutoBindingFailureException e) { + if (componentMap.containsKey(clazz.getCanonicalName())) { + return (T) componentMap.get(clazz.getCanonicalName()); + } + throw e; } } @@ -540,7 +547,7 @@ public final class ComponentUtil { throw new ContainerNotAvailableException(componentName, e); } throw new ContainerNotAvailableException(componentName); - } catch (final ComponentNotFoundException e) { + } catch (final ComponentNotFoundException | AutoBindingFailureException e) { if (componentMap.containsKey(componentName)) { return (T) componentMap.get(componentName); } diff --git a/src/test/java/org/codelibs/fess/es/config/exentity/WebConfigTest.java b/src/test/java/org/codelibs/fess/es/config/exentity/WebConfigTest.java new file mode 100644 index 000000000..25dba4412 --- /dev/null +++ b/src/test/java/org/codelibs/fess/es/config/exentity/WebConfigTest.java @@ -0,0 +1,179 @@ +/* + * Copyright 2012-2023 CodeLibs Project and the Others. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language + * governing permissions and limitations under the License. + */ +package org.codelibs.fess.es.config.exentity; + +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.codelibs.core.lang.StringUtil; +import org.codelibs.fess.Constants; +import org.codelibs.fess.app.service.RequestHeaderService; +import org.codelibs.fess.app.service.WebAuthenticationService; +import org.codelibs.fess.crawler.client.CrawlerClientFactory; +import org.codelibs.fess.crawler.client.http.Authentication; +import org.codelibs.fess.helper.SystemHelper; +import org.codelibs.fess.mylasta.direction.FessConfig; +import org.codelibs.fess.mylasta.direction.FessProp; +import org.codelibs.fess.unit.UnitFessTestCase; +import org.codelibs.fess.util.ComponentUtil; +import org.opensearch.common.SetOnce; + +public class WebConfigTest extends UnitFessTestCase { + + @Override + protected boolean isUseOneTimeContainer() { + return true; + } + + public void test_initializeClientFactory() { + final Map systemPropMap = new HashMap<>(); + FessProp.propMap.clear(); + FessConfig fessConfig = new FessConfig.SimpleImpl() { + @Override + public String getSystemProperty(final String key, final String defaultValue) { + return systemPropMap.getOrDefault(key, defaultValue); + } + + @Override + public boolean isCrawlerIgnoreRobotsTxt() { + return false; + } + + @Override + public String getHttpProxyHost() { + return StringUtil.EMPTY; + } + + @Override + public String getHttpProxyPort() { + return StringUtil.EMPTY; + } + }; + ComponentUtil.setFessConfig(fessConfig); + SystemHelper systemHelper = new SystemHelper() { + @Override + public String getProductVersion() { + return "98.76"; + } + }; + ComponentUtil.register(systemHelper, "systemHelper"); + WebAuthenticationService webAuthenticationService = new WebAuthenticationService() { + @Override + public List getWebAuthenticationList(final String webConfigId) { + return Collections.emptyList(); + } + }; + ComponentUtil.register(webAuthenticationService, WebAuthenticationService.class.getCanonicalName()); + RequestHeaderService requestHeaderService = new RequestHeaderService() { + @Override + public List getRequestHeaderList(final String webConfigId) { + return Collections.emptyList(); + } + }; + ComponentUtil.register(requestHeaderService, RequestHeaderService.class.getCanonicalName()); + + final SetOnce> initParamMapSet = new SetOnce<>(); + WebConfig webConfig = new WebConfig(); + webConfig.setUserAgent(Constants.CRAWLING_USER_AGENT_PREFIX + "1.0" + Constants.CRAWLING_USER_AGENT_SUFFIX); + CrawlerClientFactory crawlerClientFactory = webConfig.initializeClientFactory(() -> new CrawlerClientFactory() { + public void setInitParameterMap(final Map params) { + initParamMapSet.set(params); + } + }); + assertNotNull(crawlerClientFactory); + Map initParamMap = initParamMapSet.get(); + assertNotNull(initParamMap); + assertEquals(0, ((org.codelibs.fess.crawler.client.http.RequestHeader[]) initParamMap.get("requestHeaders")).length); + assertEquals("Mozilla/5.0 (compatible; Fess/98.76; +http://fess.codelibs.org/bot.html)", initParamMap.get("userAgent")); + assertEquals(0, ((Authentication[]) initParamMap.get("basicAuthentications")).length); + assertTrue(Boolean.valueOf(initParamMap.get("robotsTxtEnabled").toString()).booleanValue()); + } + + public void test_initializeClientFactoryWithConfigParameter() { + final Map systemPropMap = new HashMap<>(); + FessProp.propMap.clear(); + FessConfig fessConfig = new FessConfig.SimpleImpl() { + @Override + public String getSystemProperty(final String key, final String defaultValue) { + return systemPropMap.getOrDefault(key, defaultValue); + } + + @Override + public boolean isCrawlerIgnoreRobotsTxt() { + return false; + } + + @Override + public String getHttpProxyHost() { + return StringUtil.EMPTY; + } + + @Override + public String getHttpProxyPort() { + return StringUtil.EMPTY; + } + + @Override + public String getAppEncryptPropertyPattern() { + return ".*password|.*key|.*token|.*secret"; + } + }; + ComponentUtil.setFessConfig(fessConfig); + SystemHelper systemHelper = new SystemHelper() { + @Override + public String getProductVersion() { + return "98.76"; + } + }; + ComponentUtil.register(systemHelper, "systemHelper"); + WebAuthenticationService webAuthenticationService = new WebAuthenticationService() { + @Override + public List getWebAuthenticationList(final String webConfigId) { + return Collections.emptyList(); + } + }; + ComponentUtil.register(webAuthenticationService, WebAuthenticationService.class.getCanonicalName()); + RequestHeaderService requestHeaderService = new RequestHeaderService() { + @Override + public List getRequestHeaderList(final String webConfigId) { + return Collections.emptyList(); + } + }; + ComponentUtil.register(requestHeaderService, RequestHeaderService.class.getCanonicalName()); + + final SetOnce> initParamMapSet = new SetOnce<>(); + WebConfig webConfig = new WebConfig(); + final String userAgent = "TestAgent"; + webConfig.setUserAgent(userAgent); + webConfig.setConfigParameter(""" + client.robotsTxtEnabled=false + """); + CrawlerClientFactory crawlerClientFactory = webConfig.initializeClientFactory(() -> new CrawlerClientFactory() { + public void setInitParameterMap(final Map params) { + initParamMapSet.set(params); + } + }); + assertNotNull(crawlerClientFactory); + Map initParamMap = initParamMapSet.get(); + assertNotNull(initParamMap); + assertEquals(0, ((org.codelibs.fess.crawler.client.http.RequestHeader[]) initParamMap.get("requestHeaders")).length); + assertEquals(userAgent, initParamMap.get("userAgent")); + assertEquals(0, ((Authentication[]) initParamMap.get("basicAuthentications")).length); + assertFalse(Boolean.valueOf(initParamMap.get("robotsTxtEnabled").toString()).booleanValue()); + } +} diff --git a/src/test/java/org/codelibs/fess/mylasta/direction/FessPropTest.java b/src/test/java/org/codelibs/fess/mylasta/direction/FessPropTest.java index 9f62560f7..054235776 100644 --- a/src/test/java/org/codelibs/fess/mylasta/direction/FessPropTest.java +++ b/src/test/java/org/codelibs/fess/mylasta/direction/FessPropTest.java @@ -21,10 +21,14 @@ import java.io.IOException; import java.util.Arrays; import java.util.HashMap; import java.util.Locale; +import java.util.Map; import org.codelibs.core.io.FileUtil; import org.codelibs.core.misc.DynamicProperties; +import org.codelibs.fess.Constants; +import org.codelibs.fess.helper.SystemHelper; import org.codelibs.fess.unit.UnitFessTestCase; +import org.codelibs.fess.util.ComponentUtil; import org.codelibs.fess.util.PrunedTag; import org.codelibs.nekohtml.parsers.DOMParser; import org.lastaflute.di.core.factory.SingletonLaContainerFactory; @@ -262,6 +266,30 @@ public class FessPropTest extends UnitFessTestCase { assertFalse(fessConfig.isValidUserCode("123456789?")); } + public void test_getUserAgentName() throws IOException { + final Map systemPropMap = new HashMap<>(); + FessProp.propMap.clear(); + FessConfig fessConfig = new FessConfig.SimpleImpl() { + @Override + public String getSystemProperty(final String key, final String defaultValue) { + return systemPropMap.getOrDefault(key, defaultValue); + } + }; + ComponentUtil.setFessConfig(fessConfig); + SystemHelper systemHelper = new SystemHelper() { + @Override + public String getProductVersion() { + return "98.76"; + } + }; + ComponentUtil.register(systemHelper, "systemHelper"); + + assertEquals("Mozilla/5.0 (compatible; Fess/98.76; +http://fess.codelibs.org/bot.html)", fessConfig.getUserAgentName()); + + systemPropMap.put(Constants.CRAWLING_USER_AGENT_PROPERTY, "TestAgent"); + assertEquals("TestAgent", fessConfig.getUserAgentName()); + } + private void assertArrays(final String[] expected, final String[] actual) { Arrays.sort(expected); Arrays.sort(actual);