Browse Source

fix #2819 add ProtocolHelper for URL protocol management and update related classes

Shinsuke Sugaya 1 year ago
parent
commit
1f01d21446

+ 86 - 0
src/main/java/org/codelibs/fess/helper/ProtocolHelper.java

@@ -0,0 +1,86 @@
+/*
+ * Copyright 2012-2024 CodeLibs Project and the Others.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied. See the License for the specific language
+ * governing permissions and limitations under the License.
+ */
+package org.codelibs.fess.helper;
+
+import static org.codelibs.core.stream.StreamUtil.split;
+import static org.codelibs.core.stream.StreamUtil.stream;
+
+import java.util.Arrays;
+
+import javax.annotation.PostConstruct;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+import org.codelibs.core.lang.StringUtil;
+import org.codelibs.fess.mylasta.direction.FessConfig;
+import org.codelibs.fess.util.ComponentUtil;
+
+/**
+ * Manages the URL protocol prefixes that are accepted for web and file crawling.
+ *
+ * <p>The initial prefixes are read from the crawler web/file protocol settings of
+ * {@link FessConfig} in {@link #init()}. Further protocols can be registered afterwards
+ * with {@link #addWebProtocol(String)} and {@link #addFileProtocol(String)}. Every stored
+ * prefix is the trimmed protocol name followed by a colon, for example {@code "http:"}.</p>
+ *
+ * <p>The add methods replace the backing array without any synchronization, and the
+ * getters return the backing array itself rather than a copy.</p>
+ */
+public class ProtocolHelper {
+    private static final Logger logger = LogManager.getLogger(ProtocolHelper.class);
+
+    /** Separator appended to a protocol name to build the prefix matched against URLs. */
+    private static final String PROTOCOL_SEPARATOR = ":";
+
+    /** Prefixes (e.g. {@code "http:"}) accepted for web crawling. */
+    protected String[] webProtocols = StringUtil.EMPTY_STRINGS;
+
+    /** Prefixes (e.g. {@code "file:"}) accepted for file crawling. */
+    protected String[] fileProtocols = StringUtil.EMPTY_STRINGS;
+
+    /**
+     * Loads the web and file protocol prefixes from the current {@link FessConfig}.
+     * Both settings are comma-separated lists; blank entries are dropped and each
+     * remaining entry is trimmed and suffixed with a colon.
+     */
+    @PostConstruct
+    public void init() {
+        final FessConfig fessConfig = ComponentUtil.getFessConfig();
+        webProtocols = parseProtocols(fessConfig.getCrawlerWebProtocols());
+        fileProtocols = parseProtocols(fessConfig.getCrawlerFileProtocols());
+        if (logger.isDebugEnabled()) {
+            logger.debug("web protocols: {}", Arrays.toString(webProtocols));
+            logger.debug("file protocols: {}", Arrays.toString(fileProtocols));
+        }
+    }
+
+    /**
+     * Returns the registered web protocol prefixes.
+     *
+     * @return the backing array of web prefixes (not a copy)
+     */
+    public String[] getWebProtocols() {
+        return webProtocols;
+    }
+
+    /**
+     * Returns the registered file protocol prefixes.
+     *
+     * @return the backing array of file prefixes (not a copy)
+     */
+    public String[] getFileProtocols() {
+        return fileProtocols;
+    }
+
+    /**
+     * Checks whether the URL starts with one of the registered web protocol prefixes.
+     *
+     * @param url the URL to check; {@code null} is treated as not valid
+     * @return {@code true} if the URL starts with a registered web prefix
+     */
+    public boolean isValidWebProtocol(final String url) {
+        return startsWithAny(url, webProtocols);
+    }
+
+    /**
+     * Checks whether the URL starts with one of the registered file protocol prefixes.
+     *
+     * @param url the URL to check; {@code null} is treated as not valid
+     * @return {@code true} if the URL starts with a registered file prefix
+     */
+    public boolean isValidFileProtocol(final String url) {
+        return startsWithAny(url, fileProtocols);
+    }
+
+    /**
+     * Registers an additional web protocol. The name is trimmed before use; blank or
+     * {@code null} names and already registered protocols are ignored.
+     *
+     * @param protocol the protocol name without the trailing colon, e.g. {@code "http"}
+     */
+    public void addWebProtocol(final String protocol) {
+        webProtocols = appendProtocol(webProtocols, protocol, "web");
+    }
+
+    /**
+     * Registers an additional file protocol. The name is trimmed before use; blank or
+     * {@code null} names and already registered protocols are ignored.
+     *
+     * @param protocol the protocol name without the trailing colon, e.g. {@code "smb"}
+     */
+    public void addFileProtocol(final String protocol) {
+        fileProtocols = appendProtocol(fileProtocols, protocol, "file");
+    }
+
+    /** Converts a comma-separated protocol list into an array of {@code "name:"} prefixes. */
+    private static String[] parseProtocols(final String value) {
+        return split(value, ",").get(
+                entries -> entries.filter(StringUtil::isNotBlank).map(name -> name.trim() + PROTOCOL_SEPARATOR).toArray(String[]::new));
+    }
+
+    /** Returns whether the non-null URL starts with any of the given prefixes. */
+    private static boolean startsWithAny(final String url, final String[] prefixes) {
+        if (url == null) {
+            return false;
+        }
+        return stream(prefixes).get(candidates -> candidates.anyMatch(url::startsWith));
+    }
+
+    /**
+     * Returns the array to store after adding the protocol: the given array itself when
+     * nothing has to be added, otherwise a copy extended by the new prefix.
+     */
+    private static String[] appendProtocol(final String[] current, final String protocol, final String type) {
+        if (StringUtil.isBlank(protocol)) {
+            // A blank name would register ":" (or "null:") as a prefix, so skip it.
+            logger.debug("ignoring blank {} protocol.", type);
+            return current;
+        }
+        // Trim the same way init() does so that " httpx " and "httpx" are the same protocol.
+        final String prefix = protocol.trim() + PROTOCOL_SEPARATOR;
+        if (stream(current).get(candidates -> candidates.anyMatch(prefix::equals))) {
+            logger.debug("{} protocols contains {}.", type, protocol);
+            return current;
+        }
+        final String[] extended = Arrays.copyOf(current, current.length + 1);
+        extended[current.length] = prefix;
+        return extended;
+    }
+}

+ 3 - 2
src/main/java/org/codelibs/fess/helper/WebFsIndexHelper.java

@@ -93,6 +93,7 @@ public class WebFsIndexHelper {
 
 
         final SystemHelper systemHelper = ComponentUtil.getSystemHelper();
         final SystemHelper systemHelper = ComponentUtil.getSystemHelper();
         final FessConfig fessConfig = ComponentUtil.getFessConfig();
         final FessConfig fessConfig = ComponentUtil.getFessConfig();
+        final ProtocolHelper protocolHelper = ComponentUtil.getProtocolHelper();
 
 
         final long startTime = systemHelper.getCurrentTimeAsLong();
         final long startTime = systemHelper.getCurrentTimeAsLong();
 
 
@@ -154,7 +155,7 @@ public class WebFsIndexHelper {
 
 
             // set urls
             // set urls
             split(urlsStr, "[\r\n]").of(stream -> stream.filter(StringUtil::isNotBlank).map(String::trim).distinct().forEach(urlValue -> {
             split(urlsStr, "[\r\n]").of(stream -> stream.filter(StringUtil::isNotBlank).map(String::trim).distinct().forEach(urlValue -> {
-                if (!urlValue.startsWith("#") && fessConfig.isValidCrawlerWebProtocol(urlValue)) {
+                if (!urlValue.startsWith("#") && protocolHelper.isValidWebProtocol(urlValue)) {
                     final String u = duplicateHostHelper.convert(urlValue);
                     final String u = duplicateHostHelper.convert(urlValue);
                     crawler.addUrl(u);
                     crawler.addUrl(u);
                     if (logger.isInfoEnabled()) {
                     if (logger.isInfoEnabled()) {
@@ -280,7 +281,7 @@ public class WebFsIndexHelper {
             split(pathsStr, "[\r\n]").of(stream -> stream.filter(StringUtil::isNotBlank).map(String::trim).distinct().forEach(urlValue -> {
             split(pathsStr, "[\r\n]").of(stream -> stream.filter(StringUtil::isNotBlank).map(String::trim).distinct().forEach(urlValue -> {
                 if (!urlValue.startsWith("#")) {
                 if (!urlValue.startsWith("#")) {
                     final String u;
                     final String u;
-                    if (!fessConfig.isValidCrawlerFileProtocol(urlValue)) {
+                    if (!protocolHelper.isValidFileProtocol(urlValue)) {
                         if (urlValue.startsWith("/")) {
                         if (urlValue.startsWith("/")) {
                             u = "file:" + urlValue;
                             u = "file:" + urlValue;
                         } else {
                         } else {

+ 4 - 0
src/main/java/org/codelibs/fess/mylasta/direction/FessProp.java

@@ -1190,22 +1190,26 @@ public interface FessProp {
 
 
     String getCrawlerWebProtocols();
     String getCrawlerWebProtocols();
 
 
+    @Deprecated
     default String[] getCrawlerWebProtocolsAsArray() {
     default String[] getCrawlerWebProtocolsAsArray() {
         return split(getCrawlerWebProtocols(), ",")
         return split(getCrawlerWebProtocols(), ",")
                 .get(stream -> stream.filter(StringUtil::isNotBlank).map(s -> s.trim() + ":").toArray(n -> new String[n]));
                 .get(stream -> stream.filter(StringUtil::isNotBlank).map(s -> s.trim() + ":").toArray(n -> new String[n]));
     }
     }
 
 
+    @Deprecated
     default boolean isValidCrawlerWebProtocol(final String url) {
     default boolean isValidCrawlerWebProtocol(final String url) {
         return stream(getCrawlerWebProtocolsAsArray()).get(stream -> stream.anyMatch(s -> url.startsWith(s)));
         return stream(getCrawlerWebProtocolsAsArray()).get(stream -> stream.anyMatch(s -> url.startsWith(s)));
     }
     }
 
 
     String getCrawlerFileProtocols();
     String getCrawlerFileProtocols();
 
 
+    @Deprecated
     default String[] getCrawlerFileProtocolsAsArray() {
     default String[] getCrawlerFileProtocolsAsArray() {
         return split(getCrawlerFileProtocols(), ",")
         return split(getCrawlerFileProtocols(), ",")
                 .get(stream -> stream.filter(StringUtil::isNotBlank).map(s -> s.trim() + ":").toArray(n -> new String[n]));
                 .get(stream -> stream.filter(StringUtil::isNotBlank).map(s -> s.trim() + ":").toArray(n -> new String[n]));
     }
     }
 
 
+    @Deprecated
     default boolean isValidCrawlerFileProtocol(final String url) {
     default boolean isValidCrawlerFileProtocol(final String url) {
         return stream(getCrawlerFileProtocolsAsArray()).get(stream -> stream.anyMatch(s -> url.startsWith(s)));
         return stream(getCrawlerFileProtocolsAsArray()).get(stream -> stream.anyMatch(s -> url.startsWith(s)));
     }
     }

+ 7 - 0
src/main/java/org/codelibs/fess/util/ComponentUtil.java

@@ -58,6 +58,7 @@ import org.codelibs.fess.helper.PermissionHelper;
 import org.codelibs.fess.helper.PluginHelper;
 import org.codelibs.fess.helper.PluginHelper;
 import org.codelibs.fess.helper.PopularWordHelper;
 import org.codelibs.fess.helper.PopularWordHelper;
 import org.codelibs.fess.helper.ProcessHelper;
 import org.codelibs.fess.helper.ProcessHelper;
+import org.codelibs.fess.helper.ProtocolHelper;
 import org.codelibs.fess.helper.QueryHelper;
 import org.codelibs.fess.helper.QueryHelper;
 import org.codelibs.fess.helper.RelatedContentHelper;
 import org.codelibs.fess.helper.RelatedContentHelper;
 import org.codelibs.fess.helper.RelatedQueryHelper;
 import org.codelibs.fess.helper.RelatedQueryHelper;
@@ -216,6 +217,8 @@ public final class ComponentUtil {
 
 
     private static final String RANK_FUSION_PROCESSOR = "rankFusionProcessor";
     private static final String RANK_FUSION_PROCESSOR = "rankFusionProcessor";
 
 
+    private static final String PROTOCOL_HELPER = "protocolHelper";
+
     private static IndexingHelper indexingHelper;
     private static IndexingHelper indexingHelper;
 
 
     private static CrawlingConfigHelper crawlingConfigHelper;
     private static CrawlingConfigHelper crawlingConfigHelper;
@@ -521,6 +524,10 @@ public final class ComponentUtil {
         return getComponent(RANK_FUSION_PROCESSOR);
         return getComponent(RANK_FUSION_PROCESSOR);
     }
     }
 
 
+    /**
+     * Returns the component registered under the name {@code "protocolHelper"}.
+     *
+     * @return the {@link ProtocolHelper} obtained via {@code getComponent(PROTOCOL_HELPER)}
+     */
+    public static ProtocolHelper getProtocolHelper() {
+        return getComponent(PROTOCOL_HELPER);
+    }
+
     @SuppressWarnings("unchecked")
     @SuppressWarnings("unchecked")
     public static <T> T getComponent(final Class<T> clazz) {
     public static <T> T getComponent(final Class<T> clazz) {
         try {
         try {

+ 2 - 2
src/main/java/org/codelibs/fess/validation/UriTypeValidator.java

@@ -28,8 +28,8 @@ public class UriTypeValidator implements ConstraintValidator<UriType, String> {
     @Override
     @Override
     public void initialize(final UriType uriType) {
     public void initialize(final UriType uriType) {
         protocols = switch (uriType.protocolType()) {
         protocols = switch (uriType.protocolType()) {
-        case WEB -> ComponentUtil.getFessConfig().getCrawlerWebProtocolsAsArray();
-        case FILE -> ComponentUtil.getFessConfig().getCrawlerFileProtocolsAsArray();
+        case WEB -> ComponentUtil.getProtocolHelper().getWebProtocols();
+        case FILE -> ComponentUtil.getProtocolHelper().getFileProtocols();
         default -> throw new ConstraintDefinitionException("protocolType is emtpy.");
         default -> throw new ConstraintDefinitionException("protocolType is emtpy.");
         };
         };
     }
     }

+ 3 - 1
src/main/resources/fess.xml

@@ -36,9 +36,11 @@
 	</component>
 	</component>
 	<component name="pathMappingHelper" class="org.codelibs.fess.helper.PathMappingHelper">
 	<component name="pathMappingHelper" class="org.codelibs.fess.helper.PathMappingHelper">
 	</component>
 	</component>
+	<component name="permissionHelper" class="org.codelibs.fess.helper.PermissionHelper">
+	</component>
 	<component name="processHelper" class="org.codelibs.fess.helper.ProcessHelper">
 	<component name="processHelper" class="org.codelibs.fess.helper.ProcessHelper">
 	</component>
 	</component>
-	<component name="permissionHelper" class="org.codelibs.fess.helper.PermissionHelper">
+	<component name="protocolHelper" class="org.codelibs.fess.helper.ProtocolHelper">
 	</component>
 	</component>
 	<component name="sambaHelper" class="org.codelibs.fess.helper.SambaHelper">
 	<component name="sambaHelper" class="org.codelibs.fess.helper.SambaHelper">
 	</component>
 	</component>

+ 102 - 0
src/test/java/org/codelibs/fess/helper/ProtocolHelperTest.java

@@ -0,0 +1,102 @@
+/*
+ * Copyright 2012-2024 CodeLibs Project and the Others.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied. See the License for the specific language
+ * governing permissions and limitations under the License.
+ */
+package org.codelibs.fess.helper;
+
+import org.codelibs.fess.mylasta.direction.FessConfig;
+import org.codelibs.fess.unit.UnitFessTestCase;
+import org.codelibs.fess.util.ComponentUtil;
+
+/**
+ * Tests registering additional web and file protocols on {@link ProtocolHelper}.
+ */
+public class ProtocolHelperTest extends UnitFessTestCase {
+
+    /** Adding "httpx" extends only the web protocols, and adding it twice is a no-op. */
+    public void test_add_httpx() {
+        final ProtocolHelper helper = newInitializedHelper();
+        assertProtocols(helper.getWebProtocols(), "http:", "https:");
+        assertProtocols(helper.getFileProtocols(), "file:", "smb:");
+
+        assertFalse(helper.isValidWebProtocol("httpx://test"));
+
+        helper.addWebProtocol("httpx");
+        assertProtocols(helper.getWebProtocols(), "http:", "https:", "httpx:");
+        assertProtocols(helper.getFileProtocols(), "file:", "smb:");
+
+        assertTrue(helper.isValidWebProtocol("httpx://test"));
+
+        helper.addWebProtocol("httpx");
+        assertEquals(3, helper.getWebProtocols().length);
+        assertEquals(2, helper.getFileProtocols().length);
+    }
+
+    /** Adding "smbx" extends only the file protocols, and adding it twice is a no-op. */
+    public void test_add_smbx() {
+        final ProtocolHelper helper = newInitializedHelper();
+        assertProtocols(helper.getWebProtocols(), "http:", "https:");
+        assertProtocols(helper.getFileProtocols(), "file:", "smb:");
+
+        assertFalse(helper.isValidFileProtocol("smbx://test"));
+
+        helper.addFileProtocol("smbx");
+        assertProtocols(helper.getWebProtocols(), "http:", "https:");
+        assertProtocols(helper.getFileProtocols(), "file:", "smb:", "smbx:");
+
+        assertTrue(helper.isValidFileProtocol("smbx://test"));
+
+        helper.addFileProtocol("smbx");
+        assertEquals(2, helper.getWebProtocols().length);
+        assertEquals(3, helper.getFileProtocols().length);
+    }
+
+    /** Installs a config with "http,https" / "file,smb" and returns a helper initialized from it. */
+    private ProtocolHelper newInitializedHelper() {
+        ComponentUtil.setFessConfig(new FessConfig.SimpleImpl() {
+            @Override
+            public String getCrawlerWebProtocols() {
+                return "http,https";
+            }
+
+            @Override
+            public String getCrawlerFileProtocols() {
+                return "file,smb";
+            }
+        });
+
+        final ProtocolHelper helper = new ProtocolHelper();
+        helper.init();
+        return helper;
+    }
+
+    /** Asserts the array length first, then every element in order. */
+    private void assertProtocols(final String[] actual, final String... expected) {
+        assertEquals(expected.length, actual.length);
+        for (int i = 0; i < expected.length; i++) {
+            assertEquals(expected[i], actual[i]);
+        }
+    }
+}