fix #2819 add ProtocolHelper for URL protocol management and update related classes

This commit is contained in:
Shinsuke Sugaya 2024-06-17 22:35:51 +09:00
parent e097d9a20b
commit 1f01d21446
7 changed files with 207 additions and 5 deletions

View file

@ -0,0 +1,86 @@
/*
* Copyright 2012-2024 CodeLibs Project and the Others.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied. See the License for the specific language
* governing permissions and limitations under the License.
*/
package org.codelibs.fess.helper;
import static org.codelibs.core.stream.StreamUtil.split;
import static org.codelibs.core.stream.StreamUtil.stream;
import java.util.Arrays;
import javax.annotation.PostConstruct;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.codelibs.core.lang.StringUtil;
import org.codelibs.fess.mylasta.direction.FessConfig;
import org.codelibs.fess.util.ComponentUtil;
/**
 * Holds the URL protocol prefixes (for example {@code "http:"}) that are accepted
 * for web crawling and for file crawling.
 * <p>
 * The initial prefixes are read from {@link FessConfig} in {@link #init()}. Further
 * protocols can be registered afterwards with {@link #addWebProtocol(String)} and
 * {@link #addFileProtocol(String)}. Every stored prefix is the trimmed protocol name
 * followed by a colon.
 */
public class ProtocolHelper {
    private static final Logger logger = LogManager.getLogger(ProtocolHelper.class);

    /** Accepted web protocol prefixes, each ending with {@code ":"}. */
    protected String[] webProtocols = StringUtil.EMPTY_STRINGS;

    /** Accepted file protocol prefixes, each ending with {@code ":"}. */
    protected String[] fileProtocols = StringUtil.EMPTY_STRINGS;

    /**
     * Loads the web and file protocol prefixes from the comma-separated values of
     * {@link FessConfig#getCrawlerWebProtocols()} and {@link FessConfig#getCrawlerFileProtocols()}.
     * Blank entries are dropped and the remaining ones are trimmed.
     */
    @PostConstruct
    public void init() {
        final FessConfig fessConfig = ComponentUtil.getFessConfig();
        webProtocols = parseProtocols(fessConfig.getCrawlerWebProtocols());
        fileProtocols = parseProtocols(fessConfig.getCrawlerFileProtocols());
        // Guarded because Arrays.toString() is evaluated eagerly.
        if (logger.isDebugEnabled()) {
            logger.debug("web protocols: {}", Arrays.toString(webProtocols));
            logger.debug("file protocols: {}", Arrays.toString(fileProtocols));
        }
    }

    /**
     * Returns the accepted web protocol prefixes.
     *
     * @return the array currently held by this helper (not a copy); callers should treat it as read-only
     */
    public String[] getWebProtocols() {
        return webProtocols;
    }

    /**
     * Returns the accepted file protocol prefixes.
     *
     * @return the array currently held by this helper (not a copy); callers should treat it as read-only
     */
    public String[] getFileProtocols() {
        return fileProtocols;
    }

    /**
     * Checks whether the URL starts with one of the accepted web protocol prefixes.
     *
     * @param url the URL to check; {@code null} is never valid
     * @return {@code true} if the URL starts with a registered web protocol prefix
     */
    public boolean isValidWebProtocol(final String url) {
        return startsWithAny(webProtocols, url);
    }

    /**
     * Checks whether the URL starts with one of the accepted file protocol prefixes.
     *
     * @param url the URL to check; {@code null} is never valid
     * @return {@code true} if the URL starts with a registered file protocol prefix
     */
    public boolean isValidFileProtocol(final String url) {
        return startsWithAny(fileProtocols, url);
    }

    /**
     * Registers an additional web protocol. Nothing happens if the protocol is
     * {@code null}, blank, or already registered.
     *
     * @param protocol the protocol name without the trailing colon, e.g. {@code "httpx"}
     */
    public void addWebProtocol(final String protocol) {
        if (StringUtil.isBlank(protocol)) {
            // Same rule as init(): blank entries never become a prefix.
            logger.debug("ignored blank web protocol.");
            return;
        }
        final String prefix = toPrefix(protocol);
        if (contains(webProtocols, prefix)) {
            logger.debug("web protocols contains {}.", protocol);
            return;
        }
        webProtocols = append(webProtocols, prefix);
    }

    /**
     * Registers an additional file protocol. Nothing happens if the protocol is
     * {@code null}, blank, or already registered.
     *
     * @param protocol the protocol name without the trailing colon, e.g. {@code "smbx"}
     */
    public void addFileProtocol(final String protocol) {
        if (StringUtil.isBlank(protocol)) {
            // Same rule as init(): blank entries never become a prefix.
            logger.debug("ignored blank file protocol.");
            return;
        }
        final String prefix = toPrefix(protocol);
        if (contains(fileProtocols, prefix)) {
            logger.debug("file protocols contains {}.", protocol);
            return;
        }
        fileProtocols = append(fileProtocols, prefix);
    }

    /** Converts a comma-separated protocol list into an array of trimmed {@code "name:"} prefixes. */
    private static String[] parseProtocols(final String value) {
        return split(value, ",").get(
                entries -> entries.filter(StringUtil::isNotBlank).map(ProtocolHelper::toPrefix).toArray(String[]::new));
    }

    /** Builds the stored prefix form of a protocol name: trimmed name plus a colon. */
    private static String toPrefix(final String protocol) {
        return protocol.trim() + ":";
    }

    /** Returns true if {@code url} is non-null and starts with any of {@code prefixes}. */
    private static boolean startsWithAny(final String[] prefixes, final String url) {
        if (url == null) {
            return false;
        }
        return stream(prefixes).get(candidates -> candidates.anyMatch(url::startsWith));
    }

    /** Returns true if {@code prefixes} already holds exactly {@code prefix}. */
    private static boolean contains(final String[] prefixes, final String prefix) {
        return stream(prefixes).get(candidates -> candidates.anyMatch(prefix::equals));
    }

    /** Returns a new array with {@code prefix} appended; the given array is left untouched. */
    private static String[] append(final String[] prefixes, final String prefix) {
        final String[] extended = Arrays.copyOf(prefixes, prefixes.length + 1);
        extended[prefixes.length] = prefix;
        return extended;
    }
}

View file

@ -93,6 +93,7 @@ public class WebFsIndexHelper {
final SystemHelper systemHelper = ComponentUtil.getSystemHelper();
final FessConfig fessConfig = ComponentUtil.getFessConfig();
final ProtocolHelper protocolHelper = ComponentUtil.getProtocolHelper();
final long startTime = systemHelper.getCurrentTimeAsLong();
@ -154,7 +155,7 @@ public class WebFsIndexHelper {
// set urls
split(urlsStr, "[\r\n]").of(stream -> stream.filter(StringUtil::isNotBlank).map(String::trim).distinct().forEach(urlValue -> {
if (!urlValue.startsWith("#") && fessConfig.isValidCrawlerWebProtocol(urlValue)) {
if (!urlValue.startsWith("#") && protocolHelper.isValidWebProtocol(urlValue)) {
final String u = duplicateHostHelper.convert(urlValue);
crawler.addUrl(u);
if (logger.isInfoEnabled()) {
@ -280,7 +281,7 @@ public class WebFsIndexHelper {
split(pathsStr, "[\r\n]").of(stream -> stream.filter(StringUtil::isNotBlank).map(String::trim).distinct().forEach(urlValue -> {
if (!urlValue.startsWith("#")) {
final String u;
if (!fessConfig.isValidCrawlerFileProtocol(urlValue)) {
if (!protocolHelper.isValidFileProtocol(urlValue)) {
if (urlValue.startsWith("/")) {
u = "file:" + urlValue;
} else {

View file

@ -1190,22 +1190,26 @@ public interface FessProp {
String getCrawlerWebProtocols();
/**
 * Splits the comma-separated value of {@link #getCrawlerWebProtocols()} into an array of
 * prefixes. Blank entries are skipped; each remaining entry is trimmed and suffixed with a colon.
 *
 * @return the web protocol prefixes, e.g. {@code "http:"}
 * @deprecated use {@code org.codelibs.fess.helper.ProtocolHelper#getWebProtocols()} instead
 */
@Deprecated
default String[] getCrawlerWebProtocolsAsArray() {
    final String configured = getCrawlerWebProtocols();
    return split(configured, ",")
            .get(entries -> entries.filter(StringUtil::isNotBlank).map(name -> name.trim() + ":").toArray(String[]::new));
}
/**
 * Checks whether the URL starts with one of the prefixes returned by
 * {@link #getCrawlerWebProtocolsAsArray()}.
 *
 * @param url the URL to check
 * @return {@code true} if the URL starts with a configured web protocol prefix
 * @deprecated use {@code org.codelibs.fess.helper.ProtocolHelper#isValidWebProtocol(String)} instead
 */
@Deprecated
default boolean isValidCrawlerWebProtocol(final String url) {
    final String[] prefixes = getCrawlerWebProtocolsAsArray();
    return stream(prefixes).get(candidates -> candidates.anyMatch(prefix -> url.startsWith(prefix)));
}
String getCrawlerFileProtocols();
/**
 * Splits the comma-separated value of {@link #getCrawlerFileProtocols()} into an array of
 * prefixes. Blank entries are skipped; each remaining entry is trimmed and suffixed with a colon.
 *
 * @return the file protocol prefixes, e.g. {@code "file:"}
 * @deprecated use {@code org.codelibs.fess.helper.ProtocolHelper#getFileProtocols()} instead
 */
@Deprecated
default String[] getCrawlerFileProtocolsAsArray() {
    final String configured = getCrawlerFileProtocols();
    return split(configured, ",")
            .get(entries -> entries.filter(StringUtil::isNotBlank).map(name -> name.trim() + ":").toArray(String[]::new));
}
/**
 * Checks whether the URL starts with one of the prefixes returned by
 * {@link #getCrawlerFileProtocolsAsArray()}.
 *
 * @param url the URL to check
 * @return {@code true} if the URL starts with a configured file protocol prefix
 * @deprecated use {@code org.codelibs.fess.helper.ProtocolHelper#isValidFileProtocol(String)} instead
 */
@Deprecated
default boolean isValidCrawlerFileProtocol(final String url) {
    final String[] prefixes = getCrawlerFileProtocolsAsArray();
    return stream(prefixes).get(candidates -> candidates.anyMatch(prefix -> url.startsWith(prefix)));
}

View file

@ -58,6 +58,7 @@ import org.codelibs.fess.helper.PermissionHelper;
import org.codelibs.fess.helper.PluginHelper;
import org.codelibs.fess.helper.PopularWordHelper;
import org.codelibs.fess.helper.ProcessHelper;
import org.codelibs.fess.helper.ProtocolHelper;
import org.codelibs.fess.helper.QueryHelper;
import org.codelibs.fess.helper.RelatedContentHelper;
import org.codelibs.fess.helper.RelatedQueryHelper;
@ -216,6 +217,8 @@ public final class ComponentUtil {
private static final String RANK_FUSION_PROCESSOR = "rankFusionProcessor";
private static final String PROTOCOL_HELPER = "protocolHelper";
private static IndexingHelper indexingHelper;
private static CrawlingConfigHelper crawlingConfigHelper;
@ -521,6 +524,10 @@ public final class ComponentUtil {
return getComponent(RANK_FUSION_PROCESSOR);
}
/**
 * Looks up the component registered under the {@code "protocolHelper"} name.
 *
 * @return the {@link ProtocolHelper} component
 */
public static ProtocolHelper getProtocolHelper() {
    final ProtocolHelper helper = getComponent(PROTOCOL_HELPER);
    return helper;
}
@SuppressWarnings("unchecked")
public static <T> T getComponent(final Class<T> clazz) {
try {

View file

@ -28,8 +28,8 @@ public class UriTypeValidator implements ConstraintValidator<UriType, String> {
@Override
public void initialize(final UriType uriType) {
protocols = switch (uriType.protocolType()) {
case WEB -> ComponentUtil.getFessConfig().getCrawlerWebProtocolsAsArray();
case FILE -> ComponentUtil.getFessConfig().getCrawlerFileProtocolsAsArray();
case WEB -> ComponentUtil.getProtocolHelper().getWebProtocols();
case FILE -> ComponentUtil.getProtocolHelper().getFileProtocols();
default -> throw new ConstraintDefinitionException("protocolType is emtpy.");
};
}

View file

@ -36,9 +36,11 @@
</component>
<component name="pathMappingHelper" class="org.codelibs.fess.helper.PathMappingHelper">
</component>
<component name="permissionHelper" class="org.codelibs.fess.helper.PermissionHelper">
</component>
<component name="processHelper" class="org.codelibs.fess.helper.ProcessHelper">
</component>
<component name="permissionHelper" class="org.codelibs.fess.helper.PermissionHelper">
<component name="protocolHelper" class="org.codelibs.fess.helper.ProtocolHelper">
</component>
<component name="sambaHelper" class="org.codelibs.fess.helper.SambaHelper">
</component>

View file

@ -0,0 +1,102 @@
/*
* Copyright 2012-2024 CodeLibs Project and the Others.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied. See the License for the specific language
* governing permissions and limitations under the License.
*/
package org.codelibs.fess.helper;
import org.codelibs.fess.mylasta.direction.FessConfig;
import org.codelibs.fess.unit.UnitFessTestCase;
import org.codelibs.fess.util.ComponentUtil;
/**
 * Tests for {@link ProtocolHelper}: loading the configured protocols and registering
 * additional web and file protocols at runtime.
 */
public class ProtocolHelperTest extends UnitFessTestCase {

    /** Adding a web protocol extends only the web list, and adding it twice is a no-op. */
    public void test_add_httpx() {
        final ProtocolHelper helper = createInitializedHelper();
        assertProtocols(helper.getWebProtocols(), "http:", "https:");
        assertProtocols(helper.getFileProtocols(), "file:", "smb:");

        assertFalse(helper.isValidWebProtocol("httpx://test"));
        helper.addWebProtocol("httpx");

        assertProtocols(helper.getWebProtocols(), "http:", "https:", "httpx:");
        assertProtocols(helper.getFileProtocols(), "file:", "smb:");
        assertTrue(helper.isValidWebProtocol("httpx://test"));

        // Registering the same protocol again must not grow either list.
        helper.addWebProtocol("httpx");
        assertEquals(3, helper.getWebProtocols().length);
        assertEquals(2, helper.getFileProtocols().length);
    }

    /** Adding a file protocol extends only the file list, and adding it twice is a no-op. */
    public void test_add_smbx() {
        final ProtocolHelper helper = createInitializedHelper();
        assertProtocols(helper.getWebProtocols(), "http:", "https:");
        assertProtocols(helper.getFileProtocols(), "file:", "smb:");

        assertFalse(helper.isValidFileProtocol("smbx://test"));
        helper.addFileProtocol("smbx");

        assertProtocols(helper.getWebProtocols(), "http:", "https:");
        assertProtocols(helper.getFileProtocols(), "file:", "smb:", "smbx:");
        assertTrue(helper.isValidFileProtocol("smbx://test"));

        // Registering the same protocol again must not grow either list.
        helper.addFileProtocol("smbx");
        assertEquals(2, helper.getWebProtocols().length);
        assertEquals(3, helper.getFileProtocols().length);
    }

    /** Installs a config with "http,https" / "file,smb" and returns a helper initialized from it. */
    private ProtocolHelper createInitializedHelper() {
        ComponentUtil.setFessConfig(new FessConfig.SimpleImpl() {
            @Override
            public String getCrawlerWebProtocols() {
                return "http,https";
            }

            @Override
            public String getCrawlerFileProtocols() {
                return "file,smb";
            }
        });
        final ProtocolHelper helper = new ProtocolHelper();
        helper.init();
        return helper;
    }

    /** Asserts the array length first, then every element in order. */
    private void assertProtocols(final String[] actual, final String... expected) {
        assertEquals(expected.length, actual.length);
        for (int i = 0; i < expected.length; i++) {
            assertEquals(expected[i], actual[i]);
        }
    }
}