diff --git a/src/main/java/org/codelibs/fess/app/web/admin/fileconfig/CreateForm.java b/src/main/java/org/codelibs/fess/app/web/admin/fileconfig/CreateForm.java
index 68fa1c2ab..99b6cc92d 100644
--- a/src/main/java/org/codelibs/fess/app/web/admin/fileconfig/CreateForm.java
+++ b/src/main/java/org/codelibs/fess/app/web/admin/fileconfig/CreateForm.java
@@ -27,6 +27,7 @@ import org.codelibs.fess.app.web.CrudMode;
import org.codelibs.fess.util.ComponentUtil;
import org.codelibs.fess.util.StreamUtil;
import org.codelibs.fess.validation.UriType;
+import org.codelibs.fess.validation.UriTypeValidator.ProtocolType;
import org.lastaflute.web.validation.Required;
import org.lastaflute.web.validation.theme.conversion.ValidateTypeFailure;
@@ -50,7 +51,7 @@ public class CreateForm implements Serializable {
public String name;
@Required
- @UriType(protocols = { "file:", "smb:" })
+ @UriType(protocolType = ProtocolType.FILE)
@Size(max = 4000)
public String paths;
diff --git a/src/main/java/org/codelibs/fess/app/web/admin/webconfig/CreateForm.java b/src/main/java/org/codelibs/fess/app/web/admin/webconfig/CreateForm.java
index 65ab22814..3279a68a4 100644
--- a/src/main/java/org/codelibs/fess/app/web/admin/webconfig/CreateForm.java
+++ b/src/main/java/org/codelibs/fess/app/web/admin/webconfig/CreateForm.java
@@ -27,6 +27,7 @@ import org.codelibs.fess.app.web.CrudMode;
import org.codelibs.fess.util.ComponentUtil;
import org.codelibs.fess.util.StreamUtil;
import org.codelibs.fess.validation.UriType;
+import org.codelibs.fess.validation.UriTypeValidator.ProtocolType;
import org.lastaflute.web.validation.Required;
import org.lastaflute.web.validation.theme.conversion.ValidateTypeFailure;
@@ -51,7 +52,7 @@ public class CreateForm implements Serializable {
public String name;
@Required
- @UriType(protocols = { "http:", "https:" })
+ @UriType(protocolType = ProtocolType.WEB)
@Size(max = 4000)
public String urls;
diff --git a/src/main/java/org/codelibs/fess/helper/WebFsIndexHelper.java b/src/main/java/org/codelibs/fess/helper/WebFsIndexHelper.java
index 777f9acfa..9daa56f0f 100644
--- a/src/main/java/org/codelibs/fess/helper/WebFsIndexHelper.java
+++ b/src/main/java/org/codelibs/fess/helper/WebFsIndexHelper.java
@@ -39,6 +39,7 @@ import org.codelibs.fess.crawler.service.UrlQueueService;
import org.codelibs.fess.es.config.exentity.FileConfig;
import org.codelibs.fess.es.config.exentity.WebConfig;
import org.codelibs.fess.indexer.IndexUpdater;
+import org.codelibs.fess.mylasta.direction.FessConfig;
import org.codelibs.fess.util.ComponentUtil;
import org.lastaflute.di.core.SingletonLaContainer;
import org.slf4j.Logger;
@@ -134,6 +135,7 @@ public class WebFsIndexHelper implements Serializable {
}
final SystemHelper systemHelper = ComponentUtil.getSystemHelper();
+ final FessConfig fessConfig = ComponentUtil.getFessConfig();
final long startTime = System.currentTimeMillis();
@@ -184,7 +186,7 @@ public class WebFsIndexHelper implements Serializable {
for (final String u : urls) {
if (StringUtil.isNotBlank(u)) {
final String urlValue = u.trim();
- if (!urlValue.startsWith("#")) {
+ if (!urlValue.startsWith("#") && fessConfig.isValidCrawlerWebProtocol(urlValue)) { // validate the trimmed value, which is what gets queued
crawler.addUrl(urlValue);
if (logger.isInfoEnabled()) {
logger.info("Target URL: " + urlValue);
@@ -288,7 +290,7 @@ public class WebFsIndexHelper implements Serializable {
if (StringUtil.isNotBlank(u)) {
u = u.trim();
if (!u.startsWith("#")) {
- if (!u.startsWith("file:") && !u.startsWith("smb:")) {
+ if (!fessConfig.isValidCrawlerFileProtocol(u)) {
if (u.startsWith("/")) {
u = "file:" + u;
} else {
diff --git a/src/main/java/org/codelibs/fess/mylasta/direction/FessConfig.java b/src/main/java/org/codelibs/fess/mylasta/direction/FessConfig.java
index 00c44f937..8f1fd2418 100644
--- a/src/main/java/org/codelibs/fess/mylasta/direction/FessConfig.java
+++ b/src/main/java/org/codelibs/fess/mylasta/direction/FessConfig.java
@@ -126,6 +126,12 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
/** The key of the configuration. e.g. UTF-8 */
String CRAWLER_CRAWLING_DATA_ENCODING = "crawler.crawling.data.encoding";
+ /** The key of the configuration. e.g. http,https */
+ String CRAWLER_WEB_PROTOCOLS = "crawler.web.protocols";
+
+ /** The key of the configuration. e.g. file,smb */
+ String CRAWLER_FILE_PROTOCOLS = "crawler.file.protocols";
+
/** The key of the configuration. e.g. resourceName,X-Parsed-By,Content-Encoding.*,Content-Type.* */
String CRAWLER_METADATA_CONTENT_EXCLUDES = "crawler.metadata.content.excludes";
@@ -956,6 +962,20 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
*/
String getCrawlerCrawlingDataEncoding();
+ /**
+ * Get the value for the key 'crawler.web.protocols'.
+ * The value is, e.g. http,https
+ * @return The value of found property. (NotNull: if not found, exception but basically no way)
+ */
+ String getCrawlerWebProtocols();
+
+ /**
+ * Get the value for the key 'crawler.file.protocols'.
+ * The value is, e.g. file,smb
+ * @return The value of found property. (NotNull: if not found, exception but basically no way)
+ */
+ String getCrawlerFileProtocols();
+
/**
* Get the value for the key 'crawler.metadata.content.excludes'.
* The value is, e.g. resourceName,X-Parsed-By,Content-Encoding.*,Content-Type.*
@@ -2854,6 +2874,14 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
return get(FessConfig.CRAWLER_CRAWLING_DATA_ENCODING);
}
+ public String getCrawlerWebProtocols() {
+ return get(FessConfig.CRAWLER_WEB_PROTOCOLS);
+ }
+
+ public String getCrawlerFileProtocols() {
+ return get(FessConfig.CRAWLER_FILE_PROTOCOLS);
+ }
+
public String getCrawlerMetadataContentExcludes() {
return get(FessConfig.CRAWLER_METADATA_CONTENT_EXCLUDES);
}
diff --git a/src/main/java/org/codelibs/fess/mylasta/direction/FessProp.java b/src/main/java/org/codelibs/fess/mylasta/direction/FessProp.java
index 7c007892f..f8e02e3ef 100644
--- a/src/main/java/org/codelibs/fess/mylasta/direction/FessProp.java
+++ b/src/main/java/org/codelibs/fess/mylasta/direction/FessProp.java
@@ -503,4 +503,55 @@ public interface FessProp {
return StreamUtil.of(getAuthenticationAdminUsers().split(",")).anyMatch(s -> s.equals(username));
}
+ String getCrawlerWebProtocols();
+
+ /**
+ * Converts the comma-separated value of getCrawlerWebProtocols() into URL prefixes.
+ * Blank entries are skipped; each remaining entry is trimmed and suffixed with ":".
+ * @return The prefixes that web URLs are matched against.
+ */
+ public default String[] getCrawlerWebProtocolsAsArray() {
+ final String[] names = getCrawlerWebProtocols().split(",");
+ return StreamUtil.of(names).filter(StringUtil::isNotBlank).map(String::trim).map(name -> name + ":").toArray(String[]::new);
+ }
+
+ /**
+ * Checks whether the given URL begins with one of the web protocol prefixes.
+ * @param url The URL to test, compared as-is (no trimming or case folding is done here).
+ * @return true if any prefix from getCrawlerWebProtocolsAsArray() matches the start of url.
+ */
+ public default boolean isValidCrawlerWebProtocol(final String url) {
+ for (final String prefix : getCrawlerWebProtocolsAsArray()) {
+ if (url.startsWith(prefix)) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ String getCrawlerFileProtocols();
+
+ /**
+ * Converts the comma-separated value of getCrawlerFileProtocols() into URL prefixes.
+ * Blank entries are skipped; each remaining entry is trimmed and suffixed with ":".
+ * @return The prefixes that file URLs are matched against.
+ */
+ public default String[] getCrawlerFileProtocolsAsArray() {
+ final String[] names = getCrawlerFileProtocols().split(",");
+ return StreamUtil.of(names).filter(StringUtil::isNotBlank).map(String::trim).map(name -> name + ":").toArray(String[]::new);
+ }
+
+ /**
+ * Checks whether the given URL begins with one of the file protocol prefixes.
+ * @param url The URL to test, compared as-is (no trimming or case folding is done here).
+ * @return true if any prefix from getCrawlerFileProtocolsAsArray() matches the start of url.
+ */
+ public default boolean isValidCrawlerFileProtocol(final String url) {
+ for (final String prefix : getCrawlerFileProtocolsAsArray()) {
+ if (url.startsWith(prefix)) {
+ return true;
+ }
+ }
+ return false;
+ }
}
diff --git a/src/main/java/org/codelibs/fess/validation/UriType.java b/src/main/java/org/codelibs/fess/validation/UriType.java
index a5f69c462..84a490d75 100644
--- a/src/main/java/org/codelibs/fess/validation/UriType.java
+++ b/src/main/java/org/codelibs/fess/validation/UriType.java
@@ -29,13 +29,15 @@ import java.lang.annotation.Target;
import javax.validation.Constraint;
import javax.validation.Payload;
+import org.codelibs.fess.validation.UriTypeValidator.ProtocolType;
+
@Target({ METHOD, FIELD, ANNOTATION_TYPE, CONSTRUCTOR, PARAMETER })
@Retention(RUNTIME)
@Documented
@Constraint(validatedBy = UriTypeValidator.class)
public @interface UriType {
- String[] protocols();
+ ProtocolType protocolType();
String message() default "{org.lastaflute.validator.constraints.UriType.message}";
diff --git a/src/main/java/org/codelibs/fess/validation/UriTypeValidator.java b/src/main/java/org/codelibs/fess/validation/UriTypeValidator.java
index 3f4644ec7..be4590c4b 100644
--- a/src/main/java/org/codelibs/fess/validation/UriTypeValidator.java
+++ b/src/main/java/org/codelibs/fess/validation/UriTypeValidator.java
@@ -20,15 +20,22 @@ import javax.validation.ConstraintValidator;
import javax.validation.ConstraintValidatorContext;
import org.codelibs.core.lang.StringUtil;
+import org.codelibs.fess.util.ComponentUtil;
public class UriTypeValidator implements ConstraintValidator {
private String[] protocols;
@Override
public void initialize(final UriType uriType) {
- protocols = uriType.protocols();
- if (protocols == null || protocols.length == 0) {
- throw new ConstraintDefinitionException("protocols is emtpy.");
+ switch (uriType.protocolType()) {
+ case WEB:
+ protocols = ComponentUtil.getFessConfig().getCrawlerWebProtocolsAsArray();
+ break;
+ case FILE:
+ protocols = ComponentUtil.getFessConfig().getCrawlerFileProtocolsAsArray();
+ break;
+ default:
+ throw new ConstraintDefinitionException("Unsupported protocolType: " + uriType.protocolType()); // reached only for an enum constant not handled above
}
}
@@ -58,4 +65,8 @@ public class UriTypeValidator implements ConstraintValidator {
}
return true;
}
+
+ public enum ProtocolType {
+ WEB, FILE;
+ }
}
diff --git a/src/main/resources/fess_config.properties b/src/main/resources/fess_config.properties
index c99c3f270..e8bcb5d48 100644
--- a/src/main/resources/fess_config.properties
+++ b/src/main/resources/fess_config.properties
@@ -74,6 +74,8 @@ crawler.document.unknown.hostname=unknown
crawler.document.use.site.encoding.on.english=false
crawler.document.append.data=true
crawler.crawling.data.encoding=UTF-8
+crawler.web.protocols=http,https
+crawler.file.protocols=file,smb
crawler.metadata.content.excludes=resourceName,X-Parsed-By,Content-Encoding.*,Content-Type.*
crawler.metadata.name.mapping=\
title=title:string\n\