diff --git a/src/main/java/org/codelibs/fess/app/web/admin/fileconfig/CreateForm.java b/src/main/java/org/codelibs/fess/app/web/admin/fileconfig/CreateForm.java index 68fa1c2ab..99b6cc92d 100644 --- a/src/main/java/org/codelibs/fess/app/web/admin/fileconfig/CreateForm.java +++ b/src/main/java/org/codelibs/fess/app/web/admin/fileconfig/CreateForm.java @@ -27,6 +27,7 @@ import org.codelibs.fess.app.web.CrudMode; import org.codelibs.fess.util.ComponentUtil; import org.codelibs.fess.util.StreamUtil; import org.codelibs.fess.validation.UriType; +import org.codelibs.fess.validation.UriTypeValidator.ProtocolType; import org.lastaflute.web.validation.Required; import org.lastaflute.web.validation.theme.conversion.ValidateTypeFailure; @@ -50,7 +51,7 @@ public class CreateForm implements Serializable { public String name; @Required - @UriType(protocols = { "file:", "smb:" }) + @UriType(protocolType = ProtocolType.FILE) @Size(max = 4000) public String paths; diff --git a/src/main/java/org/codelibs/fess/app/web/admin/webconfig/CreateForm.java b/src/main/java/org/codelibs/fess/app/web/admin/webconfig/CreateForm.java index 65ab22814..3279a68a4 100644 --- a/src/main/java/org/codelibs/fess/app/web/admin/webconfig/CreateForm.java +++ b/src/main/java/org/codelibs/fess/app/web/admin/webconfig/CreateForm.java @@ -27,6 +27,7 @@ import org.codelibs.fess.app.web.CrudMode; import org.codelibs.fess.util.ComponentUtil; import org.codelibs.fess.util.StreamUtil; import org.codelibs.fess.validation.UriType; +import org.codelibs.fess.validation.UriTypeValidator.ProtocolType; import org.lastaflute.web.validation.Required; import org.lastaflute.web.validation.theme.conversion.ValidateTypeFailure; @@ -51,7 +52,7 @@ public class CreateForm implements Serializable { public String name; @Required - @UriType(protocols = { "http:", "https:" }) + @UriType(protocolType = ProtocolType.WEB) @Size(max = 4000) public String urls; diff --git a/src/main/java/org/codelibs/fess/helper/WebFsIndexHelper.java 
b/src/main/java/org/codelibs/fess/helper/WebFsIndexHelper.java index 777f9acfa..9daa56f0f 100644 --- a/src/main/java/org/codelibs/fess/helper/WebFsIndexHelper.java +++ b/src/main/java/org/codelibs/fess/helper/WebFsIndexHelper.java @@ -39,6 +39,7 @@ import org.codelibs.fess.crawler.service.UrlQueueService; import org.codelibs.fess.es.config.exentity.FileConfig; import org.codelibs.fess.es.config.exentity.WebConfig; import org.codelibs.fess.indexer.IndexUpdater; +import org.codelibs.fess.mylasta.direction.FessConfig; import org.codelibs.fess.util.ComponentUtil; import org.lastaflute.di.core.SingletonLaContainer; import org.slf4j.Logger; @@ -134,6 +135,7 @@ public class WebFsIndexHelper implements Serializable { } final SystemHelper systemHelper = ComponentUtil.getSystemHelper(); + final FessConfig fessConfig = ComponentUtil.getFessConfig(); final long startTime = System.currentTimeMillis(); @@ -184,7 +186,7 @@ public class WebFsIndexHelper implements Serializable { for (final String u : urls) { if (StringUtil.isNotBlank(u)) { final String urlValue = u.trim(); - if (!urlValue.startsWith("#")) { + if (!urlValue.startsWith("#") && fessConfig.isValidCrawlerWebProtocol(urlValue)) { crawler.addUrl(urlValue); if (logger.isInfoEnabled()) { logger.info("Target URL: " + urlValue); @@ -288,7 +290,7 @@ public class WebFsIndexHelper implements Serializable { if (StringUtil.isNotBlank(u)) { u = u.trim(); if (!u.startsWith("#")) { - if (!u.startsWith("file:") && !u.startsWith("smb:")) { + if (!fessConfig.isValidCrawlerFileProtocol(u)) { if (u.startsWith("/")) { u = "file:" + u; } else { diff --git a/src/main/java/org/codelibs/fess/mylasta/direction/FessConfig.java b/src/main/java/org/codelibs/fess/mylasta/direction/FessConfig.java index 00c44f937..8f1fd2418 100644 --- a/src/main/java/org/codelibs/fess/mylasta/direction/FessConfig.java +++ b/src/main/java/org/codelibs/fess/mylasta/direction/FessConfig.java @@ -126,6 +126,12 @@ public interface FessConfig extends FessEnv, 
org.codelibs.fess.mylasta.direction /** The key of the configuration. e.g. UTF-8 */ String CRAWLER_CRAWLING_DATA_ENCODING = "crawler.crawling.data.encoding"; + /** The key of the configuration. e.g. http,https */ + String CRAWLER_WEB_PROTOCOLS = "crawler.web.protocols"; + + /** The key of the configuration. e.g. file,smb */ + String CRAWLER_FILE_PROTOCOLS = "crawler.file.protocols"; + /** The key of the configuration. e.g. resourceName,X-Parsed-By,Content-Encoding.*,Content-Type.* */ String CRAWLER_METADATA_CONTENT_EXCLUDES = "crawler.metadata.content.excludes"; @@ -956,6 +962,20 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction */ String getCrawlerCrawlingDataEncoding(); + /** + * Get the value for the key 'crawler.web.protocols'.
+ * The value is, e.g. http,https
+ * @return The value of found property. (NotNull: if not found, exception but basically no way) + */ + String getCrawlerWebProtocols(); + + /** + * Get the value for the key 'crawler.file.protocols'.
+ * The value is, e.g. file,smb
+ * @return The value of found property. (NotNull: if not found, exception but basically no way) + */ + String getCrawlerFileProtocols(); + /** * Get the value for the key 'crawler.metadata.content.excludes'.
* The value is, e.g. resourceName,X-Parsed-By,Content-Encoding.*,Content-Type.*
@@ -2854,6 +2874,14 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction return get(FessConfig.CRAWLER_CRAWLING_DATA_ENCODING); } + public String getCrawlerWebProtocols() { + return get(FessConfig.CRAWLER_WEB_PROTOCOLS); + } + + public String getCrawlerFileProtocols() { + return get(FessConfig.CRAWLER_FILE_PROTOCOLS); + } + public String getCrawlerMetadataContentExcludes() { return get(FessConfig.CRAWLER_METADATA_CONTENT_EXCLUDES); } diff --git a/src/main/java/org/codelibs/fess/mylasta/direction/FessProp.java b/src/main/java/org/codelibs/fess/mylasta/direction/FessProp.java index 7c007892f..f8e02e3ef 100644 --- a/src/main/java/org/codelibs/fess/mylasta/direction/FessProp.java +++ b/src/main/java/org/codelibs/fess/mylasta/direction/FessProp.java @@ -503,4 +503,25 @@ public interface FessProp { return StreamUtil.of(getAuthenticationAdminUsers().split(",")).anyMatch(s -> s.equals(username)); } + String getCrawlerWebProtocols(); + + public default String[] getCrawlerWebProtocolsAsArray() { + return StreamUtil.of(getCrawlerWebProtocols().split(",")).filter(s -> StringUtil.isNotBlank(s)).map(s -> s.trim() + ":") + .toArray(n -> new String[n]); + } + + public default boolean isValidCrawlerWebProtocol(final String url) { + return StreamUtil.of(getCrawlerWebProtocolsAsArray()).anyMatch(s -> url.startsWith(s)); + } + + String getCrawlerFileProtocols(); + + public default String[] getCrawlerFileProtocolsAsArray() { + return StreamUtil.of(getCrawlerFileProtocols().split(",")).filter(s -> StringUtil.isNotBlank(s)).map(s -> s.trim() + ":") + .toArray(n -> new String[n]); + } + + public default boolean isValidCrawlerFileProtocol(final String url) { + return StreamUtil.of(getCrawlerFileProtocolsAsArray()).anyMatch(s -> url.startsWith(s)); + } } diff --git a/src/main/java/org/codelibs/fess/validation/UriType.java b/src/main/java/org/codelibs/fess/validation/UriType.java index a5f69c462..84a490d75 100644 --- 
a/src/main/java/org/codelibs/fess/validation/UriType.java +++ b/src/main/java/org/codelibs/fess/validation/UriType.java @@ -29,13 +29,15 @@ import java.lang.annotation.Target; import javax.validation.Constraint; import javax.validation.Payload; +import org.codelibs.fess.validation.UriTypeValidator.ProtocolType; + @Target({ METHOD, FIELD, ANNOTATION_TYPE, CONSTRUCTOR, PARAMETER }) @Retention(RUNTIME) @Documented @Constraint(validatedBy = UriTypeValidator.class) public @interface UriType { - String[] protocols(); + ProtocolType protocolType(); String message() default "{org.lastaflute.validator.constraints.UriType.message}"; diff --git a/src/main/java/org/codelibs/fess/validation/UriTypeValidator.java b/src/main/java/org/codelibs/fess/validation/UriTypeValidator.java index 3f4644ec7..be4590c4b 100644 --- a/src/main/java/org/codelibs/fess/validation/UriTypeValidator.java +++ b/src/main/java/org/codelibs/fess/validation/UriTypeValidator.java @@ -20,15 +20,22 @@ import javax.validation.ConstraintValidator; import javax.validation.ConstraintValidatorContext; import org.codelibs.core.lang.StringUtil; +import org.codelibs.fess.util.ComponentUtil; public class UriTypeValidator implements ConstraintValidator { private String[] protocols; @Override public void initialize(final UriType uriType) { - protocols = uriType.protocols(); - if (protocols == null || protocols.length == 0) { - throw new ConstraintDefinitionException("protocols is emtpy."); + switch (uriType.protocolType()) { + case WEB: + protocols = ComponentUtil.getFessConfig().getCrawlerWebProtocolsAsArray(); + break; + case FILE: + protocols = ComponentUtil.getFessConfig().getCrawlerFileProtocolsAsArray(); + break; + default: + throw new ConstraintDefinitionException("protocolType is empty."); } } @@ -58,4 +65,8 @@ public class UriTypeValidator implements ConstraintValidator { } return true; } + + public enum ProtocolType { + WEB, FILE; + } } diff --git a/src/main/resources/fess_config.properties 
b/src/main/resources/fess_config.properties index c99c3f270..e8bcb5d48 100644 --- a/src/main/resources/fess_config.properties +++ b/src/main/resources/fess_config.properties @@ -74,6 +74,8 @@ crawler.document.unknown.hostname=unknown crawler.document.use.site.encoding.on.english=false crawler.document.append.data=true crawler.crawling.data.encoding=UTF-8 +crawler.web.protocols=http,https +crawler.file.protocols=file,smb crawler.metadata.content.excludes=resourceName,X-Parsed-By,Content-Encoding.*,Content-Type.* crawler.metadata.name.mapping=\ title=title:string\n\