Shinsuke Sugaya 9 лет назад
Родитель
Коммит
d444d1ba5b

+ 2 - 1
src/main/java/org/codelibs/fess/app/web/admin/fileconfig/CreateForm.java

@@ -27,6 +27,7 @@ import org.codelibs.fess.app.web.CrudMode;
 import org.codelibs.fess.util.ComponentUtil;
 import org.codelibs.fess.util.StreamUtil;
 import org.codelibs.fess.validation.UriType;
+import org.codelibs.fess.validation.UriTypeValidator.ProtocolType;
 import org.lastaflute.web.validation.Required;
 import org.lastaflute.web.validation.theme.conversion.ValidateTypeFailure;
 
@@ -50,7 +51,7 @@ public class CreateForm implements Serializable {
     public String name;
 
     @Required
-    @UriType(protocols = { "file:", "smb:" })
+    @UriType(protocolType = ProtocolType.FILE)
     @Size(max = 4000)
     public String paths;
 

+ 2 - 1
src/main/java/org/codelibs/fess/app/web/admin/webconfig/CreateForm.java

@@ -27,6 +27,7 @@ import org.codelibs.fess.app.web.CrudMode;
 import org.codelibs.fess.util.ComponentUtil;
 import org.codelibs.fess.util.StreamUtil;
 import org.codelibs.fess.validation.UriType;
+import org.codelibs.fess.validation.UriTypeValidator.ProtocolType;
 import org.lastaflute.web.validation.Required;
 import org.lastaflute.web.validation.theme.conversion.ValidateTypeFailure;
 
@@ -51,7 +52,7 @@ public class CreateForm implements Serializable {
     public String name;
 
     @Required
-    @UriType(protocols = { "http:", "https:" })
+    @UriType(protocolType = ProtocolType.WEB)
     @Size(max = 4000)
     public String urls;
 

+ 4 - 2
src/main/java/org/codelibs/fess/helper/WebFsIndexHelper.java

@@ -39,6 +39,7 @@ import org.codelibs.fess.crawler.service.UrlQueueService;
 import org.codelibs.fess.es.config.exentity.FileConfig;
 import org.codelibs.fess.es.config.exentity.WebConfig;
 import org.codelibs.fess.indexer.IndexUpdater;
+import org.codelibs.fess.mylasta.direction.FessConfig;
 import org.codelibs.fess.util.ComponentUtil;
 import org.lastaflute.di.core.SingletonLaContainer;
 import org.slf4j.Logger;
@@ -134,6 +135,7 @@ public class WebFsIndexHelper implements Serializable {
         }
 
         final SystemHelper systemHelper = ComponentUtil.getSystemHelper();
+        final FessConfig fessConfig = ComponentUtil.getFessConfig();
 
         final long startTime = System.currentTimeMillis();
 
@@ -184,7 +186,7 @@ public class WebFsIndexHelper implements Serializable {
             for (final String u : urls) {
                 if (StringUtil.isNotBlank(u)) {
                     final String urlValue = u.trim();
-                    if (!urlValue.startsWith("#")) {
+                    if (!urlValue.startsWith("#") && fessConfig.isValidCrawlerWebProtocol(urlValue)) { // check the trimmed value, not raw u
                         crawler.addUrl(urlValue);
                         if (logger.isInfoEnabled()) {
                             logger.info("Target URL: " + urlValue);
@@ -288,7 +290,7 @@ public class WebFsIndexHelper implements Serializable {
                 if (StringUtil.isNotBlank(u)) {
                     u = u.trim();
                     if (!u.startsWith("#")) {
-                        if (!u.startsWith("file:") && !u.startsWith("smb:")) {
+                        if (!fessConfig.isValidCrawlerFileProtocol(u)) {
                             if (u.startsWith("/")) {
                                 u = "file:" + u;
                             } else {

+ 28 - 0
src/main/java/org/codelibs/fess/mylasta/direction/FessConfig.java

@@ -126,6 +126,12 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
     /** The key of the configuration. e.g. UTF-8 */
     String CRAWLER_CRAWLING_DATA_ENCODING = "crawler.crawling.data.encoding";
 
+    /** The key of the configuration. e.g. http,https */
+    String CRAWLER_WEB_PROTOCOLS = "crawler.web.protocols";
+
+    /** The key of the configuration. e.g. file,smb */
+    String CRAWLER_FILE_PROTOCOLS = "crawler.file.protocols";
+
     /** The key of the configuration. e.g. resourceName,X-Parsed-By,Content-Encoding.*,Content-Type.* */
     String CRAWLER_METADATA_CONTENT_EXCLUDES = "crawler.metadata.content.excludes";
 
@@ -956,6 +962,20 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
      */
     String getCrawlerCrawlingDataEncoding();
 
+    /**
+     * Get the value for the key 'crawler.web.protocols'. <br>
+     * The value is, e.g. http,https <br>
+     * @return The value of found property. (NotNull: if not found, exception but basically no way)
+     */
+    String getCrawlerWebProtocols();
+
+    /**
+     * Get the value for the key 'crawler.file.protocols'. <br>
+     * The value is, e.g. file,smb <br>
+     * @return The value of found property. (NotNull: if not found, exception but basically no way)
+     */
+    String getCrawlerFileProtocols();
+
     /**
      * Get the value for the key 'crawler.metadata.content.excludes'. <br>
      * The value is, e.g. resourceName,X-Parsed-By,Content-Encoding.*,Content-Type.* <br>
@@ -2854,6 +2874,14 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
             return get(FessConfig.CRAWLER_CRAWLING_DATA_ENCODING);
         }
 
+        public String getCrawlerWebProtocols() {
+            return get(FessConfig.CRAWLER_WEB_PROTOCOLS);
+        }
+
+        public String getCrawlerFileProtocols() {
+            return get(FessConfig.CRAWLER_FILE_PROTOCOLS);
+        }
+
         public String getCrawlerMetadataContentExcludes() {
             return get(FessConfig.CRAWLER_METADATA_CONTENT_EXCLUDES);
         }

+ 21 - 0
src/main/java/org/codelibs/fess/mylasta/direction/FessProp.java

@@ -503,4 +503,25 @@ public interface FessProp {
         return StreamUtil.of(getAuthenticationAdminUsers().split(",")).anyMatch(s -> s.equals(username));
     }
 
+    String getCrawlerWebProtocols();
+
+    public default String[] getCrawlerWebProtocolsAsArray() {
+        return StreamUtil.of(getCrawlerWebProtocols().split(",")).filter(s -> StringUtil.isNotBlank(s)).map(s -> s.trim() + ":")
+                .toArray(n -> new String[n]);
+    }
+
+    public default boolean isValidCrawlerWebProtocol(final String url) {
+        return StreamUtil.of(getCrawlerWebProtocolsAsArray()).anyMatch(s -> url.startsWith(s));
+    }
+
+    String getCrawlerFileProtocols();
+
+    public default String[] getCrawlerFileProtocolsAsArray() {
+        return StreamUtil.of(getCrawlerFileProtocols().split(",")).filter(s -> StringUtil.isNotBlank(s)).map(s -> s.trim() + ":")
+                .toArray(n -> new String[n]);
+    }
+
+    public default boolean isValidCrawlerFileProtocol(final String url) {
+        return StreamUtil.of(getCrawlerFileProtocolsAsArray()).anyMatch(s -> url.startsWith(s));
+    }
 }

+ 3 - 1
src/main/java/org/codelibs/fess/validation/UriType.java

@@ -29,13 +29,15 @@ import java.lang.annotation.Target;
 import javax.validation.Constraint;
 import javax.validation.Payload;
 
+import org.codelibs.fess.validation.UriTypeValidator.ProtocolType;
+
 @Target({ METHOD, FIELD, ANNOTATION_TYPE, CONSTRUCTOR, PARAMETER })
 @Retention(RUNTIME)
 @Documented
 @Constraint(validatedBy = UriTypeValidator.class)
 public @interface UriType {
 
-    String[] protocols();
+    ProtocolType protocolType();
 
     String message() default "{org.lastaflute.validator.constraints.UriType.message}";
 

+ 14 - 3
src/main/java/org/codelibs/fess/validation/UriTypeValidator.java

@@ -20,15 +20,22 @@ import javax.validation.ConstraintValidator;
 import javax.validation.ConstraintValidatorContext;
 
 import org.codelibs.core.lang.StringUtil;
+import org.codelibs.fess.util.ComponentUtil;
 
 public class UriTypeValidator implements ConstraintValidator<UriType, String> {
     private String[] protocols;
 
     @Override
     public void initialize(final UriType uriType) {
-        protocols = uriType.protocols();
-        if (protocols == null || protocols.length == 0) {
-            throw new ConstraintDefinitionException("protocols is emtpy.");
+        switch (uriType.protocolType()) {
+        case WEB:
+            protocols = ComponentUtil.getFessConfig().getCrawlerWebProtocolsAsArray();
+            break;
+        case FILE:
+            protocols = ComponentUtil.getFessConfig().getCrawlerFileProtocolsAsArray();
+            break;
+        default:
+            throw new ConstraintDefinitionException("Unsupported protocolType: " + uriType.protocolType());
         }
     }
 
@@ -58,4 +65,8 @@ public class UriTypeValidator implements ConstraintValidator<UriType, String> {
         }
         return true;
     }
+
+    public enum ProtocolType {
+        WEB, FILE;
+    }
 }

+ 2 - 0
src/main/resources/fess_config.properties

@@ -74,6 +74,8 @@ crawler.document.unknown.hostname=unknown
 crawler.document.use.site.encoding.on.english=false
 crawler.document.append.data=true
 crawler.crawling.data.encoding=UTF-8
+crawler.web.protocols=http,https
+crawler.file.protocols=file,smb
 crawler.metadata.content.excludes=resourceName,X-Parsed-By,Content-Encoding.*,Content-Type.*
 crawler.metadata.name.mapping=\
 title=title:string\n\