Browse Source

Merge branch 'master' into 10.3.x

Shinsuke Sugaya 8 years ago
parent
commit
d1ae7712a9

+ 2 - 6
src/main/java/org/codelibs/fess/mylasta/direction/FessConfig.java

@@ -140,9 +140,7 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
     /** The key of the configuration. e.g. false */
     /** The key of the configuration. e.g. false */
     String CRAWLER_DOCUMENT_DUPLICATE_TERM_REMOVED = "crawler.document.duplicate.term.removed";
     String CRAWLER_DOCUMENT_DUPLICATE_TERM_REMOVED = "crawler.document.duplicate.term.removed";
 
 
-    /** The key of the configuration. e.g. 	
-    
-       ᠎           ​   �¶ */
+    /** The key of the configuration. e.g. u0009u000Au000Bu000Cu000Du001Cu001Du001Eu001Fu0020u00A0u1680u180Eu2000u2001u2002u2003u2004u2005u2006u2007u2008u2009u200Au200Bu202Fu205Fu3000uFEFFuFFFDu00B6 */
     String CRAWLER_DOCUMENT_SPACE_CHARS = "crawler.document.space.chars";
     String CRAWLER_DOCUMENT_SPACE_CHARS = "crawler.document.space.chars";
 
 
     /** The key of the configuration. e.g. UTF-8 */
     /** The key of the configuration. e.g. UTF-8 */
@@ -1358,9 +1356,7 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
 
 
     /**
     /**
      * Get the value for the key 'crawler.document.space.chars'. <br>
      * Get the value for the key 'crawler.document.space.chars'. <br>
-     * The value is, e.g. 	
-    
-       ᠎           ​   �¶ <br>
+     * The value is, e.g. u0009u000Au000Bu000Cu000Du001Cu001Du001Eu001Fu0020u00A0u1680u180Eu2000u2001u2002u2003u2004u2005u2006u2007u2008u2009u200Au200Bu202Fu205Fu3000uFEFFuFFFDu00B6 <br>
      * @return The value of found property. (NotNull: if not found, exception but basically no way)
      * @return The value of found property. (NotNull: if not found, exception but basically no way)
      */
      */
     String getCrawlerDocumentSpaceChars();
     String getCrawlerDocumentSpaceChars();

+ 12 - 4
src/main/java/org/codelibs/fess/mylasta/direction/FessProp.java

@@ -1305,10 +1305,18 @@ public interface FessProp {
     public default int[] getCrawlerDocumentSpaceCharsAsArray() {
     public default int[] getCrawlerDocumentSpaceCharsAsArray() {
         int[] spaceChars = (int[]) propMap.get(CRAWLER_DOCUMENT_SPACE_CHARS);
         int[] spaceChars = (int[]) propMap.get(CRAWLER_DOCUMENT_SPACE_CHARS);
         if (spaceChars == null) {
         if (spaceChars == null) {
-            final int length = getCrawlerDocumentSpaceChars().length();
-            spaceChars = new int[length];
-            for (int i = 0; i < length; i++) {
-                spaceChars[i] = getCrawlerDocumentSpaceChars().codePointAt(i);
+            String spaceStr = getCrawlerDocumentSpaceChars();
+            if (spaceStr.startsWith("u")) {
+                spaceChars =
+                        split(spaceStr, "u").get(
+                                stream -> stream.filter(StringUtil::isNotBlank).mapToInt(s -> Integer.parseInt(s, 16)).toArray());
+            } else {
+                // backward compatibility
+                final int length = spaceStr.length();
+                spaceChars = new int[length];
+                for (int i = 0; i < length; i++) {
+                    spaceChars[i] = spaceStr.codePointAt(i);
+                }
             }
             }
             propMap.put(CRAWLER_DOCUMENT_SPACE_CHARS, spaceChars);
             propMap.put(CRAWLER_DOCUMENT_SPACE_CHARS, spaceChars);
         }
         }

+ 1 - 1
src/main/resources/fess_config.properties

@@ -85,7 +85,7 @@ crawler.document.append.data=true
 crawler.document.max.alphanum.term.size=20
 crawler.document.max.alphanum.term.size=20
 crawler.document.max.symbol.term.size=10
 crawler.document.max.symbol.term.size=10
 crawler.document.duplicate.term.removed=false
 crawler.document.duplicate.term.removed=false
-crawler.document.space.chars=\u0009\u000A\u000B\u000C\u000D\u001C\u001D\u001E\u001F\u0020\u00A0\u1680\u180E\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200A\u200B\u202F\u205F\u3000\uFEFF\uFFFD\u00B6
+crawler.document.space.chars=u0009u000Au000Bu000Cu000Du001Cu001Du001Eu001Fu0020u00A0u1680u180Eu2000u2001u2002u2003u2004u2005u2006u2007u2008u2009u200Au200Bu202Fu205Fu3000uFEFFuFFFDu00B6
 crawler.crawling.data.encoding=UTF-8
 crawler.crawling.data.encoding=UTF-8
 crawler.web.protocols=http,https
 crawler.web.protocols=http,https
 crawler.file.protocols=file,smb,ftp
 crawler.file.protocols=file,smb,ftp

+ 2 - 1
src/test/java/org/codelibs/fess/mylasta/direction/FessPropTest.java

@@ -109,7 +109,7 @@ public class FessPropTest extends UnitFessTestCase {
         FessConfig fessConfig = new FessConfig.SimpleImpl() {
         FessConfig fessConfig = new FessConfig.SimpleImpl() {
             @Override
             @Override
             public String getCrawlerDocumentSpaceChars() {
             public String getCrawlerDocumentSpaceChars() {
-                return "\u0020\u3000";
+                return "u0020u3000";
             }
             }
         };
         };
 
 
@@ -118,4 +118,5 @@ public class FessPropTest extends UnitFessTestCase {
         assertEquals(32, spaceChars[0]);
         assertEquals(32, spaceChars[0]);
         assertEquals(12288, spaceChars[1]);
         assertEquals(12288, spaceChars[1]);
     }
     }
+
 }
 }