Explorar el Código

Merge pull request #359 from codelibs/10.0.x

Merge from 10.0.x
Shinsuke Sugaya hace 9 años
padre
commit
1cd98b8b90

+ 1 - 1
pom.xml

@@ -59,7 +59,7 @@
 		<utflute.version>0.6.0F</utflute.version>
 
 		<!-- Crawler -->
-		<crawler.version>1.0.3</crawler.version>
+		<crawler.version>1.0.4-SNAPSHOT</crawler.version>
 
 		<!-- Suggest -->
 		<suggest.version>2.1.1</suggest.version>

+ 10 - 5
src/main/java/org/codelibs/fess/app/web/admin/design/AdminDesignAction.java

@@ -27,6 +27,7 @@ import javax.annotation.Resource;
 
 import org.apache.commons.io.FileUtils;
 import org.codelibs.core.io.FileUtil;
+import org.codelibs.core.io.ResourceUtil;
 import org.codelibs.core.lang.StringUtil;
 import org.codelibs.core.misc.DynamicProperties;
 import org.codelibs.fess.Constants;
@@ -134,22 +135,26 @@ public class AdminDesignAction extends FessAdminAction implements Serializable {
             throwValidationError(messages -> messages.addErrorsDesignFileNameIsNotFound("designFile"), () -> asListHtml());
         }
 
-        String baseDir = null;
+        File uploadFile = null;
         // normalize filename
         if (checkFileType(fileName, fessConfig.getSupportedUploadedMediaExtentionsAsArray())
                 && checkFileType(uploadedFileName, fessConfig.getSupportedUploadedMediaExtentionsAsArray())) {
-            baseDir = "/images/";
+            uploadFile = new File(getServletContext().getRealPath("/images/" + fileName));
         } else if (checkFileType(fileName, fessConfig.getSupportedUploadedCssExtentionsAsArray())
                 && checkFileType(uploadedFileName, fessConfig.getSupportedUploadedCssExtentionsAsArray())) {
-            baseDir = "/css/";
+            uploadFile = new File(getServletContext().getRealPath("/css/" + fileName));
         } else if (checkFileType(fileName, fessConfig.getSupportedUploadedJsExtentionsAsArray())
                 && checkFileType(uploadedFileName, fessConfig.getSupportedUploadedJsExtentionsAsArray())) {
-            baseDir = "/js/";
+            uploadFile = new File(getServletContext().getRealPath("/js/" + fileName));
+        } else if (fessConfig.isSupportedUploadedFile(fileName) || fessConfig.isSupportedUploadedFile(uploadedFileName)) {
+            uploadFile = ResourceUtil.getResourceAsFileNoException(fileName);
+            if (uploadFile == null) {
+                throwValidationError(messages -> messages.addErrorsDesignFileNameIsNotFound("designFileName"), () -> asListHtml());
+            }
         } else {
             throwValidationError(messages -> messages.addErrorsDesignFileIsUnsupportedType("designFileName"), () -> asListHtml());
         }
 
-        final File uploadFile = new File(getServletContext().getRealPath(baseDir + fileName));
         final File parentFile = uploadFile.getParentFile();
         if (!parentFile.exists() && !parentFile.mkdirs()) {
             logger.warn("Could not create " + parentFile.getAbsolutePath());

+ 1 - 1
src/main/java/org/codelibs/fess/crawler/FessCrawlerThread.java

@@ -161,7 +161,7 @@ public class FessCrawlerThread extends CrawlerThread {
                 }
             } finally {
                 if (responseData != null) {
-                    IOUtils.closeQuietly(responseData.getResponseBody());
+                    IOUtils.closeQuietly(responseData);
                 }
             }
         }

+ 6 - 7
src/main/java/org/codelibs/fess/crawler/transformer/AbstractFessFileTransformer.java

@@ -27,7 +27,6 @@ import java.util.List;
 import java.util.Map;
 import java.util.Set;
 
-import org.apache.commons.io.IOUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.tika.metadata.HttpHeaders;
 import org.apache.tika.metadata.TikaMetadataKeys;
@@ -79,12 +78,11 @@ public abstract class AbstractFessFileTransformer extends AbstractTransformer im
 
     @Override
     public ResultData transform(final ResponseData responseData) {
-        if (responseData == null || responseData.getResponseBody() == null) {
+        if (responseData == null || !responseData.hasResponseBody()) {
             throw new CrawlingAccessException("No response body.");
         }
 
         final Extractor extractor = getExtractor(responseData);
-        final InputStream in = responseData.getResponseBody();
         final Map<String, String> params = new HashMap<String, String>();
         params.put(TikaMetadataKeys.RESOURCE_NAME_KEY, getResourceName(responseData));
         final String mimeType = responseData.getMimeType();
@@ -94,7 +92,7 @@ public abstract class AbstractFessFileTransformer extends AbstractTransformer im
         final Map<String, Object> dataMap = new HashMap<String, Object>();
         final Map<String, Object> metaDataMap = new HashMap<>();
         String content;
-        try {
+        try (final InputStream in = responseData.getResponseBody()) {
             final ExtractData extractData = extractor.getText(in, params);
             content = extractData.getContent();
             if (fessConfig.isCrawlerDocumentFileIgnoreEmptyContent() && StringUtil.isBlank(content)) {
@@ -148,8 +146,6 @@ public abstract class AbstractFessFileTransformer extends AbstractTransformer im
             final CrawlingAccessException rcae = new CrawlingAccessException("Could not get a text from " + responseData.getUrl(), e);
             rcae.setLogLevel(CrawlingAccessException.WARN);
             throw rcae;
-        } finally {
-            IOUtils.closeQuietly(in);
         }
         if (content == null) {
             content = StringUtil.EMPTY;
@@ -308,7 +304,10 @@ public abstract class AbstractFessFileTransformer extends AbstractTransformer im
         }
         putResultDataBody(dataMap, fessConfig.getIndexFieldRole(), roleTypeList);
         // TODO date
-        // TODO lang
+        // lang
+        if (StringUtil.isNotBlank(fessConfig.getCrawlerDocumentFileDefaultLang())) {
+            putResultDataBody(dataMap, fessConfig.getIndexFieldLang(), fessConfig.getCrawlerDocumentFileDefaultLang());
+        }
         // id
         putResultDataBody(dataMap, fessConfig.getIndexFieldId(), crawlingInfoHelper.generateId(dataMap));
         // parentId

+ 56 - 83
src/main/java/org/codelibs/fess/crawler/transformer/FessXpathTransformer.java

@@ -16,9 +16,6 @@
 package org.codelibs.fess.crawler.transformer;
 
 import java.io.BufferedInputStream;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileNotFoundException;
 import java.net.MalformedURLException;
 import java.net.URL;
 import java.util.ArrayList;
@@ -33,7 +30,6 @@ import java.util.Set;
 import javax.annotation.PostConstruct;
 import javax.xml.transform.TransformerException;
 
-import org.apache.commons.io.IOUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.xpath.objects.XObject;
 import org.codelibs.core.io.InputStreamUtil;
@@ -51,7 +47,6 @@ import org.codelibs.fess.crawler.exception.CrawlerSystemException;
 import org.codelibs.fess.crawler.exception.CrawlingAccessException;
 import org.codelibs.fess.crawler.transformer.impl.XpathTransformer;
 import org.codelibs.fess.crawler.util.CrawlingParameterUtil;
-import org.codelibs.fess.crawler.util.ResponseDataUtil;
 import org.codelibs.fess.es.config.exentity.CrawlingConfig;
 import org.codelibs.fess.es.config.exentity.CrawlingConfig.ConfigName;
 import org.codelibs.fess.helper.CrawlingConfigHelper;
@@ -99,88 +94,67 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf
 
     @Override
     protected void storeData(final ResponseData responseData, final ResultData resultData) {
-        final File tempFile = ResponseDataUtil.createResponseBodyFile(responseData);
-        try {
-            final DOMParser parser = getDomParser();
-            BufferedInputStream bis = null;
-            try {
-                bis = new BufferedInputStream(new FileInputStream(tempFile));
-                final byte[] bomBytes = new byte[UTF8_BOM_SIZE];
-                bis.mark(UTF8_BOM_SIZE);
-                bis.read(bomBytes); // NOSONAR
-                if (!isUtf8BomBytes(bomBytes)) {
-                    bis.reset();
-                }
-                final InputSource is = new InputSource(bis);
-                if (responseData.getCharSet() != null) {
-                    is.setEncoding(responseData.getCharSet());
-                }
-                parser.parse(is);
-            } catch (final Exception e) {
-                throw new CrawlingAccessException("Could not parse " + responseData.getUrl(), e);
-            } finally {
-                IOUtils.closeQuietly(bis);
+        final DOMParser parser = getDomParser();
+        try (final BufferedInputStream bis = new BufferedInputStream(responseData.getResponseBody())) {
+            final byte[] bomBytes = new byte[UTF8_BOM_SIZE];
+            bis.mark(UTF8_BOM_SIZE);
+            bis.read(bomBytes); // NOSONAR
+            if (!isUtf8BomBytes(bomBytes)) {
+                bis.reset();
             }
-
-            final Document document = parser.getDocument();
-
-            final Map<String, Object> dataMap = new LinkedHashMap<String, Object>();
-            for (final Map.Entry<String, String> entry : fieldRuleMap.entrySet()) {
-                final String path = entry.getValue();
-                try {
-                    final XObject xObj = getXPathAPI().eval(document, path);
-                    final int type = xObj.getType();
-                    switch (type) {
-                    case XObject.CLASS_BOOLEAN:
-                        final boolean b = xObj.bool();
-                        putResultDataBody(dataMap, entry.getKey(), Boolean.toString(b));
-                        break;
-                    case XObject.CLASS_NUMBER:
-                        final double d = xObj.num();
-                        putResultDataBody(dataMap, entry.getKey(), Double.toString(d));
-                        break;
-                    case XObject.CLASS_STRING:
-                        final String str = xObj.str();
-                        putResultDataBody(dataMap, entry.getKey(), str);
-                        break;
-                    case XObject.CLASS_NULL:
-                    case XObject.CLASS_UNKNOWN:
-                    case XObject.CLASS_NODESET:
-                    case XObject.CLASS_RTREEFRAG:
-                    case XObject.CLASS_UNRESOLVEDVARIABLE:
-                    default:
-                        final Node value = getXPathAPI().selectSingleNode(document, entry.getValue());
-                        putResultDataBody(dataMap, entry.getKey(), value != null ? value.getTextContent() : null);
-                        break;
-                    }
-                } catch (final TransformerException e) {
-                    logger.warn("Could not parse a value of " + entry.getKey() + ":" + entry.getValue());
-                }
+            final InputSource is = new InputSource(bis);
+            if (responseData.getCharSet() != null) {
+                is.setEncoding(responseData.getCharSet());
             }
+            parser.parse(is);
+        } catch (final Exception e) {
+            throw new CrawlingAccessException("Could not parse " + responseData.getUrl(), e);
+        }
 
-            FileInputStream fis = null;
-            try {
-                fis = new FileInputStream(tempFile);
-                responseData.setResponseBody(fis);
-                putAdditionalData(dataMap, responseData, document);
-            } catch (final FileNotFoundException e) {
-                logger.warn(tempFile + " does not exist.", e);
-                putAdditionalData(dataMap, responseData, document);
-            } finally {
-                IOUtils.closeQuietly(fis);
-            }
+        final Document document = parser.getDocument();
 
+        final Map<String, Object> dataMap = new LinkedHashMap<String, Object>();
+        for (final Map.Entry<String, String> entry : fieldRuleMap.entrySet()) {
+            final String path = entry.getValue();
             try {
-                resultData.setData(SerializeUtil.fromObjectToBinary(dataMap));
-            } catch (final Exception e) {
-                throw new CrawlingAccessException("Could not serialize object: " + responseData.getUrl(), e);
-            }
-            resultData.setEncoding(charsetName);
-        } finally {
-            if (!tempFile.delete()) {
-                logger.warn("Could not delete a temp file: " + tempFile);
+                final XObject xObj = getXPathAPI().eval(document, path);
+                final int type = xObj.getType();
+                switch (type) {
+                case XObject.CLASS_BOOLEAN:
+                    final boolean b = xObj.bool();
+                    putResultDataBody(dataMap, entry.getKey(), Boolean.toString(b));
+                    break;
+                case XObject.CLASS_NUMBER:
+                    final double d = xObj.num();
+                    putResultDataBody(dataMap, entry.getKey(), Double.toString(d));
+                    break;
+                case XObject.CLASS_STRING:
+                    final String str = xObj.str();
+                    putResultDataBody(dataMap, entry.getKey(), str);
+                    break;
+                case XObject.CLASS_NULL:
+                case XObject.CLASS_UNKNOWN:
+                case XObject.CLASS_NODESET:
+                case XObject.CLASS_RTREEFRAG:
+                case XObject.CLASS_UNRESOLVEDVARIABLE:
+                default:
+                    final Node value = getXPathAPI().selectSingleNode(document, entry.getValue());
+                    putResultDataBody(dataMap, entry.getKey(), value != null ? value.getTextContent() : null);
+                    break;
+                }
+            } catch (final TransformerException e) {
+                logger.warn("Could not parse a value of " + entry.getKey() + ":" + entry.getValue());
             }
         }
+
+        putAdditionalData(dataMap, responseData, document);
+
+        try {
+            resultData.setData(SerializeUtil.fromObjectToBinary(dataMap));
+        } catch (final Exception e) {
+            throw new CrawlingAccessException("Could not serialize object: " + responseData.getUrl(), e);
+        }
+        resultData.setEncoding(charsetName);
     }
 
     protected void putAdditionalData(final Map<String, Object> dataMap, final ResponseData responseData, final Document document) {
@@ -243,10 +217,9 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf
                 if (charSet == null) {
                     charSet = Constants.UTF_8;
                 }
-                try {
+                try (final BufferedInputStream is = new BufferedInputStream(responseData.getResponseBody())) {
                     // cache
-                    putResultDataBody(dataMap, fessConfig.getIndexFieldCache(),
-                            new String(InputStreamUtil.getBytes(responseData.getResponseBody()), charSet));
+                    putResultDataBody(dataMap, fessConfig.getIndexFieldCache(), new String(InputStreamUtil.getBytes(is), charSet));
                     putResultDataBody(dataMap, fessConfig.getIndexFieldHasCache(), Constants.TRUE);
                 } catch (final Exception e) {
                     logger.warn("Failed to write a cache: " + sessionId + ":" + responseData, e);

+ 1 - 1
src/main/java/org/codelibs/fess/helper/ViewHelper.java

@@ -527,7 +527,7 @@ public class ViewHelper implements Serializable {
         writeContentType(response, responseData);
         writeNoCache(response, responseData);
         response.stream(out -> {
-            try (InputStream is = new BufferedInputStream(responseData.getResponseBody())) {
+            try (final InputStream is = new BufferedInputStream(responseData.getResponseBody())) {
                 out.write(is);
             } catch (final IOException e) {
                 if (!"ClientAbortException".equals(e.getClass().getSimpleName())) {

+ 1 - 1
src/main/java/org/codelibs/fess/mylasta/action/FessLabels.java

@@ -779,7 +779,7 @@ public class FessLabels extends ActionMessages {
     /** The key of the message: Search */
     public static final String LABELS_index_form_search_btn = "{labels.index_form_search_btn}";
 
-    /** The key of the message: Fess Search */
+    /** The key of the message: Search */
     public static final String LABELS_index_osdd_title = "{labels.index_osdd_title}";
 
     /** The key of the message: Options */

+ 40 - 0
src/main/java/org/codelibs/fess/mylasta/direction/FessConfig.java

@@ -102,6 +102,9 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
     /** The key of the configuration. e.g. jpg,jpeg,gif,png,swf */
     String SUPPORTED_UPLOADED_MEDIA_EXTENTIONS = "supported.uploaded.media.extentions";
 
+    /** The key of the configuration. e.g. license.properties */
+    String SUPPORTED_UPLOADED_FILES = "supported.uploaded.files";
+
     /** The key of the configuration. e.g. ar,bg,ca,da,de,el,en,es,eu,fa,fi,fr,ga,gl,hi,hu,hy,id,it,ja,lv,ko,nl,no,pt,ro,ru,sv,th,tr,zh_CN,zh_TW,zh */
     String SUPPORTED_LANGUAGES = "supported.languages";
 
@@ -173,6 +176,9 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
     /** The key of the configuration. e.g. true */
     String CRAWLER_DOCUMENT_FILE_APPEND_BODY_CONTENT = "crawler.document.file.append.body.content";
 
+    /** The key of the configuration. e.g.  */
+    String CRAWLER_DOCUMENT_FILE_DEFAULT_LANG = "crawler.document.file.default.lang";
+
     /** The key of the configuration. e.g. true */
     String CRAWLER_DOCUMENT_CACHE_ENABLE = "crawler.document.cache.enable";
 
@@ -804,6 +810,13 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
      */
     String getSupportedUploadedMediaExtentions();
 
+    /**
+     * Get the value for the key 'supported.uploaded.files'. <br>
+     * The value is, e.g. license.properties <br>
+     * @return The value of found property. (NotNull: if not found, exception but basically no way)
+     */
+    String getSupportedUploadedFiles();
+
     /**
      * Get the value for the key 'supported.languages'. <br>
      * The value is, e.g. ar,bg,ca,da,de,el,en,es,eu,fa,fi,fr,ga,gl,hi,hu,hy,id,it,ja,lv,ko,nl,no,pt,ro,ru,sv,th,tr,zh_CN,zh_TW,zh <br>
@@ -1055,6 +1068,21 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
      */
     boolean isCrawlerDocumentFileAppendBodyContent();
 
+    /**
+     * Get the value for the key 'crawler.document.file.default.lang'. <br>
+     * The value is, e.g.  <br>
+     * @return The value of found property. (NotNull: if not found, exception but basically no way)
+     */
+    String getCrawlerDocumentFileDefaultLang();
+
+    /**
+     * Get the value for the key 'crawler.document.file.default.lang' as {@link Integer}. <br>
+     * The value is, e.g.  <br>
+     * @return The value of found property. (NotNull: if not found, exception but basically no way)
+     * @throws NumberFormatException When the property is not integer.
+     */
+    Integer getCrawlerDocumentFileDefaultLangAsInteger();
+
     /**
      * Get the value for the key 'crawler.document.cache.enable'. <br>
      * The value is, e.g. true <br>
@@ -2523,6 +2551,10 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
             return get(FessConfig.SUPPORTED_UPLOADED_MEDIA_EXTENTIONS);
         }
 
+        public String getSupportedUploadedFiles() {
+            return get(FessConfig.SUPPORTED_UPLOADED_FILES);
+        }
+
         public String getSupportedLanguages() {
             return get(FessConfig.SUPPORTED_LANGUAGES);
         }
@@ -2659,6 +2691,14 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
             return is(FessConfig.CRAWLER_DOCUMENT_FILE_APPEND_BODY_CONTENT);
         }
 
+        public String getCrawlerDocumentFileDefaultLang() {
+            return get(FessConfig.CRAWLER_DOCUMENT_FILE_DEFAULT_LANG);
+        }
+
+        public Integer getCrawlerDocumentFileDefaultLangAsInteger() {
+            return getAsInteger(FessConfig.CRAWLER_DOCUMENT_FILE_DEFAULT_LANG);
+        }
+
         public String getCrawlerDocumentCacheEnable() {
             return get(FessConfig.CRAWLER_DOCUMENT_CACHE_ENABLE);
         }

+ 14 - 0
src/main/java/org/codelibs/fess/mylasta/direction/FessProp.java

@@ -411,4 +411,18 @@ public interface FessProp {
         }
         return null;
     }
+
+    String getSupportedUploadedFiles();
+
+    /**
+     * Checks whether the given name is one of the entries in the comma-separated
+     * value returned by {@link #getSupportedUploadedFiles()}. Blank entries are
+     * skipped and the remaining entries are compared with an exact match.
+     * @param name The file name to look up.
+     * @return true if the name equals one of the non-blank entries.
+     */
+    public default boolean isSupportedUploadedFile(String name) {
+        return StreamUtil.of(getSupportedUploadedFiles().split(",")).filter(s -> StringUtil.isNotBlank(s))
+                .anyMatch(s -> s.equals(name));
+    }
+
 }

+ 2 - 0
src/main/resources/fess_config.properties

@@ -60,6 +60,7 @@ max.log.output.length=4000
 supported.uploaded.js.extentions=js
 supported.uploaded.css.extentions=css
 supported.uploaded.media.extentions=jpg,jpeg,gif,png,swf
+supported.uploaded.files=license.properties
 supported.languages=ar,bg,ca,da,de,el,en,es,eu,fa,fi,fr,ga,gl,hi,hu,hy,id,it,ja,lv,ko,nl,no,pt,ro,ru,sv,th,tr,zh_CN,zh_TW,zh
 
 # ========================================================================================
@@ -95,6 +96,7 @@ crawler.document.file.max.title.length=100
 crawler.document.file.max.digest.length=200
 crawler.document.file.append.meta.content=true
 crawler.document.file.append.body.content=true
+crawler.document.file.default.lang=
 
 # cache
 crawler.document.cache.enable=true