Explorar o código

fix #1081 add HtmlTagBasedGenerator

Shinsuke Sugaya hai 8 anos
pai
achega
0271b5041b
Modificáronse 22 ficheiros con 666 adicións e 19 borrados
  1. 94 10
      src/main/java/org/codelibs/fess/crawler/transformer/FessXpathTransformer.java
  2. 1 0
      src/main/java/org/codelibs/fess/helper/QueryHelper.java
  3. 213 3
      src/main/java/org/codelibs/fess/mylasta/direction/FessConfig.java
  4. 22 0
      src/main/java/org/codelibs/fess/mylasta/direction/FessProp.java
  5. 4 0
      src/main/java/org/codelibs/fess/thumbnail/ThumbnailGenerator.java
  6. 4 4
      src/main/java/org/codelibs/fess/thumbnail/ThumbnailManager.java
  7. 11 0
      src/main/java/org/codelibs/fess/thumbnail/impl/BaseThumbnailGenerator.java
  8. 121 0
      src/main/java/org/codelibs/fess/thumbnail/impl/HtmlTagBasedGenerator.java
  9. 10 1
      src/main/resources/fess_config.properties
  10. 3 0
      src/main/resources/fess_indices/fess/doc.json
  11. 8 1
      src/main/resources/fess_thumbnail.xml
  12. 76 0
      src/test/java/org/codelibs/fess/crawler/transformer/FessXpathTransformerTest.java
  13. 99 0
      src/test/java/org/codelibs/fess/thumbnail/impl/HtmlTagBasedGeneratorTest.java
  14. BIN=BIN
      src/test/resources/thumbnail/400x400.gif
  15. BIN=BIN
      src/test/resources/thumbnail/400x400.jpg
  16. BIN=BIN
      src/test/resources/thumbnail/400x400.png
  17. BIN=BIN
      src/test/resources/thumbnail/400x600.gif
  18. BIN=BIN
      src/test/resources/thumbnail/400x600.jpg
  19. BIN=BIN
      src/test/resources/thumbnail/400x600.png
  20. BIN=BIN
      src/test/resources/thumbnail/600x400.gif
  21. BIN=BIN
      src/test/resources/thumbnail/600x400.jpg
  22. BIN=BIN
      src/test/resources/thumbnail/600x400.png

+ 94 - 10
src/main/java/org/codelibs/fess/crawler/transformer/FessXpathTransformer.java

@@ -68,6 +68,7 @@ import org.cyberneko.html.parsers.DOMParser;
 import org.slf4j.Logger;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.slf4j.LoggerFactory;
 import org.w3c.dom.Document;
 import org.w3c.dom.Document;
+import org.w3c.dom.NamedNodeMap;
 import org.w3c.dom.Node;
 import org.w3c.dom.Node;
 import org.w3c.dom.NodeList;
 import org.w3c.dom.NodeList;
 import org.xml.sax.InputSource;
 import org.xml.sax.InputSource;
@@ -75,6 +76,10 @@ import org.xml.sax.InputSource;
 public class FessXpathTransformer extends XpathTransformer implements FessTransformer {
 public class FessXpathTransformer extends XpathTransformer implements FessTransformer {
     private static final Logger logger = LoggerFactory.getLogger(FessXpathTransformer.class);
     private static final Logger logger = LoggerFactory.getLogger(FessXpathTransformer.class);
 
 
+    private static final String META_NAME_THUMBNAIL_CONTENT = "//META[@name=\"thumbnail\" or @name=\"THUMBNAIL\"]/@content";
+
+    private static final String META_PROPERTY_OGIMAGE_CONTENT = "//META[@property=\"og:image\"]/@content";
+
     private static final String META_NAME_ROBOTS_CONTENT = "//META[@name=\"robots\" or @name=\"ROBOTS\"]/@content";
     private static final String META_NAME_ROBOTS_CONTENT = "//META[@name=\"robots\" or @name=\"ROBOTS\"]/@content";
 
 
     private static final String META_ROBOTS_NONE = "none";
     private static final String META_ROBOTS_NONE = "none";
@@ -360,6 +365,11 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf
             putResultDataBody(dataMap, fessConfig.getIndexFieldParentId(), crawlingInfoHelper.generateId(dataMap));
             putResultDataBody(dataMap, fessConfig.getIndexFieldParentId(), crawlingInfoHelper.generateId(dataMap));
             putResultDataBody(dataMap, fessConfig.getIndexFieldUrl(), url); // set again
             putResultDataBody(dataMap, fessConfig.getIndexFieldUrl(), url); // set again
         }
         }
+        // thumbnail
+        final String thumbnailUrl = getThumbnailUrl(responseData, document);
+        if (StringUtil.isNotBlank(thumbnailUrl)) {
+            putResultDataBody(dataMap, fessConfig.getIndexFieldThumbnail(), thumbnailUrl);
+        }
 
 
         // from config
         // from config
         final Map<String, String> scriptConfigMap = crawlingConfig.getConfigParameterMap(ConfigName.SCRIPT);
         final Map<String, String> scriptConfigMap = crawlingConfig.getConfigParameterMap(ConfigName.SCRIPT);
@@ -598,16 +608,7 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf
 
 
     protected URL getBaseUrl(final String currentUrl, final String baseHref) throws MalformedURLException {
     protected URL getBaseUrl(final String currentUrl, final String baseHref) throws MalformedURLException {
         if (baseHref != null) {
         if (baseHref != null) {
-            if (baseHref.startsWith("://")) {
-                final String protocol = currentUrl.split(":")[0];
-                return new URL(protocol + baseHref);
-            } else if (baseHref.startsWith("//")) {
-                final String protocol = currentUrl.split(":")[0];
-                return new URL(protocol + ":" + baseHref);
-            } else if (baseHref.startsWith("/")) {
-                return new URL(new URL(currentUrl), baseHref);
-            }
-            return new URL(baseHref);
+            return getURL(currentUrl, baseHref);
         }
         }
         return new URL(currentUrl);
         return new URL(currentUrl);
     }
     }
@@ -687,4 +688,87 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf
         this.useGoogleOffOn = useGoogleOffOn;
         this.useGoogleOffOn = useGoogleOffOn;
     }
     }
 
 
+    /**
+     * Picks a thumbnail image URL for the crawled HTML document.
+     * <p>
+     * Candidates are tried in this order and the first one that resolves wins:
+     * the {@code thumbnail} meta tag, the {@code og:image} meta tag, the first
+     * {@code IMG} whose declared width/height pass
+     * {@code fessConfig.validateThumbnailSize}, and finally the first {@code IMG}
+     * that has a non-blank {@code src} but no complete width/height pair.
+     * Blank or malformed candidates are skipped so that a later candidate can
+     * still be used.
+     *
+     * @param responseData crawl response; its URL is the base for relative image URLs
+     * @param document parsed DOM of the page
+     * @return the thumbnail URL in external form, or {@code null} if no candidate resolves
+     */
+    protected String getThumbnailUrl(final ResponseData responseData, final Document document) {
+        // TODO PageMap
+        final String pageUrl = responseData.getUrl();
+        try {
+            // meta thumbnail
+            final String metaThumbnail = resolveThumbnailUrl(pageUrl, getXPathAPI().selectSingleNode(document, META_NAME_THUMBNAIL_CONTENT));
+            if (metaThumbnail != null) {
+                return metaThumbnail;
+            }
+
+            // meta og:image
+            final String ogImage = resolveThumbnailUrl(pageUrl, getXPathAPI().selectSingleNode(document, META_PROPERTY_OGIMAGE_CONTENT));
+            if (ogImage != null) {
+                return ogImage;
+            }
+
+            final NodeList imgNodeList = getXPathAPI().selectNodeList(document, "//IMG");
+            Node firstSrcNode = null;
+            for (int i = 0; i < imgNodeList.getLength(); i++) {
+                final Node imgNode = imgNodeList.item(i);
+                final NamedNodeMap attributes = imgNode.getAttributes();
+                final Node heightAttr = attributes.getNamedItem("height");
+                final Node widthAttr = attributes.getNamedItem("width");
+                if (heightAttr != null && widthAttr != null) {
+                    try {
+                        final int height = Integer.parseInt(heightAttr.getTextContent());
+                        final int width = Integer.parseInt(widthAttr.getTextContent());
+                        if (fessConfig.validateThumbnailSize(width, height)) {
+                            final String sizedImage = resolveThumbnailUrl(pageUrl, attributes.getNamedItem("src"));
+                            if (sizedImage != null) {
+                                return sizedImage;
+                            }
+                        }
+                    } catch (final Exception e) {
+                        // non-numeric width/height (e.g. "100%"): ignore this image and keep looking
+                        logger.debug("Failed to parse " + imgNode + " at " + pageUrl, e);
+                    }
+                } else if (firstSrcNode == null) {
+                    // remember the first image without declared dimensions as a last resort;
+                    // a blank src is not remembered so a later usable image can take the slot
+                    final Node srcNode = attributes.getNamedItem("src");
+                    if (srcNode != null && StringUtil.isNotBlank(srcNode.getTextContent())) {
+                        firstSrcNode = srcNode;
+                    }
+                }
+            }
+
+            final String firstImage = resolveThumbnailUrl(pageUrl, firstSrcNode);
+            if (firstImage != null) {
+                return firstImage;
+            }
+        } catch (final Exception e) {
+            logger.warn("Failed to retrieve thumbnail url from " + pageUrl, e);
+        }
+        return null;
+    }
+
+    /**
+     * Resolves the text of an attribute node against the page URL.
+     *
+     * @param pageUrl URL of the page the attribute was found on
+     * @param valueNode attribute node holding the candidate URL (may be {@code null})
+     * @return the resolved URL in external form, or {@code null} when the node is
+     *         missing, its value is blank, or the value is not a valid URL
+     */
+    private String resolveThumbnailUrl(final String pageUrl, final Node valueNode) {
+        if (valueNode == null) {
+            return null;
+        }
+        final String value = valueNode.getTextContent();
+        // a blank value would otherwise resolve to the page's own URL
+        if (StringUtil.isBlank(value)) {
+            return null;
+        }
+        try {
+            final URL resolved = getURL(pageUrl, value);
+            return resolved != null ? resolved.toExternalForm() : null;
+        } catch (final MalformedURLException e) {
+            logger.debug("Failed to parse " + valueNode + " at " + pageUrl, e);
+            return null;
+        }
+    }
+
+    protected URL getURL(final String currentUrl, final String url) throws MalformedURLException {
+        if (url != null) {
+            if (url.startsWith("://")) {
+                final String protocol = currentUrl.split(":")[0];
+                return new URL(protocol + url);
+            } else if (url.startsWith("//")) {
+                final String protocol = currentUrl.split(":")[0];
+                return new URL(protocol + ":" + url);
+            } else if (url.startsWith("/") || url.indexOf(':') == -1) {
+                return new URL(new URL(currentUrl), url);
+            }
+            return new URL(url);
+        }
+        return null;
+    }
 }
 }

+ 1 - 0
src/main/java/org/codelibs/fess/helper/QueryHelper.java

@@ -148,6 +148,7 @@ public class QueryHelper {
                     fessConfig.getIndexFieldTitle(), //
                     fessConfig.getIndexFieldTitle(), //
                     fessConfig.getIndexFieldDigest(), //
                     fessConfig.getIndexFieldDigest(), //
                     fessConfig.getIndexFieldUrl(), //
                     fessConfig.getIndexFieldUrl(), //
+                    fessConfig.getIndexFieldThumbnail(), //
                     fessConfig.getIndexFieldClickCount(), //
                     fessConfig.getIndexFieldClickCount(), //
                     fessConfig.getIndexFieldFavoriteCount(), //
                     fessConfig.getIndexFieldFavoriteCount(), //
                     fessConfig.getIndexFieldConfigId(), //
                     fessConfig.getIndexFieldConfigId(), //

+ 213 - 3
src/main/java/org/codelibs/fess/mylasta/direction/FessConfig.java

@@ -378,6 +378,9 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
     /** The key of the configuration. e.g. filename */
     /** The key of the configuration. e.g. filename */
     String INDEX_FIELD_FILENAME = "index.field.filename";
     String INDEX_FIELD_FILENAME = "index.field.filename";
 
 
+    /** The key of the configuration. e.g. thumbnail */
+    String INDEX_FIELD_THUMBNAIL = "index.field.thumbnail";
+
     /** The key of the configuration. e.g. content_title */
     /** The key of the configuration. e.g. content_title */
     String RESPONSE_FIELD_content_title = "response.field.content_title";
     String RESPONSE_FIELD_content_title = "response.field.content_title";
 
 
@@ -717,7 +720,7 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
     /** The key of the configuration. e.g. 100 */
     /** The key of the configuration. e.g. 100 */
     String PAGING_SEARCH_PAGE_MAX_SIZE = "paging.search.page.max.size";
     String PAGING_SEARCH_PAGE_MAX_SIZE = "paging.search.page.max.size";
 
 
-    /** The key of the configuration. e.g. true */
+    /** The key of the configuration. e.g. false */
     String THUMBNAIL_HTML_PHANTOMJS_ENABLED = "thumbnail.html.phantomjs.enabled";
     String THUMBNAIL_HTML_PHANTOMJS_ENABLED = "thumbnail.html.phantomjs.enabled";
 
 
     /** The key of the configuration. e.g. 20000 */
     /** The key of the configuration. e.g. 20000 */
@@ -741,6 +744,30 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
     /** The key of the configuration. e.g. png */
     /** The key of the configuration. e.g. png */
     String THUMBNAIL_HTML_PHANTOMJS_FORMAT = "thumbnail.html.phantomjs.format";
     String THUMBNAIL_HTML_PHANTOMJS_FORMAT = "thumbnail.html.phantomjs.format";
 
 
+    /** The key of the configuration. e.g. 50 */
+    String THUMBNAIL_HTML_IMAGE_MIN_WIDTH = "thumbnail.html.image.min.width";
+
+    /** The key of the configuration. e.g. 50 */
+    String THUMBNAIL_HTML_IMAGE_MIN_HEIGHT = "thumbnail.html.image.min.height";
+
+    /** The key of the configuration. e.g. 3.0 */
+    String THUMBNAIL_HTML_IMAGE_MAX_ASPECT_RATIO = "thumbnail.html.image.max.aspect.ratio";
+
+    /** The key of the configuration. e.g. 1200 */
+    String THUMBNAIL_HTML_IMAGE_WINDOW_WIDTH = "thumbnail.html.image.window.width";
+
+    /** The key of the configuration. e.g. 800 */
+    String THUMBNAIL_HTML_IMAGE_WINDOW_HEIGHT = "thumbnail.html.image.window.height";
+
+    /** The key of the configuration. e.g. 160 */
+    String THUMBNAIL_HTML_IMAGE_THUMBNAIL_WIDTH = "thumbnail.html.image.thumbnail.width";
+
+    /** The key of the configuration. e.g. 160 */
+    String THUMBNAIL_HTML_IMAGE_THUMBNAIL_HEIGHT = "thumbnail.html.image.thumbnail.height";
+
+    /** The key of the configuration. e.g. png */
+    String THUMBNAIL_HTML_IMAGE_FORMAT = "thumbnail.html.image.format";
+
     /** The key of the configuration. e.g. all */
     /** The key of the configuration. e.g. all */
     String THUMBNAIL_GENERATOR_TARGETS = "thumbnail.generator.targets";
     String THUMBNAIL_GENERATOR_TARGETS = "thumbnail.generator.targets";
 
 
@@ -2267,6 +2294,13 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
      */
      */
     String getIndexFieldFilename();
     String getIndexFieldFilename();
 
 
+    /**
+     * Get the value for the key 'index.field.thumbnail'. <br>
+     * The value is, e.g. thumbnail <br>
+     * @return The value of found property. (NotNull: if not found, exception but basically no way)
+     */
+    String getIndexFieldThumbnail();
+
     /**
     /**
      * Get the value for the key 'response.field.content_title'. <br>
      * Get the value for the key 'response.field.content_title'. <br>
      * The value is, e.g. content_title <br>
      * The value is, e.g. content_title <br>
@@ -3516,14 +3550,14 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
 
 
     /**
     /**
      * Get the value for the key 'thumbnail.html.phantomjs.enabled'. <br>
      * Get the value for the key 'thumbnail.html.phantomjs.enabled'. <br>
-     * The value is, e.g. true <br>
+     * The value is, e.g. false <br>
      * @return The value of found property. (NotNull: if not found, exception but basically no way)
      * @return The value of found property. (NotNull: if not found, exception but basically no way)
      */
      */
     String getThumbnailHtmlPhantomjsEnabled();
     String getThumbnailHtmlPhantomjsEnabled();
 
 
     /**
     /**
      * Is the property for the key 'thumbnail.html.phantomjs.enabled' true? <br>
      * Is the property for the key 'thumbnail.html.phantomjs.enabled' true? <br>
-     * The value is, e.g. true <br>
+     * The value is, e.g. false <br>
      * @return The determination, true or false. (if not found, exception but basically no way)
      * @return The determination, true or false. (if not found, exception but basically no way)
      */
      */
     boolean isThumbnailHtmlPhantomjsEnabled();
     boolean isThumbnailHtmlPhantomjsEnabled();
@@ -3625,6 +3659,118 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
      */
      */
     String getThumbnailHtmlPhantomjsFormat();
     String getThumbnailHtmlPhantomjsFormat();
 
 
+    /**
+     * Get the value for the key 'thumbnail.html.image.min.width'. <br>
+     * The value is, e.g. 50 <br>
+     * @return The value of found property. (NotNull: if not found, exception but basically no way)
+     */
+    String getThumbnailHtmlImageMinWidth();
+
+    /**
+     * Get the value for the key 'thumbnail.html.image.min.width' as {@link Integer}. <br>
+     * The value is, e.g. 50 <br>
+     * @return The value of found property. (NotNull: if not found, exception but basically no way)
+     * @throws NumberFormatException When the property is not integer.
+     */
+    Integer getThumbnailHtmlImageMinWidthAsInteger();
+
+    /**
+     * Get the value for the key 'thumbnail.html.image.min.height'. <br>
+     * The value is, e.g. 50 <br>
+     * @return The value of found property. (NotNull: if not found, exception but basically no way)
+     */
+    String getThumbnailHtmlImageMinHeight();
+
+    /**
+     * Get the value for the key 'thumbnail.html.image.min.height' as {@link Integer}. <br>
+     * The value is, e.g. 50 <br>
+     * @return The value of found property. (NotNull: if not found, exception but basically no way)
+     * @throws NumberFormatException When the property is not integer.
+     */
+    Integer getThumbnailHtmlImageMinHeightAsInteger();
+
+    /**
+     * Get the value for the key 'thumbnail.html.image.max.aspect.ratio'. <br>
+     * The value is, e.g. 3.0 <br>
+     * @return The value of found property. (NotNull: if not found, exception but basically no way)
+     */
+    String getThumbnailHtmlImageMaxAspectRatio();
+
+    /**
+     * Get the value for the key 'thumbnail.html.image.max.aspect.ratio' as {@link java.math.BigDecimal}. <br>
+     * The value is, e.g. 3.0 <br>
+     * @return The value of found property. (NotNull: if not found, exception but basically no way)
+     * @throws NumberFormatException When the property is not decimal.
+     */
+    java.math.BigDecimal getThumbnailHtmlImageMaxAspectRatioAsDecimal();
+
+    /**
+     * Get the value for the key 'thumbnail.html.image.window.width'. <br>
+     * The value is, e.g. 1200 <br>
+     * @return The value of found property. (NotNull: if not found, exception but basically no way)
+     */
+    String getThumbnailHtmlImageWindowWidth();
+
+    /**
+     * Get the value for the key 'thumbnail.html.image.window.width' as {@link Integer}. <br>
+     * The value is, e.g. 1200 <br>
+     * @return The value of found property. (NotNull: if not found, exception but basically no way)
+     * @throws NumberFormatException When the property is not integer.
+     */
+    Integer getThumbnailHtmlImageWindowWidthAsInteger();
+
+    /**
+     * Get the value for the key 'thumbnail.html.image.window.height'. <br>
+     * The value is, e.g. 800 <br>
+     * @return The value of found property. (NotNull: if not found, exception but basically no way)
+     */
+    String getThumbnailHtmlImageWindowHeight();
+
+    /**
+     * Get the value for the key 'thumbnail.html.image.window.height' as {@link Integer}. <br>
+     * The value is, e.g. 800 <br>
+     * @return The value of found property. (NotNull: if not found, exception but basically no way)
+     * @throws NumberFormatException When the property is not integer.
+     */
+    Integer getThumbnailHtmlImageWindowHeightAsInteger();
+
+    /**
+     * Get the value for the key 'thumbnail.html.image.thumbnail.width'. <br>
+     * The value is, e.g. 160 <br>
+     * @return The value of found property. (NotNull: if not found, exception but basically no way)
+     */
+    String getThumbnailHtmlImageThumbnailWidth();
+
+    /**
+     * Get the value for the key 'thumbnail.html.image.thumbnail.width' as {@link Integer}. <br>
+     * The value is, e.g. 160 <br>
+     * @return The value of found property. (NotNull: if not found, exception but basically no way)
+     * @throws NumberFormatException When the property is not integer.
+     */
+    Integer getThumbnailHtmlImageThumbnailWidthAsInteger();
+
+    /**
+     * Get the value for the key 'thumbnail.html.image.thumbnail.height'. <br>
+     * The value is, e.g. 160 <br>
+     * @return The value of found property. (NotNull: if not found, exception but basically no way)
+     */
+    String getThumbnailHtmlImageThumbnailHeight();
+
+    /**
+     * Get the value for the key 'thumbnail.html.image.thumbnail.height' as {@link Integer}. <br>
+     * The value is, e.g. 160 <br>
+     * @return The value of found property. (NotNull: if not found, exception but basically no way)
+     * @throws NumberFormatException When the property is not integer.
+     */
+    Integer getThumbnailHtmlImageThumbnailHeightAsInteger();
+
+    /**
+     * Get the value for the key 'thumbnail.html.image.format'. <br>
+     * The value is, e.g. png <br>
+     * @return The value of found property. (NotNull: if not found, exception but basically no way)
+     */
+    String getThumbnailHtmlImageFormat();
+
     /**
     /**
      * Get the value for the key 'thumbnail.generator.targets'. <br>
      * Get the value for the key 'thumbnail.generator.targets'. <br>
      * The value is, e.g. all <br>
      * The value is, e.g. all <br>
@@ -5447,6 +5593,10 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
             return get(FessConfig.INDEX_FIELD_FILENAME);
             return get(FessConfig.INDEX_FIELD_FILENAME);
         }
         }
 
 
+        public String getIndexFieldThumbnail() {
+            return get(FessConfig.INDEX_FIELD_THUMBNAIL);
+        }
+
         public String getResponseFieldContentTitle() {
         public String getResponseFieldContentTitle() {
             return get(FessConfig.RESPONSE_FIELD_content_title);
             return get(FessConfig.RESPONSE_FIELD_content_title);
         }
         }
@@ -6143,6 +6293,66 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
             return get(FessConfig.THUMBNAIL_HTML_PHANTOMJS_FORMAT);
             return get(FessConfig.THUMBNAIL_HTML_PHANTOMJS_FORMAT);
         }
         }
 
 
+        public String getThumbnailHtmlImageMinWidth() {
+            return get(FessConfig.THUMBNAIL_HTML_IMAGE_MIN_WIDTH);
+        }
+
+        public Integer getThumbnailHtmlImageMinWidthAsInteger() {
+            return getAsInteger(FessConfig.THUMBNAIL_HTML_IMAGE_MIN_WIDTH);
+        }
+
+        public String getThumbnailHtmlImageMinHeight() {
+            return get(FessConfig.THUMBNAIL_HTML_IMAGE_MIN_HEIGHT);
+        }
+
+        public Integer getThumbnailHtmlImageMinHeightAsInteger() {
+            return getAsInteger(FessConfig.THUMBNAIL_HTML_IMAGE_MIN_HEIGHT);
+        }
+
+        public String getThumbnailHtmlImageMaxAspectRatio() {
+            return get(FessConfig.THUMBNAIL_HTML_IMAGE_MAX_ASPECT_RATIO);
+        }
+
+        public java.math.BigDecimal getThumbnailHtmlImageMaxAspectRatioAsDecimal() {
+            return getAsDecimal(FessConfig.THUMBNAIL_HTML_IMAGE_MAX_ASPECT_RATIO);
+        }
+
+        public String getThumbnailHtmlImageWindowWidth() {
+            return get(FessConfig.THUMBNAIL_HTML_IMAGE_WINDOW_WIDTH);
+        }
+
+        public Integer getThumbnailHtmlImageWindowWidthAsInteger() {
+            return getAsInteger(FessConfig.THUMBNAIL_HTML_IMAGE_WINDOW_WIDTH);
+        }
+
+        public String getThumbnailHtmlImageWindowHeight() {
+            return get(FessConfig.THUMBNAIL_HTML_IMAGE_WINDOW_HEIGHT);
+        }
+
+        public Integer getThumbnailHtmlImageWindowHeightAsInteger() {
+            return getAsInteger(FessConfig.THUMBNAIL_HTML_IMAGE_WINDOW_HEIGHT);
+        }
+
+        public String getThumbnailHtmlImageThumbnailWidth() {
+            return get(FessConfig.THUMBNAIL_HTML_IMAGE_THUMBNAIL_WIDTH);
+        }
+
+        public Integer getThumbnailHtmlImageThumbnailWidthAsInteger() {
+            return getAsInteger(FessConfig.THUMBNAIL_HTML_IMAGE_THUMBNAIL_WIDTH);
+        }
+
+        public String getThumbnailHtmlImageThumbnailHeight() {
+            return get(FessConfig.THUMBNAIL_HTML_IMAGE_THUMBNAIL_HEIGHT);
+        }
+
+        public Integer getThumbnailHtmlImageThumbnailHeightAsInteger() {
+            return getAsInteger(FessConfig.THUMBNAIL_HTML_IMAGE_THUMBNAIL_HEIGHT);
+        }
+
+        public String getThumbnailHtmlImageFormat() {
+            return get(FessConfig.THUMBNAIL_HTML_IMAGE_FORMAT);
+        }
+
         public String getThumbnailGeneratorTargets() {
         public String getThumbnailGeneratorTargets() {
             return get(FessConfig.THUMBNAIL_GENERATOR_TARGETS);
             return get(FessConfig.THUMBNAIL_GENERATOR_TARGETS);
         }
         }

+ 22 - 0
src/main/java/org/codelibs/fess/mylasta/direction/FessProp.java

@@ -1605,4 +1605,26 @@ public interface FessProp {
         return false;
         return false;
     }
     }
 
 
+    Integer getThumbnailHtmlImageMinWidthAsInteger();
+
+    Integer getThumbnailHtmlImageMinHeightAsInteger();
+
+    java.math.BigDecimal getThumbnailHtmlImageMaxAspectRatioAsDecimal();
+
+    /**
+     * Decides whether an image with the given dimensions may be used as a thumbnail source.
+     * <p>
+     * The image is rejected when either dimension is not positive, when it is
+     * smaller than the configured minimum width or height, or when the ratio
+     * between its longer and shorter side exceeds the configured maximum
+     * aspect ratio.
+     *
+     * @param width image width
+     * @param height image height
+     * @return {@code true} if the size is acceptable, {@code false} otherwise
+     */
+    public default boolean validateThumbnailSize(final int width, final int height) {
+        if (!(width > 0 && height > 0)) {
+            return false;
+        }
+
+        // short-circuit on purpose: the height limit is only read when the width passes
+        final boolean belowMinimum =
+                width < getThumbnailHtmlImageMinWidthAsInteger().intValue() || height < getThumbnailHtmlImageMinHeightAsInteger().intValue();
+        if (belowMinimum) {
+            return false;
+        }
+
+        final float maxRatio = getThumbnailHtmlImageMaxAspectRatioAsDecimal().floatValue();
+        final float w = width;
+        final float h = height;
+        return !(w / h > maxRatio || h / w > maxRatio);
+    }
 }
 }

+ 4 - 0
src/main/java/org/codelibs/fess/thumbnail/ThumbnailGenerator.java

@@ -18,6 +18,8 @@ package org.codelibs.fess.thumbnail;
 import java.io.File;
 import java.io.File;
 import java.util.Map;
 import java.util.Map;
 
 
+import org.codelibs.core.misc.Tuple3;
+
 public interface ThumbnailGenerator {
 public interface ThumbnailGenerator {
 
 
     String getName();
     String getName();
@@ -29,4 +31,6 @@ public interface ThumbnailGenerator {
     boolean isAvailable();
     boolean isAvailable();
 
 
     void destroy();
     void destroy();
+
+    Tuple3<String, String, String> createTask(String path, Map<String, Object> docMap);
 }
 }

+ 4 - 4
src/main/java/org/codelibs/fess/thumbnail/ThumbnailManager.java

@@ -226,13 +226,13 @@ public class ThumbnailManager {
     }
     }
 
 
     public void offer(final Map<String, Object> docMap) {
     public void offer(final Map<String, Object> docMap) {
-        final FessConfig fessConfig = ComponentUtil.getFessConfig();
         for (final ThumbnailGenerator generator : generatorList) {
         for (final ThumbnailGenerator generator : generatorList) {
             if (generator.isTarget(docMap)) {
             if (generator.isTarget(docMap)) {
-                final String url = DocumentUtil.getValue(docMap, fessConfig.getIndexFieldUrl(), String.class);
                 final String path = getImageFilename(docMap);
                 final String path = getImageFilename(docMap);
-                final Tuple3<String, String, String> task = new Tuple3<>(generator.getName(), url, path);
-                thumbnailTaskQueue.offer(task);
+                final Tuple3<String, String, String> task = generator.createTask(path, docMap);
+                if (task != null) {
+                    thumbnailTaskQueue.offer(task);
+                }
                 break;
                 break;
             }
             }
         }
         }

+ 11 - 0
src/main/java/org/codelibs/fess/thumbnail/impl/BaseThumbnailGenerator.java

@@ -23,7 +23,11 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.List;
 import java.util.Map;
 import java.util.Map;
 
 
+import org.codelibs.core.misc.Tuple3;
+import org.codelibs.fess.mylasta.direction.FessConfig;
 import org.codelibs.fess.thumbnail.ThumbnailGenerator;
 import org.codelibs.fess.thumbnail.ThumbnailGenerator;
+import org.codelibs.fess.util.ComponentUtil;
+import org.codelibs.fess.util.DocumentUtil;
 
 
 public abstract class BaseThumbnailGenerator implements ThumbnailGenerator {
 public abstract class BaseThumbnailGenerator implements ThumbnailGenerator {
 
 
@@ -84,6 +88,13 @@ public abstract class BaseThumbnailGenerator implements ThumbnailGenerator {
         return true;
         return true;
     }
     }
 
 
+    /**
+     * Builds the queue entry for this generator from the document's URL field.
+     *
+     * @param path thumbnail file path for the document
+     * @param docMap indexed document fields
+     * @return tuple of (generator name, document URL, thumbnail path)
+     */
+    @Override
+    public Tuple3<String, String, String> createTask(final String path, final Map<String, Object> docMap) {
+        final String urlField = ComponentUtil.getFessConfig().getIndexFieldUrl();
+        final String docUrl = DocumentUtil.getValue(docMap, urlField, String.class);
+        return new Tuple3<>(getName(), docUrl, path);
+    }
+
     public void setDirectoryNameLength(final int directoryNameLength) {
     public void setDirectoryNameLength(final int directoryNameLength) {
         this.directoryNameLength = directoryNameLength;
         this.directoryNameLength = directoryNameLength;
     }
     }

+ 121 - 0
src/main/java/org/codelibs/fess/thumbnail/impl/HtmlTagBasedGenerator.java

@@ -0,0 +1,121 @@
+/*
+ * Copyright 2012-2017 CodeLibs Project and the Others.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied. See the License for the specific language
+ * governing permissions and limitations under the License.
+ */
+package org.codelibs.fess.thumbnail.impl;
+
+import java.awt.Image;
+import java.awt.Rectangle;
+import java.awt.image.BufferedImage;
+import java.io.File;
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.Map;
+
+import javax.imageio.ImageIO;
+import javax.imageio.ImageReadParam;
+import javax.imageio.ImageReader;
+import javax.imageio.stream.ImageInputStream;
+
+import org.codelibs.core.lang.StringUtil;
+import org.codelibs.core.misc.Tuple3;
+import org.codelibs.elasticsearch.runner.net.Curl;
+import org.codelibs.fess.mylasta.direction.FessConfig;
+import org.codelibs.fess.util.ComponentUtil;
+import org.codelibs.fess.util.DocumentUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class HtmlTagBasedGenerator extends BaseThumbnailGenerator {
+
+    private static final Logger logger = LoggerFactory.getLogger(HtmlTagBasedGenerator.class);
+
+    @Override
+    public void destroy() {
+    }
+
+    @Override
+    public Tuple3<String, String, String> createTask(final String path, final Map<String, Object> docMap) {
+        final FessConfig fessConfig = ComponentUtil.getFessConfig();
+        final String url = DocumentUtil.getValue(docMap, fessConfig.getIndexFieldThumbnail(), String.class);
+        if (StringUtil.isBlank(url)) {
+            return null;
+        }
+        return new Tuple3<>(getName(), url, path);
+    }
+
+    @Override
+    public boolean generate(final String url, final File outputFile) {
+        if (logger.isDebugEnabled()) {
+            logger.debug("Generate Thumbnail: " + url);
+        }
+
+        if (outputFile.exists()) {
+            if (logger.isDebugEnabled()) {
+                logger.debug("The thumbnail file exists: " + outputFile.getAbsolutePath());
+            }
+            return true;
+        }
+
+        final File parentFile = outputFile.getParentFile();
+        if (!parentFile.exists()) {
+            parentFile.mkdirs();
+        }
+        if (!parentFile.isDirectory()) {
+            logger.warn("Not found: " + parentFile.getAbsolutePath());
+            return false;
+        }
+
+        Curl.get(url).execute(con -> {
+            try (ImageInputStream input = ImageIO.createImageInputStream(con.getInputStream())) {
+                saveImage(input, outputFile);
+            } catch (final Throwable t) {
+                logger.warn("Failed to convert " + url, t);
+            }
+        });
+
+        return false;
+    }
+
+    protected void saveImage(final ImageInputStream input, final File outputFile) throws IOException {
+        final FessConfig fessConfig = ComponentUtil.getFessConfig();
+        final Iterator<ImageReader> readers = ImageIO.getImageReaders(input);
+        if (readers.hasNext()) {
+            final ImageReader reader = readers.next();
+            try {
+                reader.setInput(input);
+                final ImageReadParam param = reader.getDefaultReadParam();
+                final int width = reader.getWidth(0);
+                final int height = reader.getHeight(0);
+                final int samplingWidth = width / fessConfig.getThumbnailHtmlImageThumbnailWidthAsInteger();
+                final int samplingHeight = height / fessConfig.getThumbnailHtmlImageThumbnailHeightAsInteger();
+                param.setSourceSubsampling(samplingWidth <= 0 ? 1 : samplingWidth, samplingHeight <= 0 ? 1 : samplingHeight, 0, 0);
+                param.setSourceRegion(new Rectangle(width, height > width ? width : height));
+                final BufferedImage image = reader.read(0, param);
+                final int thumbnailWidth = fessConfig.getThumbnailHtmlImageThumbnailWidthAsInteger();
+                final int thumbnailHeight =
+                        (int) (((float) (height > width ? width : height))
+                                * fessConfig.getThumbnailHtmlImageThumbnailWidthAsInteger().floatValue() / (float) width);
+                BufferedImage thumbnail = new BufferedImage(thumbnailWidth, thumbnailHeight, image.getType());
+                thumbnail.getGraphics().drawImage(image.getScaledInstance(thumbnailWidth, thumbnailHeight, Image.SCALE_AREA_AVERAGING), 0,
+                        0, thumbnailWidth, thumbnailHeight, null);
+                ImageIO.write(thumbnail, fessConfig.getThumbnailHtmlImageFormat(), outputFile);
+                image.flush();
+            } finally {
+                reader.dispose();
+            }
+        }
+    }
+
+}

+ 10 - 1
src/main/resources/fess_config.properties

@@ -180,6 +180,7 @@ index.field.site=site
 index.field.content_length=content_length
 index.field.content_length=content_length
 index.field.filetype=filetype
 index.field.filetype=filetype
 index.field.filename=filename
 index.field.filename=filename
+index.field.thumbnail=thumbnail
 response.field.content_title=content_title
 response.field.content_title=content_title
 response.field.content_description=content_description
 response.field.content_description=content_description
 response.field.url_link=url_link
 response.field.url_link=url_link
@@ -375,7 +376,7 @@ paging.search.page.start=0
 paging.search.page.size=20
 paging.search.page.size=20
 paging.search.page.max.size=100
 paging.search.page.max.size=100
 
 
-thumbnail.html.phantomjs.enabled=true
+thumbnail.html.phantomjs.enabled=false
 thumbnail.html.phantomjs.max.height=20000
 thumbnail.html.phantomjs.max.height=20000
 thumbnail.html.phantomjs.keep.alive=600000
 thumbnail.html.phantomjs.keep.alive=600000
 thumbnail.html.phantomjs.window.width=1200
 thumbnail.html.phantomjs.window.width=1200
@@ -383,6 +384,14 @@ thumbnail.html.phantomjs.window.height=800
 thumbnail.html.phantomjs.thumbnail.width=160
 thumbnail.html.phantomjs.thumbnail.width=160
 thumbnail.html.phantomjs.thumbnail.height=160
 thumbnail.html.phantomjs.thumbnail.height=160
 thumbnail.html.phantomjs.format=png
 thumbnail.html.phantomjs.format=png
+thumbnail.html.image.min.width=50
+thumbnail.html.image.min.height=50
+thumbnail.html.image.max.aspect.ratio=3.0
+thumbnail.html.image.window.width=1200
+thumbnail.html.image.window.height=800
+thumbnail.html.image.thumbnail.width=160
+thumbnail.html.image.thumbnail.height=160
+thumbnail.html.image.format=png
 thumbnail.generator.targets=all
 thumbnail.generator.targets=all
 thumbnail.crawler.enabled=false
 thumbnail.crawler.enabled=false
 
 

+ 3 - 0
src/main/resources/fess_indices/fess/doc.json

@@ -532,6 +532,9 @@
         "analyzer": "standard_analyzer",
         "analyzer": "standard_analyzer",
         "term_vector": "with_positions_offsets"
         "term_vector": "with_positions_offsets"
       },
       },
+      "thumbnail": {
+        "type": "keyword"
+      },
       "url": {
       "url": {
         "type": "keyword"
         "type": "keyword"
       }
       }

+ 8 - 1
src/main/resources/fess_thumbnail.xml

@@ -13,6 +13,14 @@
 			<arg>pdfThumbnailGenerator</arg>
 			<arg>pdfThumbnailGenerator</arg>
 		</postConstruct>
 		</postConstruct>
 	</component>
 	</component>
+	<component name="htmlThumbnailGenerator" class="org.codelibs.fess.thumbnail.impl.HtmlTagBasedGenerator">
+		<property name="name">"htmlThumbnailGenerator"</property>
+		<postConstruct name="addCondition">
+			<arg>"mimetype"</arg>
+			<arg>"text/html"</arg>
+		</postConstruct>
+	</component>
+<!--
 	<component name="htmlThumbnailGenerator" class="org.codelibs.fess.thumbnail.impl.WebDriverGenerator">
 	<component name="htmlThumbnailGenerator" class="org.codelibs.fess.thumbnail.impl.WebDriverGenerator">
 		<property name="name">"htmlThumbnailGenerator"</property>
 		<property name="name">"htmlThumbnailGenerator"</property>
 		<property name="generatorList">
 		<property name="generatorList">
@@ -31,7 +39,6 @@
 			<arg>"text/html"</arg>
 			<arg>"text/html"</arg>
 		</postConstruct>
 		</postConstruct>
 	</component>
 	</component>
-<!--
 	<component name="htmlThumbnailGenerator" class="org.codelibs.fess.thumbnail.impl.CommandGenerator">
 	<component name="htmlThumbnailGenerator" class="org.codelibs.fess.thumbnail.impl.CommandGenerator">
 		<property name="name">"htmlThumbnailGenerator"</property>
 		<property name="name">"htmlThumbnailGenerator"</property>
 		<property name="commandList">
 		<property name="commandList">

+ 76 - 0
src/test/java/org/codelibs/fess/crawler/transformer/FessXpathTransformerTest.java

@@ -642,5 +642,81 @@ public class FessXpathTransformerTest extends UnitFessTestCase {
 
 
         value = transformer.getBaseUrl("https://hoge.com/", "//hoge.com/aaa/");
         value = transformer.getBaseUrl("https://hoge.com/", "//hoge.com/aaa/");
         assertEquals("https://hoge.com/aaa/", value.toExternalForm());
         assertEquals("https://hoge.com/aaa/", value.toExternalForm());
+
+        value = transformer.getBaseUrl("https://hoge.com/", "aaa/");
+        assertEquals("https://hoge.com/aaa/", value.toExternalForm());
+    }
+
+    /**
+     * Verifies that no thumbnail URL is returned for documents without an image, or whose
+     * only image declares a non-numeric, very small or strongly elongated size.
+     */
+    public void test_getThumbnailUrl_no() throws Exception {
+        final String[] pages = { //
+                "<html><body>foo</body></html>", //
+                "<img src=\"http://example/foo.jpg\" width=\"x\" height=\"x\">", //
+                "<img src=\"http://example/foo.jpg\" width=\"10\" height=\"100\">", //
+                "<img src=\"http://example/foo.jpg\" width=\"100\" height=\"10\">", //
+                "<img src=\"http://example/foo.jpg\" width=\"400\" height=\"100\">", //
+                "<img src=\"http://example/foo.jpg\" width=\"100\" height=\"400\">" //
+        };
+
+        final FessXpathTransformer xpathTransformer = new FessXpathTransformer();
+        final ResponseData response = new ResponseData();
+        response.setUrl("http://example.com/");
+
+        for (final String html : pages) {
+            assertNull(xpathTransformer.getThumbnailUrl(response, getDocument(html)));
+        }
+    }
+
+    /**
+     * Verifies the thumbnail URL resolved from {@code og:image} meta tags and {@code img}
+     * tags. Each row is a pair of { html, expected URL }, checked in order.
+     */
+    public void test_getThumbnailUrl() throws Exception {
+        final String[][] cases = { //
+                { "<meta property=\"og:image\" content=\"http://example/foo.jpg\" />", "http://example/foo.jpg" }, //
+                { "<meta property=\"og:image\" content=\"://example/foo.jpg\" />", "http://example/foo.jpg" }, //
+                { "<meta property=\"og:image\" content=\"http://example/foo.jpg\" />", "http://example/foo.jpg" }, //
+                { "<meta property=\"og:image\" content=\"/foo.jpg\" />", "http://example.com/foo.jpg" }, //
+                { "<img src=\"http://example/foo.jpg\">", "http://example/foo.jpg" }, //
+                { "<img src=\"http://example/foo.jpg\">" //
+                        + "<img src=\"http://example/bar.jpg\">", "http://example/foo.jpg" }, //
+                { "<img src=\"http://example/foo.jpg\">" //
+                        + "<img src=\"http://example/bar.jpg\" width=\"100\" height=\"100\">", "http://example/bar.jpg" }, //
+                { "<img src=\"http://example/foo.jpg\" width=\"100\" height=\"100\">", "http://example/foo.jpg" } //
+        };
+
+        for (final String[] row : cases) {
+            assertGetThumbnailUrl(row[0], row[1]);
+        }
+    }
+
+    /**
+     * Parses {@code data} and asserts that an initialised transformer resolves
+     * {@code expected} as the thumbnail URL for a response fetched from http://example.com/.
+     */
+    private void assertGetThumbnailUrl(String data, String expected) throws Exception {
+        final Document parsed = getDocument(data);
+
+        final FessXpathTransformer xpathTransformer = new FessXpathTransformer();
+        xpathTransformer.init();
+
+        final ResponseData response = new ResponseData();
+        response.setUrl("http://example.com/");
+
+        final String actual = xpathTransformer.getThumbnailUrl(response, parsed);
+        assertEquals(expected, actual);
     }
     }
 }
 }

+ 99 - 0
src/test/java/org/codelibs/fess/thumbnail/impl/HtmlTagBasedGeneratorTest.java

@@ -0,0 +1,99 @@
+/*
+ * Copyright 2012-2017 CodeLibs Project and the Others.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied. See the License for the specific language
+ * governing permissions and limitations under the License.
+ */
+package org.codelibs.fess.thumbnail.impl;
+
+import java.awt.image.BufferedImage;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+
+import javax.imageio.ImageIO;
+import javax.imageio.stream.ImageInputStream;
+
+import org.codelibs.fess.unit.UnitFessTestCase;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class HtmlTagBasedGeneratorTest extends UnitFessTestCase {
+    private static final Logger logger = LoggerFactory.getLogger(HtmlTagBasedGeneratorTest.class);
+
+    public void test_saveImage() throws Exception {
+        HtmlTagBasedGenerator generator = new HtmlTagBasedGenerator();
+        ClassLoader classLoader = Thread.currentThread().getContextClassLoader();
+        File outputFile = File.createTempFile("generator_", ".png");
+
+        String imagePath = "thumbnail/600x400.png";
+        try (ImageInputStream input = ImageIO.createImageInputStream(classLoader.getResourceAsStream(imagePath))) {
+            generator.saveImage(input, outputFile);
+        }
+        assertImageSize(outputFile, 160, 106);
+
+        imagePath = "thumbnail/600x400.gif";
+        try (ImageInputStream input = ImageIO.createImageInputStream(classLoader.getResourceAsStream(imagePath))) {
+            generator.saveImage(input, outputFile);
+        }
+        assertImageSize(outputFile, 160, 106);
+
+        imagePath = "thumbnail/600x400.jpg";
+        try (ImageInputStream input = ImageIO.createImageInputStream(classLoader.getResourceAsStream(imagePath))) {
+            generator.saveImage(input, outputFile);
+        }
+        assertImageSize(outputFile, 160, 106);
+
+        imagePath = "thumbnail/400x400.png";
+        try (ImageInputStream input = ImageIO.createImageInputStream(classLoader.getResourceAsStream(imagePath))) {
+            generator.saveImage(input, outputFile);
+        }
+        assertImageSize(outputFile, 160, 160);
+
+        imagePath = "thumbnail/400x400.gif";
+        try (ImageInputStream input = ImageIO.createImageInputStream(classLoader.getResourceAsStream(imagePath))) {
+            generator.saveImage(input, outputFile);
+        }
+        assertImageSize(outputFile, 160, 160);
+
+        imagePath = "thumbnail/400x400.jpg";
+        try (ImageInputStream input = ImageIO.createImageInputStream(classLoader.getResourceAsStream(imagePath))) {
+            generator.saveImage(input, outputFile);
+        }
+        assertImageSize(outputFile, 160, 160);
+
+        imagePath = "thumbnail/400x600.png";
+        try (ImageInputStream input = ImageIO.createImageInputStream(classLoader.getResourceAsStream(imagePath))) {
+            generator.saveImage(input, outputFile);
+        }
+        assertImageSize(outputFile, 160, 160);
+
+        imagePath = "thumbnail/400x600.gif";
+        try (ImageInputStream input = ImageIO.createImageInputStream(classLoader.getResourceAsStream(imagePath))) {
+            generator.saveImage(input, outputFile);
+        }
+        assertImageSize(outputFile, 160, 160);
+
+        imagePath = "thumbnail/400x600.jpg";
+        try (ImageInputStream input = ImageIO.createImageInputStream(classLoader.getResourceAsStream(imagePath))) {
+            generator.saveImage(input, outputFile);
+        }
+        assertImageSize(outputFile, 160, 160);
+
+    }
+
+    private void assertImageSize(File file, int width, int height) throws IOException {
+        BufferedImage img = ImageIO.read(file);
+        logger.debug("width: " + img.getWidth() + ", height: " + img.getHeight());
+        assertEquals("Image Width", width, img.getWidth());
+        assertEquals("Image Height", height, img.getHeight());
+    }
+}

BIN=BIN
src/test/resources/thumbnail/400x400.gif


BIN=BIN
src/test/resources/thumbnail/400x400.jpg


BIN=BIN
src/test/resources/thumbnail/400x400.png


BIN=BIN
src/test/resources/thumbnail/400x600.gif


BIN=BIN
src/test/resources/thumbnail/400x600.jpg


BIN=BIN
src/test/resources/thumbnail/400x600.png


BIN=BIN
src/test/resources/thumbnail/600x400.gif


BIN=BIN
src/test/resources/thumbnail/600x400.jpg


BIN=BIN
src/test/resources/thumbnail/600x400.png