fix #1081 add HtmlTagBasedGenerator

This commit is contained in:
Shinsuke Sugaya 2017-06-01 23:38:39 +09:00
parent 0a74cdf7a4
commit 0271b5041b
22 changed files with 666 additions and 19 deletions

View file

@ -68,6 +68,7 @@ import org.cyberneko.html.parsers.DOMParser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
@ -75,6 +76,10 @@ import org.xml.sax.InputSource;
public class FessXpathTransformer extends XpathTransformer implements FessTransformer {
private static final Logger logger = LoggerFactory.getLogger(FessXpathTransformer.class);
private static final String META_NAME_THUMBNAIL_CONTENT = "//META[@name=\"thumbnail\" or @name=\"THUMBNAIL\"]/@content";
private static final String META_PROPERTY_OGIMAGE_CONTENT = "//META[@property=\"og:image\"]/@content";
private static final String META_NAME_ROBOTS_CONTENT = "//META[@name=\"robots\" or @name=\"ROBOTS\"]/@content";
private static final String META_ROBOTS_NONE = "none";
@ -360,6 +365,11 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf
putResultDataBody(dataMap, fessConfig.getIndexFieldParentId(), crawlingInfoHelper.generateId(dataMap));
putResultDataBody(dataMap, fessConfig.getIndexFieldUrl(), url); // set again
}
// thumbnail
final String thumbnailUrl = getThumbnailUrl(responseData, document);
if (StringUtil.isNotBlank(thumbnailUrl)) {
putResultDataBody(dataMap, fessConfig.getIndexFieldThumbnail(), thumbnailUrl);
}
// from config
final Map<String, String> scriptConfigMap = crawlingConfig.getConfigParameterMap(ConfigName.SCRIPT);
@ -598,16 +608,7 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf
protected URL getBaseUrl(final String currentUrl, final String baseHref) throws MalformedURLException {
if (baseHref != null) {
if (baseHref.startsWith("://")) {
final String protocol = currentUrl.split(":")[0];
return new URL(protocol + baseHref);
} else if (baseHref.startsWith("//")) {
final String protocol = currentUrl.split(":")[0];
return new URL(protocol + ":" + baseHref);
} else if (baseHref.startsWith("/")) {
return new URL(new URL(currentUrl), baseHref);
}
return new URL(baseHref);
return getURL(currentUrl, baseHref);
}
return new URL(currentUrl);
}
@ -687,4 +688,87 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf
this.useGoogleOffOn = useGoogleOffOn;
}
protected String getThumbnailUrl(final ResponseData responseData, final Document document) {
// TODO PageMap
try {
// meta thumbnail
final Node thumbnailNode = getXPathAPI().selectSingleNode(document, META_NAME_THUMBNAIL_CONTENT);
if (thumbnailNode != null) {
final URL thumbnailUrl = getURL(responseData.getUrl(), thumbnailNode.getTextContent());
if (thumbnailUrl != null) {
return thumbnailUrl.toExternalForm();
}
}
// meta og:image
final Node ogImageNode = getXPathAPI().selectSingleNode(document, META_PROPERTY_OGIMAGE_CONTENT);
if (ogImageNode != null) {
final URL thumbnailUrl = getURL(responseData.getUrl(), ogImageNode.getTextContent());
if (thumbnailUrl != null) {
return thumbnailUrl.toExternalForm();
}
}
final NodeList imgNodeList = getXPathAPI().selectNodeList(document, "//IMG");
Node firstSrcNode = null;
for (int i = 0; i < imgNodeList.getLength(); i++) {
final Node imgNode = imgNodeList.item(i);
final NamedNodeMap attributes = imgNode.getAttributes();
final Node heightAttr = attributes.getNamedItem("height");
final Node widthAttr = attributes.getNamedItem("width");
if (heightAttr != null && widthAttr != null) {
try {
final int height = Integer.parseInt(heightAttr.getTextContent());
final int width = Integer.parseInt(widthAttr.getTextContent());
if (fessConfig.validateThumbnailSize(width, height)) {
final Node srcNode = attributes.getNamedItem("src");
if (srcNode != null) {
final URL thumbnailUrl = getURL(responseData.getUrl(), srcNode.getTextContent());
if (thumbnailUrl != null) {
return thumbnailUrl.toExternalForm();
}
}
}
} catch (Exception e) {
logger.debug("Failed to parse " + imgNode + " at " + responseData.getUrl(), e);
}
} else if (firstSrcNode == null) {
final Node srcNode = attributes.getNamedItem("src");
if (srcNode != null) {
firstSrcNode = srcNode;
}
}
}
if (firstSrcNode != null) {
try {
final URL thumbnailUrl = getURL(responseData.getUrl(), firstSrcNode.getTextContent());
if (thumbnailUrl != null) {
return thumbnailUrl.toExternalForm();
}
} catch (Exception e) {
logger.debug("Failed to parse " + firstSrcNode + " at " + responseData.getUrl(), e);
}
}
} catch (final Exception e) {
logger.warn("Failed to retrieve thumbnail url from " + responseData.getUrl(), e);
}
return null;
}
protected URL getURL(final String currentUrl, final String url) throws MalformedURLException {
if (url != null) {
if (url.startsWith("://")) {
final String protocol = currentUrl.split(":")[0];
return new URL(protocol + url);
} else if (url.startsWith("//")) {
final String protocol = currentUrl.split(":")[0];
return new URL(protocol + ":" + url);
} else if (url.startsWith("/") || url.indexOf(':') == -1) {
return new URL(new URL(currentUrl), url);
}
return new URL(url);
}
return null;
}
}

View file

@ -148,6 +148,7 @@ public class QueryHelper {
fessConfig.getIndexFieldTitle(), //
fessConfig.getIndexFieldDigest(), //
fessConfig.getIndexFieldUrl(), //
fessConfig.getIndexFieldThumbnail(), //
fessConfig.getIndexFieldClickCount(), //
fessConfig.getIndexFieldFavoriteCount(), //
fessConfig.getIndexFieldConfigId(), //

View file

@ -378,6 +378,9 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
/** The key of the configuration. e.g. filename */
String INDEX_FIELD_FILENAME = "index.field.filename";
/** The key of the configuration. e.g. thumbnail */
String INDEX_FIELD_THUMBNAIL = "index.field.thumbnail";
/** The key of the configuration. e.g. content_title */
String RESPONSE_FIELD_content_title = "response.field.content_title";
@ -717,7 +720,7 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
/** The key of the configuration. e.g. 100 */
String PAGING_SEARCH_PAGE_MAX_SIZE = "paging.search.page.max.size";
/** The key of the configuration. e.g. true */
/** The key of the configuration. e.g. false */
String THUMBNAIL_HTML_PHANTOMJS_ENABLED = "thumbnail.html.phantomjs.enabled";
/** The key of the configuration. e.g. 20000 */
@ -741,6 +744,30 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
/** The key of the configuration. e.g. png */
String THUMBNAIL_HTML_PHANTOMJS_FORMAT = "thumbnail.html.phantomjs.format";
/** The key of the configuration. e.g. 50 */
String THUMBNAIL_HTML_IMAGE_MIN_WIDTH = "thumbnail.html.image.min.width";
/** The key of the configuration. e.g. 50 */
String THUMBNAIL_HTML_IMAGE_MIN_HEIGHT = "thumbnail.html.image.min.height";
/** The key of the configuration. e.g. 3.0 */
String THUMBNAIL_HTML_IMAGE_MAX_ASPECT_RATIO = "thumbnail.html.image.max.aspect.ratio";
/** The key of the configuration. e.g. 1200 */
String THUMBNAIL_HTML_IMAGE_WINDOW_WIDTH = "thumbnail.html.image.window.width";
/** The key of the configuration. e.g. 800 */
String THUMBNAIL_HTML_IMAGE_WINDOW_HEIGHT = "thumbnail.html.image.window.height";
/** The key of the configuration. e.g. 160 */
String THUMBNAIL_HTML_IMAGE_THUMBNAIL_WIDTH = "thumbnail.html.image.thumbnail.width";
/** The key of the configuration. e.g. 160 */
String THUMBNAIL_HTML_IMAGE_THUMBNAIL_HEIGHT = "thumbnail.html.image.thumbnail.height";
/** The key of the configuration. e.g. png */
String THUMBNAIL_HTML_IMAGE_FORMAT = "thumbnail.html.image.format";
/** The key of the configuration. e.g. all */
String THUMBNAIL_GENERATOR_TARGETS = "thumbnail.generator.targets";
@ -2267,6 +2294,13 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
*/
String getIndexFieldFilename();
/**
* Get the value for the key 'index.field.thumbnail'. <br>
* The value is, e.g. thumbnail <br>
* @return The value of found property. (NotNull: if not found, exception but basically no way)
*/
String getIndexFieldThumbnail();
/**
* Get the value for the key 'response.field.content_title'. <br>
* The value is, e.g. content_title <br>
@ -3516,14 +3550,14 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
/**
* Get the value for the key 'thumbnail.html.phantomjs.enabled'. <br>
* The value is, e.g. true <br>
* The value is, e.g. false <br>
* @return The value of found property. (NotNull: if not found, exception but basically no way)
*/
String getThumbnailHtmlPhantomjsEnabled();
/**
* Is the property for the key 'thumbnail.html.phantomjs.enabled' true? <br>
* The value is, e.g. true <br>
* The value is, e.g. false <br>
* @return The determination, true or false. (if not found, exception but basically no way)
*/
boolean isThumbnailHtmlPhantomjsEnabled();
@ -3625,6 +3659,118 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
*/
String getThumbnailHtmlPhantomjsFormat();
/**
* Get the value for the key 'thumbnail.html.image.min.width'. <br>
* The value is, e.g. 50 <br>
* @return The value of found property. (NotNull: if not found, exception but basically no way)
*/
String getThumbnailHtmlImageMinWidth();
/**
* Get the value for the key 'thumbnail.html.image.min.width' as {@link Integer}. <br>
* The value is, e.g. 50 <br>
* @return The value of found property. (NotNull: if not found, exception but basically no way)
* @throws NumberFormatException When the property is not integer.
*/
Integer getThumbnailHtmlImageMinWidthAsInteger();
/**
* Get the value for the key 'thumbnail.html.image.min.height'. <br>
* The value is, e.g. 50 <br>
* @return The value of found property. (NotNull: if not found, exception but basically no way)
*/
String getThumbnailHtmlImageMinHeight();
/**
* Get the value for the key 'thumbnail.html.image.min.height' as {@link Integer}. <br>
* The value is, e.g. 50 <br>
* @return The value of found property. (NotNull: if not found, exception but basically no way)
* @throws NumberFormatException When the property is not integer.
*/
Integer getThumbnailHtmlImageMinHeightAsInteger();
/**
* Get the value for the key 'thumbnail.html.image.max.aspect.ratio'. <br>
* The value is, e.g. 3.0 <br>
* @return The value of found property. (NotNull: if not found, exception but basically no way)
*/
String getThumbnailHtmlImageMaxAspectRatio();
/**
* Get the value for the key 'thumbnail.html.image.max.aspect.ratio' as {@link java.math.BigDecimal}. <br>
* The value is, e.g. 3.0 <br>
* @return The value of found property. (NotNull: if not found, exception but basically no way)
* @throws NumberFormatException When the property is not decimal.
*/
java.math.BigDecimal getThumbnailHtmlImageMaxAspectRatioAsDecimal();
/**
* Get the value for the key 'thumbnail.html.image.window.width'. <br>
* The value is, e.g. 1200 <br>
* @return The value of found property. (NotNull: if not found, exception but basically no way)
*/
String getThumbnailHtmlImageWindowWidth();
/**
* Get the value for the key 'thumbnail.html.image.window.width' as {@link Integer}. <br>
* The value is, e.g. 1200 <br>
* @return The value of found property. (NotNull: if not found, exception but basically no way)
* @throws NumberFormatException When the property is not integer.
*/
Integer getThumbnailHtmlImageWindowWidthAsInteger();
/**
* Get the value for the key 'thumbnail.html.image.window.height'. <br>
* The value is, e.g. 800 <br>
* @return The value of found property. (NotNull: if not found, exception but basically no way)
*/
String getThumbnailHtmlImageWindowHeight();
/**
* Get the value for the key 'thumbnail.html.image.window.height' as {@link Integer}. <br>
* The value is, e.g. 800 <br>
* @return The value of found property. (NotNull: if not found, exception but basically no way)
* @throws NumberFormatException When the property is not integer.
*/
Integer getThumbnailHtmlImageWindowHeightAsInteger();
/**
* Get the value for the key 'thumbnail.html.image.thumbnail.width'. <br>
* The value is, e.g. 160 <br>
* @return The value of found property. (NotNull: if not found, exception but basically no way)
*/
String getThumbnailHtmlImageThumbnailWidth();
/**
* Get the value for the key 'thumbnail.html.image.thumbnail.width' as {@link Integer}. <br>
* The value is, e.g. 160 <br>
* @return The value of found property. (NotNull: if not found, exception but basically no way)
* @throws NumberFormatException When the property is not integer.
*/
Integer getThumbnailHtmlImageThumbnailWidthAsInteger();
/**
* Get the value for the key 'thumbnail.html.image.thumbnail.height'. <br>
* The value is, e.g. 160 <br>
* @return The value of found property. (NotNull: if not found, exception but basically no way)
*/
String getThumbnailHtmlImageThumbnailHeight();
/**
* Get the value for the key 'thumbnail.html.image.thumbnail.height' as {@link Integer}. <br>
* The value is, e.g. 160 <br>
* @return The value of found property. (NotNull: if not found, exception but basically no way)
* @throws NumberFormatException When the property is not integer.
*/
Integer getThumbnailHtmlImageThumbnailHeightAsInteger();
/**
* Get the value for the key 'thumbnail.html.image.format'. <br>
* The value is, e.g. png <br>
* @return The value of found property. (NotNull: if not found, exception but basically no way)
*/
String getThumbnailHtmlImageFormat();
/**
* Get the value for the key 'thumbnail.generator.targets'. <br>
* The value is, e.g. all <br>
@ -5447,6 +5593,10 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
return get(FessConfig.INDEX_FIELD_FILENAME);
}
public String getIndexFieldThumbnail() {
return get(FessConfig.INDEX_FIELD_THUMBNAIL);
}
public String getResponseFieldContentTitle() {
return get(FessConfig.RESPONSE_FIELD_content_title);
}
@ -6143,6 +6293,66 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
return get(FessConfig.THUMBNAIL_HTML_PHANTOMJS_FORMAT);
}
public String getThumbnailHtmlImageMinWidth() {
return get(FessConfig.THUMBNAIL_HTML_IMAGE_MIN_WIDTH);
}
public Integer getThumbnailHtmlImageMinWidthAsInteger() {
return getAsInteger(FessConfig.THUMBNAIL_HTML_IMAGE_MIN_WIDTH);
}
public String getThumbnailHtmlImageMinHeight() {
return get(FessConfig.THUMBNAIL_HTML_IMAGE_MIN_HEIGHT);
}
public Integer getThumbnailHtmlImageMinHeightAsInteger() {
return getAsInteger(FessConfig.THUMBNAIL_HTML_IMAGE_MIN_HEIGHT);
}
public String getThumbnailHtmlImageMaxAspectRatio() {
return get(FessConfig.THUMBNAIL_HTML_IMAGE_MAX_ASPECT_RATIO);
}
public java.math.BigDecimal getThumbnailHtmlImageMaxAspectRatioAsDecimal() {
return getAsDecimal(FessConfig.THUMBNAIL_HTML_IMAGE_MAX_ASPECT_RATIO);
}
public String getThumbnailHtmlImageWindowWidth() {
return get(FessConfig.THUMBNAIL_HTML_IMAGE_WINDOW_WIDTH);
}
public Integer getThumbnailHtmlImageWindowWidthAsInteger() {
return getAsInteger(FessConfig.THUMBNAIL_HTML_IMAGE_WINDOW_WIDTH);
}
public String getThumbnailHtmlImageWindowHeight() {
return get(FessConfig.THUMBNAIL_HTML_IMAGE_WINDOW_HEIGHT);
}
public Integer getThumbnailHtmlImageWindowHeightAsInteger() {
return getAsInteger(FessConfig.THUMBNAIL_HTML_IMAGE_WINDOW_HEIGHT);
}
public String getThumbnailHtmlImageThumbnailWidth() {
return get(FessConfig.THUMBNAIL_HTML_IMAGE_THUMBNAIL_WIDTH);
}
public Integer getThumbnailHtmlImageThumbnailWidthAsInteger() {
return getAsInteger(FessConfig.THUMBNAIL_HTML_IMAGE_THUMBNAIL_WIDTH);
}
public String getThumbnailHtmlImageThumbnailHeight() {
return get(FessConfig.THUMBNAIL_HTML_IMAGE_THUMBNAIL_HEIGHT);
}
public Integer getThumbnailHtmlImageThumbnailHeightAsInteger() {
return getAsInteger(FessConfig.THUMBNAIL_HTML_IMAGE_THUMBNAIL_HEIGHT);
}
public String getThumbnailHtmlImageFormat() {
return get(FessConfig.THUMBNAIL_HTML_IMAGE_FORMAT);
}
public String getThumbnailGeneratorTargets() {
return get(FessConfig.THUMBNAIL_GENERATOR_TARGETS);
}

View file

@ -1605,4 +1605,26 @@ public interface FessProp {
return false;
}
Integer getThumbnailHtmlImageMinWidthAsInteger();
Integer getThumbnailHtmlImageMinHeightAsInteger();
java.math.BigDecimal getThumbnailHtmlImageMaxAspectRatioAsDecimal();
public default boolean validateThumbnailSize(final int width, final int height) {
if (width <= 0 || height <= 0) {
return false;
}
if (width < getThumbnailHtmlImageMinWidthAsInteger().intValue() || height < getThumbnailHtmlImageMinHeightAsInteger().intValue()) {
return false;
}
final float ratio = getThumbnailHtmlImageMaxAspectRatioAsDecimal().floatValue();
if (((float) width) / ((float) height) > ratio || ((float) height) / ((float) width) > ratio) {
return false;
}
return true;
}
}

View file

@ -18,6 +18,8 @@ package org.codelibs.fess.thumbnail;
import java.io.File;
import java.util.Map;
import org.codelibs.core.misc.Tuple3;
public interface ThumbnailGenerator {
String getName();
@ -29,4 +31,6 @@ public interface ThumbnailGenerator {
boolean isAvailable();
void destroy();
Tuple3<String, String, String> createTask(String path, Map<String, Object> docMap);
}

View file

@ -226,13 +226,13 @@ public class ThumbnailManager {
}
public void offer(final Map<String, Object> docMap) {
final FessConfig fessConfig = ComponentUtil.getFessConfig();
for (final ThumbnailGenerator generator : generatorList) {
if (generator.isTarget(docMap)) {
final String url = DocumentUtil.getValue(docMap, fessConfig.getIndexFieldUrl(), String.class);
final String path = getImageFilename(docMap);
final Tuple3<String, String, String> task = new Tuple3<>(generator.getName(), url, path);
thumbnailTaskQueue.offer(task);
final Tuple3<String, String, String> task = generator.createTask(path, docMap);
if (task != null) {
thumbnailTaskQueue.offer(task);
}
break;
}
}

View file

@ -23,7 +23,11 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.codelibs.core.misc.Tuple3;
import org.codelibs.fess.mylasta.direction.FessConfig;
import org.codelibs.fess.thumbnail.ThumbnailGenerator;
import org.codelibs.fess.util.ComponentUtil;
import org.codelibs.fess.util.DocumentUtil;
public abstract class BaseThumbnailGenerator implements ThumbnailGenerator {
@ -84,6 +88,13 @@ public abstract class BaseThumbnailGenerator implements ThumbnailGenerator {
return true;
}
@Override
public Tuple3<String, String, String> createTask(final String path, final Map<String, Object> docMap) {
final FessConfig fessConfig = ComponentUtil.getFessConfig();
final String url = DocumentUtil.getValue(docMap, fessConfig.getIndexFieldUrl(), String.class);
return new Tuple3<>(getName(), url, path);
}
public void setDirectoryNameLength(final int directoryNameLength) {
this.directoryNameLength = directoryNameLength;
}

View file

@ -0,0 +1,121 @@
/*
* Copyright 2012-2017 CodeLibs Project and the Others.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied. See the License for the specific language
* governing permissions and limitations under the License.
*/
package org.codelibs.fess.thumbnail.impl;
import java.awt.Image;
import java.awt.Rectangle;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
import java.util.Iterator;
import java.util.Map;
import javax.imageio.ImageIO;
import javax.imageio.ImageReadParam;
import javax.imageio.ImageReader;
import javax.imageio.stream.ImageInputStream;
import org.codelibs.core.lang.StringUtil;
import org.codelibs.core.misc.Tuple3;
import org.codelibs.elasticsearch.runner.net.Curl;
import org.codelibs.fess.mylasta.direction.FessConfig;
import org.codelibs.fess.util.ComponentUtil;
import org.codelibs.fess.util.DocumentUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class HtmlTagBasedGenerator extends BaseThumbnailGenerator {
private static final Logger logger = LoggerFactory.getLogger(HtmlTagBasedGenerator.class);
@Override
public void destroy() {
}
@Override
public Tuple3<String, String, String> createTask(final String path, final Map<String, Object> docMap) {
final FessConfig fessConfig = ComponentUtil.getFessConfig();
final String url = DocumentUtil.getValue(docMap, fessConfig.getIndexFieldThumbnail(), String.class);
if (StringUtil.isBlank(url)) {
return null;
}
return new Tuple3<>(getName(), url, path);
}
@Override
public boolean generate(final String url, final File outputFile) {
if (logger.isDebugEnabled()) {
logger.debug("Generate Thumbnail: " + url);
}
if (outputFile.exists()) {
if (logger.isDebugEnabled()) {
logger.debug("The thumbnail file exists: " + outputFile.getAbsolutePath());
}
return true;
}
final File parentFile = outputFile.getParentFile();
if (!parentFile.exists()) {
parentFile.mkdirs();
}
if (!parentFile.isDirectory()) {
logger.warn("Not found: " + parentFile.getAbsolutePath());
return false;
}
Curl.get(url).execute(con -> {
try (ImageInputStream input = ImageIO.createImageInputStream(con.getInputStream())) {
saveImage(input, outputFile);
} catch (final Throwable t) {
logger.warn("Failed to convert " + url, t);
}
});
return false;
}
protected void saveImage(final ImageInputStream input, final File outputFile) throws IOException {
final FessConfig fessConfig = ComponentUtil.getFessConfig();
final Iterator<ImageReader> readers = ImageIO.getImageReaders(input);
if (readers.hasNext()) {
final ImageReader reader = readers.next();
try {
reader.setInput(input);
final ImageReadParam param = reader.getDefaultReadParam();
final int width = reader.getWidth(0);
final int height = reader.getHeight(0);
final int samplingWidth = width / fessConfig.getThumbnailHtmlImageThumbnailWidthAsInteger();
final int samplingHeight = height / fessConfig.getThumbnailHtmlImageThumbnailHeightAsInteger();
param.setSourceSubsampling(samplingWidth <= 0 ? 1 : samplingWidth, samplingHeight <= 0 ? 1 : samplingHeight, 0, 0);
param.setSourceRegion(new Rectangle(width, height > width ? width : height));
final BufferedImage image = reader.read(0, param);
final int thumbnailWidth = fessConfig.getThumbnailHtmlImageThumbnailWidthAsInteger();
final int thumbnailHeight =
(int) (((float) (height > width ? width : height))
* fessConfig.getThumbnailHtmlImageThumbnailWidthAsInteger().floatValue() / (float) width);
BufferedImage thumbnail = new BufferedImage(thumbnailWidth, thumbnailHeight, image.getType());
thumbnail.getGraphics().drawImage(image.getScaledInstance(thumbnailWidth, thumbnailHeight, Image.SCALE_AREA_AVERAGING), 0,
0, thumbnailWidth, thumbnailHeight, null);
ImageIO.write(thumbnail, fessConfig.getThumbnailHtmlImageFormat(), outputFile);
image.flush();
} finally {
reader.dispose();
}
}
}
}

View file

@ -180,6 +180,7 @@ index.field.site=site
index.field.content_length=content_length
index.field.filetype=filetype
index.field.filename=filename
index.field.thumbnail=thumbnail
response.field.content_title=content_title
response.field.content_description=content_description
response.field.url_link=url_link
@ -375,7 +376,7 @@ paging.search.page.start=0
paging.search.page.size=20
paging.search.page.max.size=100
thumbnail.html.phantomjs.enabled=true
thumbnail.html.phantomjs.enabled=false
thumbnail.html.phantomjs.max.height=20000
thumbnail.html.phantomjs.keep.alive=600000
thumbnail.html.phantomjs.window.width=1200
@ -383,6 +384,14 @@ thumbnail.html.phantomjs.window.height=800
thumbnail.html.phantomjs.thumbnail.width=160
thumbnail.html.phantomjs.thumbnail.height=160
thumbnail.html.phantomjs.format=png
thumbnail.html.image.min.width=50
thumbnail.html.image.min.height=50
thumbnail.html.image.max.aspect.ratio=3.0
thumbnail.html.image.window.width=1200
thumbnail.html.image.window.height=800
thumbnail.html.image.thumbnail.width=160
thumbnail.html.image.thumbnail.height=160
thumbnail.html.image.format=png
thumbnail.generator.targets=all
thumbnail.crawler.enabled=false

View file

@ -532,6 +532,9 @@
"analyzer": "standard_analyzer",
"term_vector": "with_positions_offsets"
},
"thumbnail": {
"type": "keyword"
},
"url": {
"type": "keyword"
}

View file

@ -13,6 +13,14 @@
<arg>pdfThumbnailGenerator</arg>
</postConstruct>
</component>
<component name="htmlThumbnailGenerator" class="org.codelibs.fess.thumbnail.impl.HtmlTagBasedGenerator">
<property name="name">"htmlThumbnailGenerator"</property>
<postConstruct name="addCondition">
<arg>"mimetype"</arg>
<arg>"text/html"</arg>
</postConstruct>
</component>
<!--
<component name="htmlThumbnailGenerator" class="org.codelibs.fess.thumbnail.impl.WebDriverGenerator">
<property name="name">"htmlThumbnailGenerator"</property>
<property name="generatorList">
@ -31,7 +39,6 @@
<arg>"text/html"</arg>
</postConstruct>
</component>
<!--
<component name="htmlThumbnailGenerator" class="org.codelibs.fess.thumbnail.impl.CommandGenerator">
<property name="name">"htmlThumbnailGenerator"</property>
<property name="commandList">

View file

@ -642,5 +642,81 @@ public class FessXpathTransformerTest extends UnitFessTestCase {
value = transformer.getBaseUrl("https://hoge.com/", "//hoge.com/aaa/");
assertEquals("https://hoge.com/aaa/", value.toExternalForm());
value = transformer.getBaseUrl("https://hoge.com/", "aaa/");
assertEquals("https://hoge.com/aaa/", value.toExternalForm());
}
public void test_getThumbnailUrl_no() throws Exception {
final FessXpathTransformer transformer = new FessXpathTransformer();
final ResponseData responseData = new ResponseData();
responseData.setUrl("http://example.com/");
String data = "<html><body>foo</body></html>";
assertNull(transformer.getThumbnailUrl(responseData, getDocument(data)));
data = "<img src=\"http://example/foo.jpg\" width=\"x\" height=\"x\">";
assertNull(transformer.getThumbnailUrl(responseData, getDocument(data)));
data = "<img src=\"http://example/foo.jpg\" width=\"10\" height=\"100\">";
assertNull(transformer.getThumbnailUrl(responseData, getDocument(data)));
data = "<img src=\"http://example/foo.jpg\" width=\"100\" height=\"10\">";
assertNull(transformer.getThumbnailUrl(responseData, getDocument(data)));
data = "<img src=\"http://example/foo.jpg\" width=\"400\" height=\"100\">";
assertNull(transformer.getThumbnailUrl(responseData, getDocument(data)));
data = "<img src=\"http://example/foo.jpg\" width=\"100\" height=\"400\">";
assertNull(transformer.getThumbnailUrl(responseData, getDocument(data)));
}
public void test_getThumbnailUrl() throws Exception {
String data = "<meta property=\"og:image\" content=\"http://example/foo.jpg\" />";
String expected = "http://example/foo.jpg";
assertGetThumbnailUrl(data, expected);
data = "<meta property=\"og:image\" content=\"://example/foo.jpg\" />";
expected = "http://example/foo.jpg";
assertGetThumbnailUrl(data, expected);
data = "<meta property=\"og:image\" content=\"http://example/foo.jpg\" />";
expected = "http://example/foo.jpg";
assertGetThumbnailUrl(data, expected);
data = "<meta property=\"og:image\" content=\"/foo.jpg\" />";
expected = "http://example.com/foo.jpg";
assertGetThumbnailUrl(data, expected);
data = "<img src=\"http://example/foo.jpg\">";
expected = "http://example/foo.jpg";
assertGetThumbnailUrl(data, expected);
data = "<img src=\"http://example/foo.jpg\">" //
+ "<img src=\"http://example/bar.jpg\">";
expected = "http://example/foo.jpg";
assertGetThumbnailUrl(data, expected);
data = "<img src=\"http://example/foo.jpg\">" //
+ "<img src=\"http://example/bar.jpg\" width=\"100\" height=\"100\">";
expected = "http://example/bar.jpg";
assertGetThumbnailUrl(data, expected);
data = "<img src=\"http://example/foo.jpg\" width=\"100\" height=\"100\">";
expected = "http://example/foo.jpg";
assertGetThumbnailUrl(data, expected);
}
private void assertGetThumbnailUrl(String data, String expected) throws Exception {
final Document document = getDocument(data);
final FessXpathTransformer transformer = new FessXpathTransformer();
transformer.init();
final ResponseData responseData = new ResponseData();
responseData.setUrl("http://example.com/");
assertEquals(expected, transformer.getThumbnailUrl(responseData, document));
}
}

View file

@ -0,0 +1,99 @@
/*
* Copyright 2012-2017 CodeLibs Project and the Others.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied. See the License for the specific language
* governing permissions and limitations under the License.
*/
package org.codelibs.fess.thumbnail.impl;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
import javax.imageio.ImageIO;
import javax.imageio.stream.ImageInputStream;
import org.codelibs.fess.unit.UnitFessTestCase;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class HtmlTagBasedGeneratorTest extends UnitFessTestCase {
private static final Logger logger = LoggerFactory.getLogger(HtmlTagBasedGeneratorTest.class);
public void test_saveImage() throws Exception {
HtmlTagBasedGenerator generator = new HtmlTagBasedGenerator();
ClassLoader classLoader = Thread.currentThread().getContextClassLoader();
File outputFile = File.createTempFile("generator_", ".png");
String imagePath = "thumbnail/600x400.png";
try (ImageInputStream input = ImageIO.createImageInputStream(classLoader.getResourceAsStream(imagePath))) {
generator.saveImage(input, outputFile);
}
assertImageSize(outputFile, 160, 106);
imagePath = "thumbnail/600x400.gif";
try (ImageInputStream input = ImageIO.createImageInputStream(classLoader.getResourceAsStream(imagePath))) {
generator.saveImage(input, outputFile);
}
assertImageSize(outputFile, 160, 106);
imagePath = "thumbnail/600x400.jpg";
try (ImageInputStream input = ImageIO.createImageInputStream(classLoader.getResourceAsStream(imagePath))) {
generator.saveImage(input, outputFile);
}
assertImageSize(outputFile, 160, 106);
imagePath = "thumbnail/400x400.png";
try (ImageInputStream input = ImageIO.createImageInputStream(classLoader.getResourceAsStream(imagePath))) {
generator.saveImage(input, outputFile);
}
assertImageSize(outputFile, 160, 160);
imagePath = "thumbnail/400x400.gif";
try (ImageInputStream input = ImageIO.createImageInputStream(classLoader.getResourceAsStream(imagePath))) {
generator.saveImage(input, outputFile);
}
assertImageSize(outputFile, 160, 160);
imagePath = "thumbnail/400x400.jpg";
try (ImageInputStream input = ImageIO.createImageInputStream(classLoader.getResourceAsStream(imagePath))) {
generator.saveImage(input, outputFile);
}
assertImageSize(outputFile, 160, 160);
imagePath = "thumbnail/400x600.png";
try (ImageInputStream input = ImageIO.createImageInputStream(classLoader.getResourceAsStream(imagePath))) {
generator.saveImage(input, outputFile);
}
assertImageSize(outputFile, 160, 160);
imagePath = "thumbnail/400x600.gif";
try (ImageInputStream input = ImageIO.createImageInputStream(classLoader.getResourceAsStream(imagePath))) {
generator.saveImage(input, outputFile);
}
assertImageSize(outputFile, 160, 160);
imagePath = "thumbnail/400x600.jpg";
try (ImageInputStream input = ImageIO.createImageInputStream(classLoader.getResourceAsStream(imagePath))) {
generator.saveImage(input, outputFile);
}
assertImageSize(outputFile, 160, 160);
}
private void assertImageSize(File file, int width, int height) throws IOException {
BufferedImage img = ImageIO.read(file);
logger.debug("width: " + img.getWidth() + ", height: " + img.getHeight());
assertEquals("Image Width", width, img.getWidth());
assertEquals("Image Height", height, img.getHeight());
}
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.2 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.5 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.3 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.4 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 6 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.7 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.4 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.8 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.7 KiB