fix #1081 add HtmlTagBasedGenerator
|
@ -68,6 +68,7 @@ import org.cyberneko.html.parsers.DOMParser;
|
|||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.w3c.dom.Document;
|
||||
import org.w3c.dom.NamedNodeMap;
|
||||
import org.w3c.dom.Node;
|
||||
import org.w3c.dom.NodeList;
|
||||
import org.xml.sax.InputSource;
|
||||
|
@ -75,6 +76,10 @@ import org.xml.sax.InputSource;
|
|||
public class FessXpathTransformer extends XpathTransformer implements FessTransformer {
|
||||
private static final Logger logger = LoggerFactory.getLogger(FessXpathTransformer.class);
|
||||
|
||||
private static final String META_NAME_THUMBNAIL_CONTENT = "//META[@name=\"thumbnail\" or @name=\"THUMBNAIL\"]/@content";
|
||||
|
||||
private static final String META_PROPERTY_OGIMAGE_CONTENT = "//META[@property=\"og:image\"]/@content";
|
||||
|
||||
private static final String META_NAME_ROBOTS_CONTENT = "//META[@name=\"robots\" or @name=\"ROBOTS\"]/@content";
|
||||
|
||||
private static final String META_ROBOTS_NONE = "none";
|
||||
|
@ -360,6 +365,11 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf
|
|||
putResultDataBody(dataMap, fessConfig.getIndexFieldParentId(), crawlingInfoHelper.generateId(dataMap));
|
||||
putResultDataBody(dataMap, fessConfig.getIndexFieldUrl(), url); // set again
|
||||
}
|
||||
// thumbnail
|
||||
final String thumbnailUrl = getThumbnailUrl(responseData, document);
|
||||
if (StringUtil.isNotBlank(thumbnailUrl)) {
|
||||
putResultDataBody(dataMap, fessConfig.getIndexFieldThumbnail(), thumbnailUrl);
|
||||
}
|
||||
|
||||
// from config
|
||||
final Map<String, String> scriptConfigMap = crawlingConfig.getConfigParameterMap(ConfigName.SCRIPT);
|
||||
|
@ -598,16 +608,7 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf
|
|||
|
||||
protected URL getBaseUrl(final String currentUrl, final String baseHref) throws MalformedURLException {
|
||||
if (baseHref != null) {
|
||||
if (baseHref.startsWith("://")) {
|
||||
final String protocol = currentUrl.split(":")[0];
|
||||
return new URL(protocol + baseHref);
|
||||
} else if (baseHref.startsWith("//")) {
|
||||
final String protocol = currentUrl.split(":")[0];
|
||||
return new URL(protocol + ":" + baseHref);
|
||||
} else if (baseHref.startsWith("/")) {
|
||||
return new URL(new URL(currentUrl), baseHref);
|
||||
}
|
||||
return new URL(baseHref);
|
||||
return getURL(currentUrl, baseHref);
|
||||
}
|
||||
return new URL(currentUrl);
|
||||
}
|
||||
|
@ -687,4 +688,87 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf
|
|||
this.useGoogleOffOn = useGoogleOffOn;
|
||||
}
|
||||
|
||||
protected String getThumbnailUrl(final ResponseData responseData, final Document document) {
|
||||
// TODO PageMap
|
||||
try {
|
||||
// meta thumbnail
|
||||
final Node thumbnailNode = getXPathAPI().selectSingleNode(document, META_NAME_THUMBNAIL_CONTENT);
|
||||
if (thumbnailNode != null) {
|
||||
final URL thumbnailUrl = getURL(responseData.getUrl(), thumbnailNode.getTextContent());
|
||||
if (thumbnailUrl != null) {
|
||||
return thumbnailUrl.toExternalForm();
|
||||
}
|
||||
}
|
||||
|
||||
// meta og:image
|
||||
final Node ogImageNode = getXPathAPI().selectSingleNode(document, META_PROPERTY_OGIMAGE_CONTENT);
|
||||
if (ogImageNode != null) {
|
||||
final URL thumbnailUrl = getURL(responseData.getUrl(), ogImageNode.getTextContent());
|
||||
if (thumbnailUrl != null) {
|
||||
return thumbnailUrl.toExternalForm();
|
||||
}
|
||||
}
|
||||
|
||||
final NodeList imgNodeList = getXPathAPI().selectNodeList(document, "//IMG");
|
||||
Node firstSrcNode = null;
|
||||
for (int i = 0; i < imgNodeList.getLength(); i++) {
|
||||
final Node imgNode = imgNodeList.item(i);
|
||||
final NamedNodeMap attributes = imgNode.getAttributes();
|
||||
final Node heightAttr = attributes.getNamedItem("height");
|
||||
final Node widthAttr = attributes.getNamedItem("width");
|
||||
if (heightAttr != null && widthAttr != null) {
|
||||
try {
|
||||
final int height = Integer.parseInt(heightAttr.getTextContent());
|
||||
final int width = Integer.parseInt(widthAttr.getTextContent());
|
||||
if (fessConfig.validateThumbnailSize(width, height)) {
|
||||
final Node srcNode = attributes.getNamedItem("src");
|
||||
if (srcNode != null) {
|
||||
final URL thumbnailUrl = getURL(responseData.getUrl(), srcNode.getTextContent());
|
||||
if (thumbnailUrl != null) {
|
||||
return thumbnailUrl.toExternalForm();
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
logger.debug("Failed to parse " + imgNode + " at " + responseData.getUrl(), e);
|
||||
}
|
||||
} else if (firstSrcNode == null) {
|
||||
final Node srcNode = attributes.getNamedItem("src");
|
||||
if (srcNode != null) {
|
||||
firstSrcNode = srcNode;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (firstSrcNode != null) {
|
||||
try {
|
||||
final URL thumbnailUrl = getURL(responseData.getUrl(), firstSrcNode.getTextContent());
|
||||
if (thumbnailUrl != null) {
|
||||
return thumbnailUrl.toExternalForm();
|
||||
}
|
||||
} catch (Exception e) {
|
||||
logger.debug("Failed to parse " + firstSrcNode + " at " + responseData.getUrl(), e);
|
||||
}
|
||||
}
|
||||
} catch (final Exception e) {
|
||||
logger.warn("Failed to retrieve thumbnail url from " + responseData.getUrl(), e);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
protected URL getURL(final String currentUrl, final String url) throws MalformedURLException {
|
||||
if (url != null) {
|
||||
if (url.startsWith("://")) {
|
||||
final String protocol = currentUrl.split(":")[0];
|
||||
return new URL(protocol + url);
|
||||
} else if (url.startsWith("//")) {
|
||||
final String protocol = currentUrl.split(":")[0];
|
||||
return new URL(protocol + ":" + url);
|
||||
} else if (url.startsWith("/") || url.indexOf(':') == -1) {
|
||||
return new URL(new URL(currentUrl), url);
|
||||
}
|
||||
return new URL(url);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -148,6 +148,7 @@ public class QueryHelper {
|
|||
fessConfig.getIndexFieldTitle(), //
|
||||
fessConfig.getIndexFieldDigest(), //
|
||||
fessConfig.getIndexFieldUrl(), //
|
||||
fessConfig.getIndexFieldThumbnail(), //
|
||||
fessConfig.getIndexFieldClickCount(), //
|
||||
fessConfig.getIndexFieldFavoriteCount(), //
|
||||
fessConfig.getIndexFieldConfigId(), //
|
||||
|
|
|
@ -378,6 +378,9 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
|
|||
/** The key of the configuration. e.g. filename */
|
||||
String INDEX_FIELD_FILENAME = "index.field.filename";
|
||||
|
||||
/** The key of the configuration. e.g. thumbnail */
|
||||
String INDEX_FIELD_THUMBNAIL = "index.field.thumbnail";
|
||||
|
||||
/** The key of the configuration. e.g. content_title */
|
||||
String RESPONSE_FIELD_content_title = "response.field.content_title";
|
||||
|
||||
|
@ -717,7 +720,7 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
|
|||
/** The key of the configuration. e.g. 100 */
|
||||
String PAGING_SEARCH_PAGE_MAX_SIZE = "paging.search.page.max.size";
|
||||
|
||||
/** The key of the configuration. e.g. true */
|
||||
/** The key of the configuration. e.g. false */
|
||||
String THUMBNAIL_HTML_PHANTOMJS_ENABLED = "thumbnail.html.phantomjs.enabled";
|
||||
|
||||
/** The key of the configuration. e.g. 20000 */
|
||||
|
@ -741,6 +744,30 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
|
|||
/** The key of the configuration. e.g. png */
|
||||
String THUMBNAIL_HTML_PHANTOMJS_FORMAT = "thumbnail.html.phantomjs.format";
|
||||
|
||||
/** The key of the configuration. e.g. 50 */
|
||||
String THUMBNAIL_HTML_IMAGE_MIN_WIDTH = "thumbnail.html.image.min.width";
|
||||
|
||||
/** The key of the configuration. e.g. 50 */
|
||||
String THUMBNAIL_HTML_IMAGE_MIN_HEIGHT = "thumbnail.html.image.min.height";
|
||||
|
||||
/** The key of the configuration. e.g. 3.0 */
|
||||
String THUMBNAIL_HTML_IMAGE_MAX_ASPECT_RATIO = "thumbnail.html.image.max.aspect.ratio";
|
||||
|
||||
/** The key of the configuration. e.g. 1200 */
|
||||
String THUMBNAIL_HTML_IMAGE_WINDOW_WIDTH = "thumbnail.html.image.window.width";
|
||||
|
||||
/** The key of the configuration. e.g. 800 */
|
||||
String THUMBNAIL_HTML_IMAGE_WINDOW_HEIGHT = "thumbnail.html.image.window.height";
|
||||
|
||||
/** The key of the configuration. e.g. 160 */
|
||||
String THUMBNAIL_HTML_IMAGE_THUMBNAIL_WIDTH = "thumbnail.html.image.thumbnail.width";
|
||||
|
||||
/** The key of the configuration. e.g. 160 */
|
||||
String THUMBNAIL_HTML_IMAGE_THUMBNAIL_HEIGHT = "thumbnail.html.image.thumbnail.height";
|
||||
|
||||
/** The key of the configuration. e.g. png */
|
||||
String THUMBNAIL_HTML_IMAGE_FORMAT = "thumbnail.html.image.format";
|
||||
|
||||
/** The key of the configuration. e.g. all */
|
||||
String THUMBNAIL_GENERATOR_TARGETS = "thumbnail.generator.targets";
|
||||
|
||||
|
@ -2267,6 +2294,13 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
|
|||
*/
|
||||
String getIndexFieldFilename();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'index.field.thumbnail'. <br>
|
||||
* The value is, e.g. thumbnail <br>
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
*/
|
||||
String getIndexFieldThumbnail();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'response.field.content_title'. <br>
|
||||
* The value is, e.g. content_title <br>
|
||||
|
@ -3516,14 +3550,14 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
|
|||
|
||||
/**
|
||||
* Get the value for the key 'thumbnail.html.phantomjs.enabled'. <br>
|
||||
* The value is, e.g. true <br>
|
||||
* The value is, e.g. false <br>
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
*/
|
||||
String getThumbnailHtmlPhantomjsEnabled();
|
||||
|
||||
/**
|
||||
* Is the property for the key 'thumbnail.html.phantomjs.enabled' true? <br>
|
||||
* The value is, e.g. true <br>
|
||||
* The value is, e.g. false <br>
|
||||
* @return The determination, true or false. (if not found, exception but basically no way)
|
||||
*/
|
||||
boolean isThumbnailHtmlPhantomjsEnabled();
|
||||
|
@ -3625,6 +3659,118 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
|
|||
*/
|
||||
String getThumbnailHtmlPhantomjsFormat();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'thumbnail.html.image.min.width'. <br>
|
||||
* The value is, e.g. 50 <br>
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
*/
|
||||
String getThumbnailHtmlImageMinWidth();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'thumbnail.html.image.min.width' as {@link Integer}. <br>
|
||||
* The value is, e.g. 50 <br>
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
* @throws NumberFormatException When the property is not integer.
|
||||
*/
|
||||
Integer getThumbnailHtmlImageMinWidthAsInteger();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'thumbnail.html.image.min.height'. <br>
|
||||
* The value is, e.g. 50 <br>
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
*/
|
||||
String getThumbnailHtmlImageMinHeight();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'thumbnail.html.image.min.height' as {@link Integer}. <br>
|
||||
* The value is, e.g. 50 <br>
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
* @throws NumberFormatException When the property is not integer.
|
||||
*/
|
||||
Integer getThumbnailHtmlImageMinHeightAsInteger();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'thumbnail.html.image.max.aspect.ratio'. <br>
|
||||
* The value is, e.g. 3.0 <br>
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
*/
|
||||
String getThumbnailHtmlImageMaxAspectRatio();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'thumbnail.html.image.max.aspect.ratio' as {@link java.math.BigDecimal}. <br>
|
||||
* The value is, e.g. 3.0 <br>
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
* @throws NumberFormatException When the property is not decimal.
|
||||
*/
|
||||
java.math.BigDecimal getThumbnailHtmlImageMaxAspectRatioAsDecimal();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'thumbnail.html.image.window.width'. <br>
|
||||
* The value is, e.g. 1200 <br>
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
*/
|
||||
String getThumbnailHtmlImageWindowWidth();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'thumbnail.html.image.window.width' as {@link Integer}. <br>
|
||||
* The value is, e.g. 1200 <br>
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
* @throws NumberFormatException When the property is not integer.
|
||||
*/
|
||||
Integer getThumbnailHtmlImageWindowWidthAsInteger();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'thumbnail.html.image.window.height'. <br>
|
||||
* The value is, e.g. 800 <br>
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
*/
|
||||
String getThumbnailHtmlImageWindowHeight();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'thumbnail.html.image.window.height' as {@link Integer}. <br>
|
||||
* The value is, e.g. 800 <br>
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
* @throws NumberFormatException When the property is not integer.
|
||||
*/
|
||||
Integer getThumbnailHtmlImageWindowHeightAsInteger();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'thumbnail.html.image.thumbnail.width'. <br>
|
||||
* The value is, e.g. 160 <br>
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
*/
|
||||
String getThumbnailHtmlImageThumbnailWidth();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'thumbnail.html.image.thumbnail.width' as {@link Integer}. <br>
|
||||
* The value is, e.g. 160 <br>
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
* @throws NumberFormatException When the property is not integer.
|
||||
*/
|
||||
Integer getThumbnailHtmlImageThumbnailWidthAsInteger();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'thumbnail.html.image.thumbnail.height'. <br>
|
||||
* The value is, e.g. 160 <br>
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
*/
|
||||
String getThumbnailHtmlImageThumbnailHeight();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'thumbnail.html.image.thumbnail.height' as {@link Integer}. <br>
|
||||
* The value is, e.g. 160 <br>
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
* @throws NumberFormatException When the property is not integer.
|
||||
*/
|
||||
Integer getThumbnailHtmlImageThumbnailHeightAsInteger();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'thumbnail.html.image.format'. <br>
|
||||
* The value is, e.g. png <br>
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
*/
|
||||
String getThumbnailHtmlImageFormat();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'thumbnail.generator.targets'. <br>
|
||||
* The value is, e.g. all <br>
|
||||
|
@ -5447,6 +5593,10 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
|
|||
return get(FessConfig.INDEX_FIELD_FILENAME);
|
||||
}
|
||||
|
||||
public String getIndexFieldThumbnail() {
|
||||
return get(FessConfig.INDEX_FIELD_THUMBNAIL);
|
||||
}
|
||||
|
||||
public String getResponseFieldContentTitle() {
|
||||
return get(FessConfig.RESPONSE_FIELD_content_title);
|
||||
}
|
||||
|
@ -6143,6 +6293,66 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
|
|||
return get(FessConfig.THUMBNAIL_HTML_PHANTOMJS_FORMAT);
|
||||
}
|
||||
|
||||
public String getThumbnailHtmlImageMinWidth() {
|
||||
return get(FessConfig.THUMBNAIL_HTML_IMAGE_MIN_WIDTH);
|
||||
}
|
||||
|
||||
public Integer getThumbnailHtmlImageMinWidthAsInteger() {
|
||||
return getAsInteger(FessConfig.THUMBNAIL_HTML_IMAGE_MIN_WIDTH);
|
||||
}
|
||||
|
||||
public String getThumbnailHtmlImageMinHeight() {
|
||||
return get(FessConfig.THUMBNAIL_HTML_IMAGE_MIN_HEIGHT);
|
||||
}
|
||||
|
||||
public Integer getThumbnailHtmlImageMinHeightAsInteger() {
|
||||
return getAsInteger(FessConfig.THUMBNAIL_HTML_IMAGE_MIN_HEIGHT);
|
||||
}
|
||||
|
||||
public String getThumbnailHtmlImageMaxAspectRatio() {
|
||||
return get(FessConfig.THUMBNAIL_HTML_IMAGE_MAX_ASPECT_RATIO);
|
||||
}
|
||||
|
||||
public java.math.BigDecimal getThumbnailHtmlImageMaxAspectRatioAsDecimal() {
|
||||
return getAsDecimal(FessConfig.THUMBNAIL_HTML_IMAGE_MAX_ASPECT_RATIO);
|
||||
}
|
||||
|
||||
public String getThumbnailHtmlImageWindowWidth() {
|
||||
return get(FessConfig.THUMBNAIL_HTML_IMAGE_WINDOW_WIDTH);
|
||||
}
|
||||
|
||||
public Integer getThumbnailHtmlImageWindowWidthAsInteger() {
|
||||
return getAsInteger(FessConfig.THUMBNAIL_HTML_IMAGE_WINDOW_WIDTH);
|
||||
}
|
||||
|
||||
public String getThumbnailHtmlImageWindowHeight() {
|
||||
return get(FessConfig.THUMBNAIL_HTML_IMAGE_WINDOW_HEIGHT);
|
||||
}
|
||||
|
||||
public Integer getThumbnailHtmlImageWindowHeightAsInteger() {
|
||||
return getAsInteger(FessConfig.THUMBNAIL_HTML_IMAGE_WINDOW_HEIGHT);
|
||||
}
|
||||
|
||||
public String getThumbnailHtmlImageThumbnailWidth() {
|
||||
return get(FessConfig.THUMBNAIL_HTML_IMAGE_THUMBNAIL_WIDTH);
|
||||
}
|
||||
|
||||
public Integer getThumbnailHtmlImageThumbnailWidthAsInteger() {
|
||||
return getAsInteger(FessConfig.THUMBNAIL_HTML_IMAGE_THUMBNAIL_WIDTH);
|
||||
}
|
||||
|
||||
public String getThumbnailHtmlImageThumbnailHeight() {
|
||||
return get(FessConfig.THUMBNAIL_HTML_IMAGE_THUMBNAIL_HEIGHT);
|
||||
}
|
||||
|
||||
public Integer getThumbnailHtmlImageThumbnailHeightAsInteger() {
|
||||
return getAsInteger(FessConfig.THUMBNAIL_HTML_IMAGE_THUMBNAIL_HEIGHT);
|
||||
}
|
||||
|
||||
public String getThumbnailHtmlImageFormat() {
|
||||
return get(FessConfig.THUMBNAIL_HTML_IMAGE_FORMAT);
|
||||
}
|
||||
|
||||
public String getThumbnailGeneratorTargets() {
|
||||
return get(FessConfig.THUMBNAIL_GENERATOR_TARGETS);
|
||||
}
|
||||
|
|
|
@ -1605,4 +1605,26 @@ public interface FessProp {
|
|||
return false;
|
||||
}
|
||||
|
||||
Integer getThumbnailHtmlImageMinWidthAsInteger();
|
||||
|
||||
Integer getThumbnailHtmlImageMinHeightAsInteger();
|
||||
|
||||
java.math.BigDecimal getThumbnailHtmlImageMaxAspectRatioAsDecimal();
|
||||
|
||||
public default boolean validateThumbnailSize(final int width, final int height) {
|
||||
if (width <= 0 || height <= 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (width < getThumbnailHtmlImageMinWidthAsInteger().intValue() || height < getThumbnailHtmlImageMinHeightAsInteger().intValue()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
final float ratio = getThumbnailHtmlImageMaxAspectRatioAsDecimal().floatValue();
|
||||
if (((float) width) / ((float) height) > ratio || ((float) height) / ((float) width) > ratio) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -18,6 +18,8 @@ package org.codelibs.fess.thumbnail;
|
|||
import java.io.File;
|
||||
import java.util.Map;
|
||||
|
||||
import org.codelibs.core.misc.Tuple3;
|
||||
|
||||
public interface ThumbnailGenerator {
|
||||
|
||||
String getName();
|
||||
|
@ -29,4 +31,6 @@ public interface ThumbnailGenerator {
|
|||
boolean isAvailable();
|
||||
|
||||
void destroy();
|
||||
|
||||
Tuple3<String, String, String> createTask(String path, Map<String, Object> docMap);
|
||||
}
|
||||
|
|
|
@ -226,13 +226,13 @@ public class ThumbnailManager {
|
|||
}
|
||||
|
||||
public void offer(final Map<String, Object> docMap) {
|
||||
final FessConfig fessConfig = ComponentUtil.getFessConfig();
|
||||
for (final ThumbnailGenerator generator : generatorList) {
|
||||
if (generator.isTarget(docMap)) {
|
||||
final String url = DocumentUtil.getValue(docMap, fessConfig.getIndexFieldUrl(), String.class);
|
||||
final String path = getImageFilename(docMap);
|
||||
final Tuple3<String, String, String> task = new Tuple3<>(generator.getName(), url, path);
|
||||
thumbnailTaskQueue.offer(task);
|
||||
final Tuple3<String, String, String> task = generator.createTask(path, docMap);
|
||||
if (task != null) {
|
||||
thumbnailTaskQueue.offer(task);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -23,7 +23,11 @@ import java.util.HashMap;
|
|||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.codelibs.core.misc.Tuple3;
|
||||
import org.codelibs.fess.mylasta.direction.FessConfig;
|
||||
import org.codelibs.fess.thumbnail.ThumbnailGenerator;
|
||||
import org.codelibs.fess.util.ComponentUtil;
|
||||
import org.codelibs.fess.util.DocumentUtil;
|
||||
|
||||
public abstract class BaseThumbnailGenerator implements ThumbnailGenerator {
|
||||
|
||||
|
@ -84,6 +88,13 @@ public abstract class BaseThumbnailGenerator implements ThumbnailGenerator {
|
|||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Tuple3<String, String, String> createTask(final String path, final Map<String, Object> docMap) {
|
||||
final FessConfig fessConfig = ComponentUtil.getFessConfig();
|
||||
final String url = DocumentUtil.getValue(docMap, fessConfig.getIndexFieldUrl(), String.class);
|
||||
return new Tuple3<>(getName(), url, path);
|
||||
}
|
||||
|
||||
public void setDirectoryNameLength(final int directoryNameLength) {
|
||||
this.directoryNameLength = directoryNameLength;
|
||||
}
|
||||
|
|
|
@ -0,0 +1,121 @@
|
|||
/*
|
||||
* Copyright 2012-2017 CodeLibs Project and the Others.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
* either express or implied. See the License for the specific language
|
||||
* governing permissions and limitations under the License.
|
||||
*/
|
||||
package org.codelibs.fess.thumbnail.impl;
|
||||
|
||||
import java.awt.Image;
|
||||
import java.awt.Rectangle;
|
||||
import java.awt.image.BufferedImage;
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.Iterator;
|
||||
import java.util.Map;
|
||||
|
||||
import javax.imageio.ImageIO;
|
||||
import javax.imageio.ImageReadParam;
|
||||
import javax.imageio.ImageReader;
|
||||
import javax.imageio.stream.ImageInputStream;
|
||||
|
||||
import org.codelibs.core.lang.StringUtil;
|
||||
import org.codelibs.core.misc.Tuple3;
|
||||
import org.codelibs.elasticsearch.runner.net.Curl;
|
||||
import org.codelibs.fess.mylasta.direction.FessConfig;
|
||||
import org.codelibs.fess.util.ComponentUtil;
|
||||
import org.codelibs.fess.util.DocumentUtil;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
public class HtmlTagBasedGenerator extends BaseThumbnailGenerator {
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(HtmlTagBasedGenerator.class);
|
||||
|
||||
@Override
|
||||
public void destroy() {
|
||||
}
|
||||
|
||||
@Override
|
||||
public Tuple3<String, String, String> createTask(final String path, final Map<String, Object> docMap) {
|
||||
final FessConfig fessConfig = ComponentUtil.getFessConfig();
|
||||
final String url = DocumentUtil.getValue(docMap, fessConfig.getIndexFieldThumbnail(), String.class);
|
||||
if (StringUtil.isBlank(url)) {
|
||||
return null;
|
||||
}
|
||||
return new Tuple3<>(getName(), url, path);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean generate(final String url, final File outputFile) {
|
||||
if (logger.isDebugEnabled()) {
|
||||
logger.debug("Generate Thumbnail: " + url);
|
||||
}
|
||||
|
||||
if (outputFile.exists()) {
|
||||
if (logger.isDebugEnabled()) {
|
||||
logger.debug("The thumbnail file exists: " + outputFile.getAbsolutePath());
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
final File parentFile = outputFile.getParentFile();
|
||||
if (!parentFile.exists()) {
|
||||
parentFile.mkdirs();
|
||||
}
|
||||
if (!parentFile.isDirectory()) {
|
||||
logger.warn("Not found: " + parentFile.getAbsolutePath());
|
||||
return false;
|
||||
}
|
||||
|
||||
Curl.get(url).execute(con -> {
|
||||
try (ImageInputStream input = ImageIO.createImageInputStream(con.getInputStream())) {
|
||||
saveImage(input, outputFile);
|
||||
} catch (final Throwable t) {
|
||||
logger.warn("Failed to convert " + url, t);
|
||||
}
|
||||
});
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
protected void saveImage(final ImageInputStream input, final File outputFile) throws IOException {
|
||||
final FessConfig fessConfig = ComponentUtil.getFessConfig();
|
||||
final Iterator<ImageReader> readers = ImageIO.getImageReaders(input);
|
||||
if (readers.hasNext()) {
|
||||
final ImageReader reader = readers.next();
|
||||
try {
|
||||
reader.setInput(input);
|
||||
final ImageReadParam param = reader.getDefaultReadParam();
|
||||
final int width = reader.getWidth(0);
|
||||
final int height = reader.getHeight(0);
|
||||
final int samplingWidth = width / fessConfig.getThumbnailHtmlImageThumbnailWidthAsInteger();
|
||||
final int samplingHeight = height / fessConfig.getThumbnailHtmlImageThumbnailHeightAsInteger();
|
||||
param.setSourceSubsampling(samplingWidth <= 0 ? 1 : samplingWidth, samplingHeight <= 0 ? 1 : samplingHeight, 0, 0);
|
||||
param.setSourceRegion(new Rectangle(width, height > width ? width : height));
|
||||
final BufferedImage image = reader.read(0, param);
|
||||
final int thumbnailWidth = fessConfig.getThumbnailHtmlImageThumbnailWidthAsInteger();
|
||||
final int thumbnailHeight =
|
||||
(int) (((float) (height > width ? width : height))
|
||||
* fessConfig.getThumbnailHtmlImageThumbnailWidthAsInteger().floatValue() / (float) width);
|
||||
BufferedImage thumbnail = new BufferedImage(thumbnailWidth, thumbnailHeight, image.getType());
|
||||
thumbnail.getGraphics().drawImage(image.getScaledInstance(thumbnailWidth, thumbnailHeight, Image.SCALE_AREA_AVERAGING), 0,
|
||||
0, thumbnailWidth, thumbnailHeight, null);
|
||||
ImageIO.write(thumbnail, fessConfig.getThumbnailHtmlImageFormat(), outputFile);
|
||||
image.flush();
|
||||
} finally {
|
||||
reader.dispose();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -180,6 +180,7 @@ index.field.site=site
|
|||
index.field.content_length=content_length
|
||||
index.field.filetype=filetype
|
||||
index.field.filename=filename
|
||||
index.field.thumbnail=thumbnail
|
||||
response.field.content_title=content_title
|
||||
response.field.content_description=content_description
|
||||
response.field.url_link=url_link
|
||||
|
@ -375,7 +376,7 @@ paging.search.page.start=0
|
|||
paging.search.page.size=20
|
||||
paging.search.page.max.size=100
|
||||
|
||||
thumbnail.html.phantomjs.enabled=true
|
||||
thumbnail.html.phantomjs.enabled=false
|
||||
thumbnail.html.phantomjs.max.height=20000
|
||||
thumbnail.html.phantomjs.keep.alive=600000
|
||||
thumbnail.html.phantomjs.window.width=1200
|
||||
|
@ -383,6 +384,14 @@ thumbnail.html.phantomjs.window.height=800
|
|||
thumbnail.html.phantomjs.thumbnail.width=160
|
||||
thumbnail.html.phantomjs.thumbnail.height=160
|
||||
thumbnail.html.phantomjs.format=png
|
||||
thumbnail.html.image.min.width=50
|
||||
thumbnail.html.image.min.height=50
|
||||
thumbnail.html.image.max.aspect.ratio=3.0
|
||||
thumbnail.html.image.window.width=1200
|
||||
thumbnail.html.image.window.height=800
|
||||
thumbnail.html.image.thumbnail.width=160
|
||||
thumbnail.html.image.thumbnail.height=160
|
||||
thumbnail.html.image.format=png
|
||||
thumbnail.generator.targets=all
|
||||
thumbnail.crawler.enabled=false
|
||||
|
||||
|
|
|
@ -532,6 +532,9 @@
|
|||
"analyzer": "standard_analyzer",
|
||||
"term_vector": "with_positions_offsets"
|
||||
},
|
||||
"thumbnail": {
|
||||
"type": "keyword"
|
||||
},
|
||||
"url": {
|
||||
"type": "keyword"
|
||||
}
|
||||
|
|
|
@ -13,6 +13,14 @@
|
|||
<arg>pdfThumbnailGenerator</arg>
|
||||
</postConstruct>
|
||||
</component>
|
||||
<component name="htmlThumbnailGenerator" class="org.codelibs.fess.thumbnail.impl.HtmlTagBasedGenerator">
|
||||
<property name="name">"htmlThumbnailGenerator"</property>
|
||||
<postConstruct name="addCondition">
|
||||
<arg>"mimetype"</arg>
|
||||
<arg>"text/html"</arg>
|
||||
</postConstruct>
|
||||
</component>
|
||||
<!--
|
||||
<component name="htmlThumbnailGenerator" class="org.codelibs.fess.thumbnail.impl.WebDriverGenerator">
|
||||
<property name="name">"htmlThumbnailGenerator"</property>
|
||||
<property name="generatorList">
|
||||
|
@ -31,7 +39,6 @@
|
|||
<arg>"text/html"</arg>
|
||||
</postConstruct>
|
||||
</component>
|
||||
<!--
|
||||
<component name="htmlThumbnailGenerator" class="org.codelibs.fess.thumbnail.impl.CommandGenerator">
|
||||
<property name="name">"htmlThumbnailGenerator"</property>
|
||||
<property name="commandList">
|
||||
|
|
|
@ -642,5 +642,81 @@ public class FessXpathTransformerTest extends UnitFessTestCase {
|
|||
|
||||
value = transformer.getBaseUrl("https://hoge.com/", "//hoge.com/aaa/");
|
||||
assertEquals("https://hoge.com/aaa/", value.toExternalForm());
|
||||
|
||||
value = transformer.getBaseUrl("https://hoge.com/", "aaa/");
|
||||
assertEquals("https://hoge.com/aaa/", value.toExternalForm());
|
||||
}
|
||||
|
||||
public void test_getThumbnailUrl_no() throws Exception {
|
||||
|
||||
final FessXpathTransformer transformer = new FessXpathTransformer();
|
||||
final ResponseData responseData = new ResponseData();
|
||||
responseData.setUrl("http://example.com/");
|
||||
|
||||
String data = "<html><body>foo</body></html>";
|
||||
assertNull(transformer.getThumbnailUrl(responseData, getDocument(data)));
|
||||
|
||||
data = "<img src=\"http://example/foo.jpg\" width=\"x\" height=\"x\">";
|
||||
assertNull(transformer.getThumbnailUrl(responseData, getDocument(data)));
|
||||
|
||||
data = "<img src=\"http://example/foo.jpg\" width=\"10\" height=\"100\">";
|
||||
assertNull(transformer.getThumbnailUrl(responseData, getDocument(data)));
|
||||
|
||||
data = "<img src=\"http://example/foo.jpg\" width=\"100\" height=\"10\">";
|
||||
assertNull(transformer.getThumbnailUrl(responseData, getDocument(data)));
|
||||
|
||||
data = "<img src=\"http://example/foo.jpg\" width=\"400\" height=\"100\">";
|
||||
assertNull(transformer.getThumbnailUrl(responseData, getDocument(data)));
|
||||
|
||||
data = "<img src=\"http://example/foo.jpg\" width=\"100\" height=\"400\">";
|
||||
assertNull(transformer.getThumbnailUrl(responseData, getDocument(data)));
|
||||
}
|
||||
|
||||
public void test_getThumbnailUrl() throws Exception {
|
||||
String data = "<meta property=\"og:image\" content=\"http://example/foo.jpg\" />";
|
||||
String expected = "http://example/foo.jpg";
|
||||
assertGetThumbnailUrl(data, expected);
|
||||
|
||||
data = "<meta property=\"og:image\" content=\"://example/foo.jpg\" />";
|
||||
expected = "http://example/foo.jpg";
|
||||
assertGetThumbnailUrl(data, expected);
|
||||
|
||||
data = "<meta property=\"og:image\" content=\"http://example/foo.jpg\" />";
|
||||
expected = "http://example/foo.jpg";
|
||||
assertGetThumbnailUrl(data, expected);
|
||||
|
||||
data = "<meta property=\"og:image\" content=\"/foo.jpg\" />";
|
||||
expected = "http://example.com/foo.jpg";
|
||||
assertGetThumbnailUrl(data, expected);
|
||||
|
||||
data = "<img src=\"http://example/foo.jpg\">";
|
||||
expected = "http://example/foo.jpg";
|
||||
assertGetThumbnailUrl(data, expected);
|
||||
|
||||
data = "<img src=\"http://example/foo.jpg\">" //
|
||||
+ "<img src=\"http://example/bar.jpg\">";
|
||||
expected = "http://example/foo.jpg";
|
||||
assertGetThumbnailUrl(data, expected);
|
||||
|
||||
data = "<img src=\"http://example/foo.jpg\">" //
|
||||
+ "<img src=\"http://example/bar.jpg\" width=\"100\" height=\"100\">";
|
||||
expected = "http://example/bar.jpg";
|
||||
assertGetThumbnailUrl(data, expected);
|
||||
|
||||
data = "<img src=\"http://example/foo.jpg\" width=\"100\" height=\"100\">";
|
||||
expected = "http://example/foo.jpg";
|
||||
assertGetThumbnailUrl(data, expected);
|
||||
}
|
||||
|
||||
private void assertGetThumbnailUrl(String data, String expected) throws Exception {
|
||||
final Document document = getDocument(data);
|
||||
|
||||
final FessXpathTransformer transformer = new FessXpathTransformer();
|
||||
transformer.init();
|
||||
|
||||
final ResponseData responseData = new ResponseData();
|
||||
responseData.setUrl("http://example.com/");
|
||||
|
||||
assertEquals(expected, transformer.getThumbnailUrl(responseData, document));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,99 @@
|
|||
/*
|
||||
* Copyright 2012-2017 CodeLibs Project and the Others.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
* either express or implied. See the License for the specific language
|
||||
* governing permissions and limitations under the License.
|
||||
*/
|
||||
package org.codelibs.fess.thumbnail.impl;
|
||||
|
||||
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;

import javax.imageio.ImageIO;
import javax.imageio.stream.ImageInputStream;

import org.codelibs.fess.unit.UnitFessTestCase;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
|
||||
|
||||
public class HtmlTagBasedGeneratorTest extends UnitFessTestCase {
|
||||
private static final Logger logger = LoggerFactory.getLogger(HtmlTagBasedGeneratorTest.class);
|
||||
|
||||
public void test_saveImage() throws Exception {
|
||||
HtmlTagBasedGenerator generator = new HtmlTagBasedGenerator();
|
||||
ClassLoader classLoader = Thread.currentThread().getContextClassLoader();
|
||||
File outputFile = File.createTempFile("generator_", ".png");
|
||||
|
||||
String imagePath = "thumbnail/600x400.png";
|
||||
try (ImageInputStream input = ImageIO.createImageInputStream(classLoader.getResourceAsStream(imagePath))) {
|
||||
generator.saveImage(input, outputFile);
|
||||
}
|
||||
assertImageSize(outputFile, 160, 106);
|
||||
|
||||
imagePath = "thumbnail/600x400.gif";
|
||||
try (ImageInputStream input = ImageIO.createImageInputStream(classLoader.getResourceAsStream(imagePath))) {
|
||||
generator.saveImage(input, outputFile);
|
||||
}
|
||||
assertImageSize(outputFile, 160, 106);
|
||||
|
||||
imagePath = "thumbnail/600x400.jpg";
|
||||
try (ImageInputStream input = ImageIO.createImageInputStream(classLoader.getResourceAsStream(imagePath))) {
|
||||
generator.saveImage(input, outputFile);
|
||||
}
|
||||
assertImageSize(outputFile, 160, 106);
|
||||
|
||||
imagePath = "thumbnail/400x400.png";
|
||||
try (ImageInputStream input = ImageIO.createImageInputStream(classLoader.getResourceAsStream(imagePath))) {
|
||||
generator.saveImage(input, outputFile);
|
||||
}
|
||||
assertImageSize(outputFile, 160, 160);
|
||||
|
||||
imagePath = "thumbnail/400x400.gif";
|
||||
try (ImageInputStream input = ImageIO.createImageInputStream(classLoader.getResourceAsStream(imagePath))) {
|
||||
generator.saveImage(input, outputFile);
|
||||
}
|
||||
assertImageSize(outputFile, 160, 160);
|
||||
|
||||
imagePath = "thumbnail/400x400.jpg";
|
||||
try (ImageInputStream input = ImageIO.createImageInputStream(classLoader.getResourceAsStream(imagePath))) {
|
||||
generator.saveImage(input, outputFile);
|
||||
}
|
||||
assertImageSize(outputFile, 160, 160);
|
||||
|
||||
imagePath = "thumbnail/400x600.png";
|
||||
try (ImageInputStream input = ImageIO.createImageInputStream(classLoader.getResourceAsStream(imagePath))) {
|
||||
generator.saveImage(input, outputFile);
|
||||
}
|
||||
assertImageSize(outputFile, 160, 160);
|
||||
|
||||
imagePath = "thumbnail/400x600.gif";
|
||||
try (ImageInputStream input = ImageIO.createImageInputStream(classLoader.getResourceAsStream(imagePath))) {
|
||||
generator.saveImage(input, outputFile);
|
||||
}
|
||||
assertImageSize(outputFile, 160, 160);
|
||||
|
||||
imagePath = "thumbnail/400x600.jpg";
|
||||
try (ImageInputStream input = ImageIO.createImageInputStream(classLoader.getResourceAsStream(imagePath))) {
|
||||
generator.saveImage(input, outputFile);
|
||||
}
|
||||
assertImageSize(outputFile, 160, 160);
|
||||
|
||||
}
|
||||
|
||||
private void assertImageSize(File file, int width, int height) throws IOException {
|
||||
BufferedImage img = ImageIO.read(file);
|
||||
logger.debug("width: " + img.getWidth() + ", height: " + img.getHeight());
|
||||
assertEquals("Image Width", width, img.getWidth());
|
||||
assertEquals("Image Height", height, img.getHeight());
|
||||
}
|
||||
}
|
BIN
src/test/resources/thumbnail/400x400.gif
Normal file
After Width: | Height: | Size: 2.2 KiB |
BIN
src/test/resources/thumbnail/400x400.jpg
Normal file
After Width: | Height: | Size: 5.5 KiB |
BIN
src/test/resources/thumbnail/400x400.png
Normal file
After Width: | Height: | Size: 1.3 KiB |
BIN
src/test/resources/thumbnail/400x600.gif
Normal file
After Width: | Height: | Size: 3.4 KiB |
BIN
src/test/resources/thumbnail/400x600.jpg
Normal file
After Width: | Height: | Size: 6 KiB |
BIN
src/test/resources/thumbnail/400x600.png
Normal file
After Width: | Height: | Size: 1.7 KiB |
BIN
src/test/resources/thumbnail/600x400.gif
Normal file
After Width: | Height: | Size: 3.4 KiB |
BIN
src/test/resources/thumbnail/600x400.jpg
Normal file
After Width: | Height: | Size: 7.8 KiB |
BIN
src/test/resources/thumbnail/600x400.png
Normal file
After Width: | Height: | Size: 1.7 KiB |