fix #1193 add thumbnail.html.image.xpath and thumbnail.html.image.exclude.extensions

This commit is contained in:
Shinsuke Sugaya 2017-07-27 11:37:08 +09:00
parent bc3727849e
commit c6b6f0bbea
4 changed files with 86 additions and 24 deletions

View file

@ -740,44 +740,34 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf
}
}
final NodeList imgNodeList = getXPathAPI().selectNodeList(document, "//IMG");
Node firstSrcNode = null;
final NodeList imgNodeList = getXPathAPI().selectNodeList(document, fessConfig.getThumbnailHtmlImageXpath());
String firstThumbnailUrl = null;
for (int i = 0; i < imgNodeList.getLength(); i++) {
final Node imgNode = imgNodeList.item(i);
if (logger.isDebugEnabled()) {
logger.debug("img tag: " + imgNode);
}
final NamedNodeMap attributes = imgNode.getAttributes();
final String thumbnailUrl = getThumbnailSrc(responseData.getUrl(), attributes);
final Integer height = getAttributeAsInteger(attributes, "height");
final Integer width = getAttributeAsInteger(attributes, "width");
if (height != null && width != null) {
if (!fessConfig.isThumbnailHtmlImageUrl(thumbnailUrl)) {
continue;
} else if (height != null && width != null) {
try {
if (fessConfig.validateThumbnailSize(width, height)) {
final Node srcNode = attributes.getNamedItem("src");
if (srcNode != null) {
final URL thumbnailUrl = getURL(responseData.getUrl(), srcNode.getTextContent());
if (thumbnailUrl != null) {
return thumbnailUrl.toExternalForm();
}
}
return thumbnailUrl;
}
} catch (final Exception e) {
logger.debug("Failed to parse " + imgNode + " at " + responseData.getUrl(), e);
}
} else if (firstSrcNode == null) {
final Node srcNode = attributes.getNamedItem("src");
if (srcNode != null) {
firstSrcNode = srcNode;
}
} else if (firstThumbnailUrl == null) {
firstThumbnailUrl = thumbnailUrl;
}
}
if (firstSrcNode != null) {
try {
final URL thumbnailUrl = getURL(responseData.getUrl(), firstSrcNode.getTextContent());
if (thumbnailUrl != null) {
return thumbnailUrl.toExternalForm();
}
} catch (final Exception e) {
logger.debug("Failed to parse " + firstSrcNode + " at " + responseData.getUrl(), e);
}
if (firstThumbnailUrl != null) {
return firstThumbnailUrl;
}
} catch (final Exception e) {
logger.warn("Failed to retrieve thumbnail url from " + responseData.getUrl(), e);
@ -785,6 +775,23 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf
return null;
}
/**
 * Builds the thumbnail URL string from the {@code src} attribute of an element.
 *
 * The attribute text is passed, together with the page URL, to {@code getURL}
 * (defined elsewhere in this class); the resulting URL is returned in its
 * external form.
 *
 * @param url the URL of the page that contains the element
 * @param attributes the attributes of the element being inspected
 * @return the external form of the resolved URL, or {@code null} when there is no
 *         {@code src} attribute, when {@code getURL} yields {@code null}, or when
 *         resolving the URL throws
 */
protected String getThumbnailSrc(final String url, final NamedNodeMap attributes) {
    final Node src = attributes.getNamedItem("src");
    if (src == null) {
        return null;
    }
    try {
        final URL resolved = getURL(url, src.getTextContent());
        return resolved == null ? null : resolved.toExternalForm();
    } catch (Exception e) {
        // Best effort: a malformed src must not abort processing, so only report it at debug level.
        if (logger.isDebugEnabled()) {
            logger.debug("Failed to parse thumbnail url for " + url + " : " + attributes, e);
        }
        return null;
    }
}
protected Integer getAttributeAsInteger(final NamedNodeMap attributes, final String name) {
final Node namedItem = attributes.getNamedItem(name);
if (namedItem == null) {

View file

@ -812,6 +812,12 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
/** The key of the configuration. e.g. png */
String THUMBNAIL_HTML_IMAGE_FORMAT = "thumbnail.html.image.format";
/** The key of the configuration. e.g. //IMG */
String THUMBNAIL_HTML_IMAGE_XPATH = "thumbnail.html.image.xpath";
/** The key of the configuration. e.g. svg,html,css,js */
String THUMBNAIL_HTML_IMAGE_EXCLUDE_EXTENSIONS = "thumbnail.html.image.exclude.extensions";
/** The key of the configuration. e.g. 0 */
String THUMBNAIL_GENERATOR_INTERVAL = "thumbnail.generator.interval";
@ -3971,6 +3977,20 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
*/
String getThumbnailHtmlImageFormat();
/**
* Get the value for the key 'thumbnail.html.image.xpath'. <br>
* The value is, e.g. //IMG <br>
* @return The value of found property. (NotNull: if not found, exception but basically no way)
*/
String getThumbnailHtmlImageXpath();
/**
* Get the value for the key 'thumbnail.html.image.exclude.extensions'. <br>
* The value is, e.g. svg,html,css,js <br>
* @return The value of found property. (NotNull: if not found, exception but basically no way)
*/
String getThumbnailHtmlImageExcludeExtensions();
/**
* Get the value for the key 'thumbnail.generator.interval'. <br>
* The value is, e.g. 0 <br>
@ -6661,6 +6681,14 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
return get(FessConfig.THUMBNAIL_HTML_IMAGE_FORMAT);
}
/** Returns the value stored under the key {@code FessConfig.THUMBNAIL_HTML_IMAGE_XPATH}. */
public String getThumbnailHtmlImageXpath() {
    final String xpath = get(FessConfig.THUMBNAIL_HTML_IMAGE_XPATH);
    return xpath;
}
/** Returns the value stored under the key {@code FessConfig.THUMBNAIL_HTML_IMAGE_EXCLUDE_EXTENSIONS}. */
public String getThumbnailHtmlImageExcludeExtensions() {
    final String extensions = get(FessConfig.THUMBNAIL_HTML_IMAGE_EXCLUDE_EXTENSIONS);
    return extensions;
}
/** Returns the value stored under the key {@code FessConfig.THUMBNAIL_GENERATOR_INTERVAL}. */
public String getThumbnailGeneratorInterval() {
    final String interval = get(FessConfig.THUMBNAIL_GENERATOR_INTERVAL);
    return interval;
}
@ -7617,6 +7645,8 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
defaultMap.put(FessConfig.THUMBNAIL_HTML_IMAGE_THUMBNAIL_WIDTH, "100");
defaultMap.put(FessConfig.THUMBNAIL_HTML_IMAGE_THUMBNAIL_HEIGHT, "100");
defaultMap.put(FessConfig.THUMBNAIL_HTML_IMAGE_FORMAT, "png");
defaultMap.put(FessConfig.THUMBNAIL_HTML_IMAGE_XPATH, "//IMG");
defaultMap.put(FessConfig.THUMBNAIL_HTML_IMAGE_EXCLUDE_EXTENSIONS, "svg,html,css,js");
defaultMap.put(FessConfig.THUMBNAIL_GENERATOR_INTERVAL, "0");
defaultMap.put(FessConfig.THUMBNAIL_GENERATOR_TARGETS, "all");
defaultMap.put(FessConfig.THUMBNAIL_CRAWLER_ENABLED, "true");

View file

@ -69,6 +69,8 @@ import org.lastaflute.web.validation.theme.typed.LongTypeValidator;
public interface FessProp {
public static final String THUMBNAIL_HTML_IMAGE_EXCLUDE_EXTENSIONS = "ThumbnailHtmlImageExcludeExtensions";
public static final String VIRTUAL_HOST_VALUE = "VirtualHostValue";
public static final String QUERY_DEFAULT_LANGUAGES = "queryDefaultLanguages";
@ -1736,4 +1738,25 @@ public interface FessProp {
}
return proxy;
}
String getThumbnailHtmlImageExcludeExtensions();
/**
 * Decides whether a URL may be used as a thumbnail image, based on the
 * comma-separated list returned by {@link #getThumbnailHtmlImageExcludeExtensions()}.
 *
 * The list is lower-cased, trimmed, stripped of blank entries and cached in
 * {@code propMap} on first use. A URL is rejected when, compared
 * case-insensitively, it ends with {@code "." + extension} for any listed
 * extension. Entries that already start with a dot are used as written.
 *
 * @param url the candidate image URL
 * @return {@code false} if the URL is blank or ends with an excluded extension,
 *         {@code true} otherwise
 */
public default boolean isThumbnailHtmlImageUrl(final String url) {
    if (StringUtil.isBlank(url)) {
        return false;
    }

    String[] excludeExtensions = (String[]) propMap.get(THUMBNAIL_HTML_IMAGE_EXCLUDE_EXTENSIONS);
    if (excludeExtensions == null) {
        excludeExtensions =
                split(getThumbnailHtmlImageExcludeExtensions(), ",").get(
                        stream -> stream.map(s -> s.toLowerCase(Locale.ROOT).trim()).filter(StringUtil::isNotBlank)
                                .toArray(n -> new String[n]));
        propMap.put(THUMBNAIL_HTML_IMAGE_EXCLUDE_EXTENSIONS, excludeExtensions);
    }

    final String u = url.toLowerCase(Locale.ROOT);
    for (final String extension : excludeExtensions) {
        // Require the dot so that "js" excludes "app.js" but not a path that merely ends in "...js".
        final String suffix = extension.startsWith(".") ? extension : "." + extension;
        if (u.endsWith(suffix)) {
            return false;
        }
    }
    return true;
}
}

View file

@ -414,6 +414,8 @@ thumbnail.html.image.window.height=800
thumbnail.html.image.thumbnail.width=100
thumbnail.html.image.thumbnail.height=100
thumbnail.html.image.format=png
thumbnail.html.image.xpath=//IMG
thumbnail.html.image.exclude.extensions=svg,html,css,js
thumbnail.generator.interval=0
thumbnail.generator.targets=all
thumbnail.crawler.enabled=true