fix #1193 add thumbnail.html.image.xpath and thumbnail.html.image.exclude.extensions
This commit is contained in:
parent
bc3727849e
commit
c6b6f0bbea
4 changed files with 86 additions and 24 deletions
|
@ -740,44 +740,34 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf
|
|||
}
|
||||
}
|
||||
|
||||
final NodeList imgNodeList = getXPathAPI().selectNodeList(document, "//IMG");
|
||||
Node firstSrcNode = null;
|
||||
final NodeList imgNodeList = getXPathAPI().selectNodeList(document, fessConfig.getThumbnailHtmlImageXpath());
|
||||
String firstThumbnailUrl = null;
|
||||
for (int i = 0; i < imgNodeList.getLength(); i++) {
|
||||
final Node imgNode = imgNodeList.item(i);
|
||||
if (logger.isDebugEnabled()) {
|
||||
logger.debug("img tag: " + imgNode);
|
||||
}
|
||||
final NamedNodeMap attributes = imgNode.getAttributes();
|
||||
final String thumbnailUrl = getThumbnailSrc(responseData.getUrl(), attributes);
|
||||
final Integer height = getAttributeAsInteger(attributes, "height");
|
||||
final Integer width = getAttributeAsInteger(attributes, "width");
|
||||
if (height != null && width != null) {
|
||||
if (!fessConfig.isThumbnailHtmlImageUrl(thumbnailUrl)) {
|
||||
continue;
|
||||
} else if (height != null && width != null) {
|
||||
try {
|
||||
if (fessConfig.validateThumbnailSize(width, height)) {
|
||||
final Node srcNode = attributes.getNamedItem("src");
|
||||
if (srcNode != null) {
|
||||
final URL thumbnailUrl = getURL(responseData.getUrl(), srcNode.getTextContent());
|
||||
if (thumbnailUrl != null) {
|
||||
return thumbnailUrl.toExternalForm();
|
||||
}
|
||||
}
|
||||
return thumbnailUrl;
|
||||
}
|
||||
} catch (final Exception e) {
|
||||
logger.debug("Failed to parse " + imgNode + " at " + responseData.getUrl(), e);
|
||||
}
|
||||
} else if (firstSrcNode == null) {
|
||||
final Node srcNode = attributes.getNamedItem("src");
|
||||
if (srcNode != null) {
|
||||
firstSrcNode = srcNode;
|
||||
}
|
||||
} else if (firstThumbnailUrl == null) {
|
||||
firstThumbnailUrl = thumbnailUrl;
|
||||
}
|
||||
}
|
||||
|
||||
if (firstSrcNode != null) {
|
||||
try {
|
||||
final URL thumbnailUrl = getURL(responseData.getUrl(), firstSrcNode.getTextContent());
|
||||
if (thumbnailUrl != null) {
|
||||
return thumbnailUrl.toExternalForm();
|
||||
}
|
||||
} catch (final Exception e) {
|
||||
logger.debug("Failed to parse " + firstSrcNode + " at " + responseData.getUrl(), e);
|
||||
}
|
||||
if (firstThumbnailUrl != null) {
|
||||
return firstThumbnailUrl;
|
||||
}
|
||||
} catch (final Exception e) {
|
||||
logger.warn("Failed to retrieve thumbnail url from " + responseData.getUrl(), e);
|
||||
|
@ -785,6 +775,23 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf
|
|||
return null;
|
||||
}
|
||||
|
||||
protected String getThumbnailSrc(final String url, final NamedNodeMap attributes) {
|
||||
final Node srcNode = attributes.getNamedItem("src");
|
||||
if (srcNode != null) {
|
||||
try {
|
||||
final URL thumbnailUrl = getURL(url, srcNode.getTextContent());
|
||||
if (thumbnailUrl != null) {
|
||||
return thumbnailUrl.toExternalForm();
|
||||
}
|
||||
} catch (Exception e) {
|
||||
if (logger.isDebugEnabled()) {
|
||||
logger.debug("Failed to parse thumbnail url for " + url + " : " + attributes, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
protected Integer getAttributeAsInteger(final NamedNodeMap attributes, final String name) {
|
||||
final Node namedItem = attributes.getNamedItem(name);
|
||||
if (namedItem == null) {
|
||||
|
|
|
@ -812,6 +812,12 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
|
|||
/** The key of the configuration. e.g. png */
|
||||
String THUMBNAIL_HTML_IMAGE_FORMAT = "thumbnail.html.image.format";
|
||||
|
||||
/** The key of the configuration. e.g. //IMG */
|
||||
String THUMBNAIL_HTML_IMAGE_XPATH = "thumbnail.html.image.xpath";
|
||||
|
||||
/** The key of the configuration. e.g. svg,html,css,js */
|
||||
String THUMBNAIL_HTML_IMAGE_EXCLUDE_EXTENSIONS = "thumbnail.html.image.exclude.extensions";
|
||||
|
||||
/** The key of the configuration. e.g. 0 */
|
||||
String THUMBNAIL_GENERATOR_INTERVAL = "thumbnail.generator.interval";
|
||||
|
||||
|
@ -3971,6 +3977,20 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
|
|||
*/
|
||||
String getThumbnailHtmlImageFormat();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'thumbnail.html.image.xpath'. <br>
|
||||
* The value is, e.g. //IMG <br>
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
*/
|
||||
String getThumbnailHtmlImageXpath();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'thumbnail.html.image.exclude.extensions'. <br>
|
||||
* The value is, e.g. svg,html,css,js <br>
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
*/
|
||||
String getThumbnailHtmlImageExcludeExtensions();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'thumbnail.generator.interval'. <br>
|
||||
* The value is, e.g. 0 <br>
|
||||
|
@ -6661,6 +6681,14 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
|
|||
return get(FessConfig.THUMBNAIL_HTML_IMAGE_FORMAT);
|
||||
}
|
||||
|
||||
/**
 * Returns the value looked up via {@code get} for the key constant
 * {@code FessConfig.THUMBNAIL_HTML_IMAGE_XPATH}.
 *
 * @return the result of {@code get(FessConfig.THUMBNAIL_HTML_IMAGE_XPATH)}
 */
public String getThumbnailHtmlImageXpath() {
|
||||
return get(FessConfig.THUMBNAIL_HTML_IMAGE_XPATH);
|
||||
}
|
||||
|
||||
/**
 * Returns the value looked up via {@code get} for the key constant
 * {@code FessConfig.THUMBNAIL_HTML_IMAGE_EXCLUDE_EXTENSIONS}.
 *
 * @return the result of {@code get(FessConfig.THUMBNAIL_HTML_IMAGE_EXCLUDE_EXTENSIONS)}
 */
public String getThumbnailHtmlImageExcludeExtensions() {
|
||||
return get(FessConfig.THUMBNAIL_HTML_IMAGE_EXCLUDE_EXTENSIONS);
|
||||
}
|
||||
|
||||
/**
 * Returns the value looked up via {@code get} for the key constant
 * {@code FessConfig.THUMBNAIL_GENERATOR_INTERVAL}.
 *
 * @return the result of {@code get(FessConfig.THUMBNAIL_GENERATOR_INTERVAL)}
 */
public String getThumbnailGeneratorInterval() {
|
||||
return get(FessConfig.THUMBNAIL_GENERATOR_INTERVAL);
|
||||
}
|
||||
|
@ -7617,6 +7645,8 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
|
|||
defaultMap.put(FessConfig.THUMBNAIL_HTML_IMAGE_THUMBNAIL_WIDTH, "100");
|
||||
defaultMap.put(FessConfig.THUMBNAIL_HTML_IMAGE_THUMBNAIL_HEIGHT, "100");
|
||||
defaultMap.put(FessConfig.THUMBNAIL_HTML_IMAGE_FORMAT, "png");
|
||||
defaultMap.put(FessConfig.THUMBNAIL_HTML_IMAGE_XPATH, "//IMG");
|
||||
defaultMap.put(FessConfig.THUMBNAIL_HTML_IMAGE_EXCLUDE_EXTENSIONS, "svg,html,css,js");
|
||||
defaultMap.put(FessConfig.THUMBNAIL_GENERATOR_INTERVAL, "0");
|
||||
defaultMap.put(FessConfig.THUMBNAIL_GENERATOR_TARGETS, "all");
|
||||
defaultMap.put(FessConfig.THUMBNAIL_CRAWLER_ENABLED, "true");
|
||||
|
|
|
@ -69,6 +69,8 @@ import org.lastaflute.web.validation.theme.typed.LongTypeValidator;
|
|||
|
||||
public interface FessProp {
|
||||
|
||||
public static final String THUMBNAIL_HTML_IMAGE_EXCLUDE_EXTENSIONS = "ThumbnailHtmlImageExcludeExtensions";
|
||||
|
||||
public static final String VIRTUAL_HOST_VALUE = "VirtualHostValue";
|
||||
|
||||
public static final String QUERY_DEFAULT_LANGUAGES = "queryDefaultLanguages";
|
||||
|
@ -1736,4 +1738,25 @@ public interface FessProp {
|
|||
}
|
||||
return proxy;
|
||||
}
|
||||
|
||||
String getThumbnailHtmlImageExcludeExtensions();
|
||||
|
||||
public default boolean isThumbnailHtmlImageUrl(final String url) {
|
||||
if (StringUtil.isBlank(url)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
String[] excludeExtensions = (String[]) propMap.get(THUMBNAIL_HTML_IMAGE_EXCLUDE_EXTENSIONS);
|
||||
if (excludeExtensions == null) {
|
||||
excludeExtensions =
|
||||
split(getThumbnailHtmlImageExcludeExtensions(), ",").get(
|
||||
stream -> stream.map(s -> s.toLowerCase(Locale.ROOT).trim()).filter(StringUtil::isNotBlank)
|
||||
.toArray(n -> new String[n]));
|
||||
propMap.put(THUMBNAIL_HTML_IMAGE_EXCLUDE_EXTENSIONS, excludeExtensions);
|
||||
}
|
||||
|
||||
final String u = url.toLowerCase(Locale.ROOT);
|
||||
return !stream(excludeExtensions).get(stream -> stream.anyMatch(s -> u.endsWith(s)));
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -414,6 +414,8 @@ thumbnail.html.image.window.height=800
|
|||
thumbnail.html.image.thumbnail.width=100
|
||||
thumbnail.html.image.thumbnail.height=100
|
||||
thumbnail.html.image.format=png
|
||||
thumbnail.html.image.xpath=//IMG
|
||||
thumbnail.html.image.exclude.extensions=svg,html,css,js
|
||||
thumbnail.generator.interval=0
|
||||
thumbnail.generator.targets=all
|
||||
thumbnail.crawler.enabled=true
|
||||
|
|
Loading…
Add table
Reference in a new issue