Add crawler.ignore.robots.txt property #568

This commit is contained in:
yfujita 2016-07-15 12:01:05 +09:00
parent cd1a1bc936
commit 6532321e0c
3 changed files with 28 additions and 0 deletions

2
.gitignore vendored
View file

@ -21,3 +21,5 @@
.DS_Store
/plugins/
/tomcat.8080/
dbflute_fess/output/doc/lastadoc-fess.html
dbflute_fess/schema/project-lastadoc-fess.json

View file

@ -149,6 +149,9 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
/** The key of the configuration 'crawler.file.protocols'. e.g. file,smb,ftp */
String CRAWLER_FILE_PROTOCOLS = "crawler.file.protocols";
/** The key of the configuration 'crawler.ignore.robots.txt'. e.g. false */
String CRAWLER_IGNORE_ROBOTS_TXT = "crawler.ignore.robots.txt";
/** The key of the configuration 'crawler.metadata.content.excludes'. e.g. resourceName,X-Parsed-By,Content-Encoding.*,Content-Type.* */
String CRAWLER_METADATA_CONTENT_EXCLUDES = "crawler.metadata.content.excludes";
@ -1309,6 +1312,20 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
*/
String getCrawlerFileProtocols();
/**
 * Get the value for the key 'crawler.ignore.robots.txt'. <br>
 * The value is, e.g. false <br>
 * @return The value of the found property. (NotNull: an exception is thrown if the key is not found, which basically should not happen)
 */
String getCrawlerIgnoreRobotsTxt();
/**
 * Is the property for the key 'crawler.ignore.robots.txt' true? <br>
 * The value is, e.g. false <br>
 * @return The determination, true or false. (An exception is thrown if the key is not found, which basically should not happen)
 */
boolean isCrawlerIgnoreRobotsTxt();
/**
* Get the value for the key 'crawler.metadata.content.excludes'. <br>
* The value is, e.g. resourceName,X-Parsed-By,Content-Encoding.*,Content-Type.* <br>
@ -4077,6 +4094,14 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
return get(FessConfig.CRAWLER_FILE_PROTOCOLS);
}
/**
 * Returns the result of {@code get(...)} for the key
 * {@link FessConfig#CRAWLER_IGNORE_ROBOTS_TXT}.
 */
public String getCrawlerIgnoreRobotsTxt() {
    final String ignoreRobotsTxt = get(FessConfig.CRAWLER_IGNORE_ROBOTS_TXT);
    return ignoreRobotsTxt;
}
/**
 * Returns the result of {@code is(...)} for the key
 * {@link FessConfig#CRAWLER_IGNORE_ROBOTS_TXT}.
 */
public boolean isCrawlerIgnoreRobotsTxt() {
    final boolean ignoreRobotsTxt = is(FessConfig.CRAWLER_IGNORE_ROBOTS_TXT);
    return ignoreRobotsTxt;
}
/**
 * Returns the result of {@code get(...)} for the key
 * {@link FessConfig#CRAWLER_METADATA_CONTENT_EXCLUDES}.
 */
public String getCrawlerMetadataContentExcludes() {
    final String contentExcludes = get(FessConfig.CRAWLER_METADATA_CONTENT_EXCLUDES);
    return contentExcludes;
}

View file

@ -88,6 +88,7 @@ crawler.document.duplicate.term.removed=false
crawler.crawling.data.encoding=UTF-8
crawler.web.protocols=http,https
crawler.file.protocols=file,smb,ftp
crawler.ignore.robots.txt=false
crawler.metadata.content.excludes=resourceName,X-Parsed-By,Content-Encoding.*,Content-Type.*
crawler.metadata.name.mapping=\
title=title:string\n\