Add crawler.ignore.robots.txt property #568
This commit is contained in:
parent
cd1a1bc936
commit
6532321e0c
3 changed files with 28 additions and 0 deletions
2
.gitignore
vendored
2
.gitignore
vendored
|
@ -21,3 +21,5 @@
|
|||
.DS_Store
|
||||
/plugins/
|
||||
/tomcat.8080/
|
||||
dbflute_fess/output/doc/lastadoc-fess.html
|
||||
dbflute_fess/schema/project-lastadoc-fess.json
|
||||
|
|
|
@ -149,6 +149,9 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
|
|||
/** The key of the configuration. e.g. file,smb,ftp */
|
||||
String CRAWLER_FILE_PROTOCOLS = "crawler.file.protocols";
|
||||
|
||||
/** The key of the configuration. e.g. false */
|
||||
String CRAWLER_IGNORE_ROBOTS_TXT = "crawler.ignore.robots.txt";
|
||||
|
||||
/** The key of the configuration. e.g. resourceName,X-Parsed-By,Content-Encoding.*,Content-Type.* */
|
||||
String CRAWLER_METADATA_CONTENT_EXCLUDES = "crawler.metadata.content.excludes";
|
||||
|
||||
|
@ -1309,6 +1312,20 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
|
|||
*/
|
||||
String getCrawlerFileProtocols();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'crawler.ignore.robots.txt'. <br>
|
||||
* The value is, e.g. false <br>
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
*/
|
||||
String getCrawlerIgnoreRobotsTxt();
|
||||
|
||||
/**
|
||||
* Is the property for the key 'crawler.ignore.robots.txt' true? <br>
|
||||
* The value is, e.g. false <br>
|
||||
* @return The determination, true or false. (if not found, exception but basically no way)
|
||||
*/
|
||||
boolean isCrawlerIgnoreRobotsTxt();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'crawler.metadata.content.excludes'. <br>
|
||||
* The value is, e.g. resourceName,X-Parsed-By,Content-Encoding.*,Content-Type.* <br>
|
||||
|
@ -4077,6 +4094,14 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
|
|||
return get(FessConfig.CRAWLER_FILE_PROTOCOLS);
|
||||
}
|
||||
|
||||
/**
 * Returns the string value of the 'crawler.ignore.robots.txt' property.
 * The lookup is delegated to get() with the FessConfig.CRAWLER_IGNORE_ROBOTS_TXT key;
 * the default shipped in fess_config.properties by this change is "false".
 * NOTE(review): behavior for a missing key is decided by get(), which is not visible here.
 * @return The property value as returned by get().
 */
public String getCrawlerIgnoreRobotsTxt() {
|
||||
return get(FessConfig.CRAWLER_IGNORE_ROBOTS_TXT);
|
||||
}
|
||||
|
||||
/**
 * Returns the 'crawler.ignore.robots.txt' property as a boolean flag.
 * The determination is delegated to is() with the FessConfig.CRAWLER_IGNORE_ROBOTS_TXT key;
 * the default shipped in fess_config.properties by this change is false.
 * NOTE(review): how is() parses the stored text is not visible here - confirm before
 * relying on values other than "true"/"false".
 * @return The determination as returned by is().
 */
public boolean isCrawlerIgnoreRobotsTxt() {
|
||||
return is(FessConfig.CRAWLER_IGNORE_ROBOTS_TXT);
|
||||
}
|
||||
|
||||
/**
 * Returns the string value of the 'crawler.metadata.content.excludes' property.
 * The lookup is delegated to get() with the FessConfig.CRAWLER_METADATA_CONTENT_EXCLUDES key;
 * the value in fess_config.properties is a comma-separated list, e.g.
 * resourceName,X-Parsed-By,Content-Encoding.*,Content-Type.*
 * @return The property value as returned by get(), not split into individual entries.
 */
public String getCrawlerMetadataContentExcludes() {
|
||||
return get(FessConfig.CRAWLER_METADATA_CONTENT_EXCLUDES);
|
||||
}
|
||||
|
|
|
@ -88,6 +88,7 @@ crawler.document.duplicate.term.removed=false
|
|||
crawler.crawling.data.encoding=UTF-8
|
||||
crawler.web.protocols=http,https
|
||||
crawler.file.protocols=file,smb,ftp
|
||||
crawler.ignore.robots.txt=false
|
||||
crawler.metadata.content.excludes=resourceName,X-Parsed-By,Content-Encoding.*,Content-Type.*
|
||||
crawler.metadata.name.mapping=\
|
||||
title=title:string\n\
|
||||
|
|
Loading…
Add table
Reference in a new issue