diff --git a/c/cr.c b/c/cr.c index 9403ec2..5d0f38e 100755 --- a/c/cr.c +++ b/c/cr.c @@ -590,8 +590,8 @@ int main(int argc, char **argv) printf("\nURL is too long"); } - if(canCrawl(finalURLsize,finalURL)==0){ - printf("\nfinalURL failed crawl rules."); + if(task != 0 && task[0]=='2' && canCrawl(finalURLsize,finalURL)==0){ + printf("\nEffective URL failed crawl rules."); skipurl=1; } diff --git a/c/htmlparse.h b/c/htmlparse.h index 697109d..361c17f 100755 --- a/c/htmlparse.h +++ b/c/htmlparse.h @@ -569,7 +569,7 @@ int canCrawl(int urlSize, char *urltocheck){ } //restrict file extensions to these - if(extfound==1 && (locateInURL(urltocheck,".html",".HTML",5,urlSize)==1 || locateInURL(urltocheck,".htm",".HTM",4,urlSize)==1 || locateInURL(urltocheck,".txt",".TXT",4,urlSize)==1 || locateInURL(urltocheck,".php",".PHP",4,urlSize)==1 || locateInURL(urltocheck,".asp",".ASP",4,urlSize)==1 || locateInURL(urltocheck,".xhtml",".XHTML",6,urlSize)==1)){ + if(extfound==1 && (locateInURL(urltocheck,".html",".HTML",5,urlSize)==1 || locateInURL(urltocheck,".htm",".HTM",4,urlSize)==1 || locateInURL(urltocheck,".txt",".TXT",4,urlSize)==1 || locateInURL(urltocheck,".php",".PHP",4,urlSize)==1 || locateInURL(urltocheck,".asp",".ASP",4,urlSize)==1 || locateInURL(urltocheck,".xhtml",".XHTML",6,urlSize)==1 || locateInURL(urltocheck,".shtml",".SHTML",6,urlSize)==1)){ return 1; } if(extfound==0 )