Add files via upload
This commit is contained in:
parent
8eb1398e85
commit
9caba2ece6
2 changed files with 15 additions and 11 deletions
1
c/cr.c
1
c/cr.c
|
@ -1389,3 +1389,4 @@ int main(int argc, char **argv)
|
|||
}
|
||||
exit(0);
|
||||
}
|
||||
|
||||
|
|
|
@ -84,16 +84,20 @@ void htmlparse(){
|
|||
|
||||
for(int i=0;i<fsize;i++){
|
||||
c = fileStr[i];
|
||||
int skipchar = 0;
|
||||
if(c== 10 || c == 13 || c == 14 || c == 15 || c == 127 || c == 0 || c == 9){
|
||||
skipchar = 1;
|
||||
}
|
||||
|
||||
//use a rolling window of 100 bytes to detect elements; ignore lf/cr/so/si/del/space/null/tab
|
||||
if(c!= 10 && c != 13 && c != 14 && c != 15 && c != 127 && c != 32 && c != 0 && c != 9){
|
||||
if(skipchar == 0 && c != 32){
|
||||
for(int j=0;j<window_len-1;j++){
|
||||
window[j] = window[j+1];
|
||||
}
|
||||
window[window_len-1] = c;
|
||||
}
|
||||
//use a rolling window of 100 bytes to detect elements, but permit spaces; ignore lf/cr/so/si/del/null/tab
|
||||
if(c!= 10 && c != 13 && c != 14 && c != 15 && c != 127 && c != 0 && c != 9){
|
||||
if(skipchar == 0){
|
||||
for(int j=0;j<window_len-1;j++){
|
||||
windowWithSpaces[j] = windowWithSpaces[j+1];
|
||||
}
|
||||
|
@ -102,14 +106,14 @@ void htmlparse(){
|
|||
|
||||
//Get Title
|
||||
if(titlefound == 2){
|
||||
if(titlesize < (title_len-2) && c!= 10 && c != 13 && c != 14 && c != 15 && c != 127 && c != 0 && c != 9){
|
||||
if(titlesize < (title_len-2) && skipchar == 0){
|
||||
title[titlesize]=c;
|
||||
titlesize++;
|
||||
if(c == 39){//check for single quotes and double them up for sql safety
|
||||
title[titlesize]=c;
|
||||
titlesize++;
|
||||
}
|
||||
if(c != 32 && c != 12 && c != 13 && c != 14 && c != 15 && c != 127 && c != 10 && c != 9){//some titles are just a bunch of spaces or garbage, need to check for that
|
||||
if(c != 32 && skipchar == 0){//some titles are just a bunch of spaces or garbage, need to check for that
|
||||
emptytitle = 0;
|
||||
}
|
||||
}
|
||||
|
@ -133,7 +137,7 @@ void htmlparse(){
|
|||
charsetfound = 2;
|
||||
//printf("\n%s",charset);
|
||||
}
|
||||
if(charsetfound == 1 && charsetsize < charset_len && c != '"' && c != '\''){
|
||||
if(charsetfound == 1 && charsetsize < charset_len && c != '"' && c != '\'' && skipchar == 0){
|
||||
charset[charsetsize]=c;
|
||||
charsetsize++;
|
||||
}
|
||||
|
@ -148,7 +152,7 @@ void htmlparse(){
|
|||
descriptionfound = 2;
|
||||
//printf("\n%s",description);
|
||||
}
|
||||
if(descriptionfound == 1 && descriptionsize < (description_len-2) && c != '"'){
|
||||
if(descriptionfound == 1 && descriptionsize < (description_len-2) && c != '"' && skipchar == 0){
|
||||
description[descriptionsize]=c;
|
||||
descriptionsize++;
|
||||
if(c == 39){//check for single quotes and double them up for sql safety
|
||||
|
@ -167,7 +171,7 @@ void htmlparse(){
|
|||
keywordsfound = 2;
|
||||
//printf("\n%s",keywords);
|
||||
}
|
||||
if(keywordsfound == 1 && keywordssize < (keywords_len-2) && c != '"'){
|
||||
if(keywordsfound == 1 && keywordssize < (keywords_len-2) && c != '"' && skipchar == 0){
|
||||
keywords[keywordssize]=c;
|
||||
keywordssize++;
|
||||
if(c == 39){//check for single quotes and double them up for sql safety
|
||||
|
@ -190,15 +194,14 @@ void htmlparse(){
|
|||
if(locateInWindow(window,"noindex","NOINDEX",7)==1 || locateInWindow(window,"none","NONE",4)==1)
|
||||
noindex=nofollow=1;
|
||||
}
|
||||
if(robotsfound == 1 && robotssize < robots_len && c != '"' && c != '\''){
|
||||
if(robotsfound == 1 && robotssize < robots_len && c != '"' && c != '\'' && skipchar == 0){
|
||||
robots[robotssize]=c;
|
||||
robotssize++;
|
||||
}
|
||||
}
|
||||
if(robotsfound == 0 && locateInWindow(window,"robots\"content=","ROBOTS\"CONTENT=",15)==1){
|
||||
robotsfound = 1;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if(titlefound != 2){
|
||||
//Ignore between scripts, styles, and remove all tags, repeated spaces, tabs, cr, lf, null, add a space at end of every tag
|
||||
|
@ -240,7 +243,7 @@ void htmlparse(){
|
|||
|
||||
//Get Body
|
||||
//exclude remaining tags, comments, scripts, styles, cr, lf, null, tab, add a space after a '>' but only allow one
|
||||
if(intag == 0 && incomment == 0 && inscript == 0 && instyle == 0 && inlink == 0 && c!= 13 && c != 14 && c != 15 && c != 127 && c != 10 && c != 0 && c != 9 && bodysize < (body_len-2)){
|
||||
if(intag == 0 && incomment == 0 && inscript == 0 && instyle == 0 && inlink == 0 && skipchar == 0 && bodysize < (body_len-2)){
|
||||
if(putspace == 1){
|
||||
if(spacecount == 0){
|
||||
body[bodysize]=32;
|
||||
|
|
Loading…
Add table
Reference in a new issue