Add files via upload
This commit is contained in:
parent
a72ff77e9d
commit
552bcce204
2 changed files with 47 additions and 6 deletions
12
c/cr.c
12
c/cr.c
|
@ -1257,7 +1257,7 @@ int main(int argc, char **argv)
|
|||
strcat(windexRandUpdate,finalURLnoprefix);
|
||||
strcat(windexRandUpdate,"', title = '");
|
||||
if(titlesize > 0 && emptytitle == 0){
|
||||
strcat(windexRandUpdate,title);
|
||||
strcat(windexRandUpdate,title_filtered);
|
||||
}
|
||||
else{
|
||||
if(finalURLsize < 111){
|
||||
|
@ -1268,9 +1268,9 @@ int main(int argc, char **argv)
|
|||
}
|
||||
}
|
||||
strcat(windexRandUpdate,"', tags = NULL, description = '");
|
||||
strcat(windexRandUpdate,description);
|
||||
strcat(windexRandUpdate,description_filtered);
|
||||
strcat(windexRandUpdate,"', body = '");
|
||||
strcat(windexRandUpdate,body);
|
||||
strcat(windexRandUpdate,body_filtered);
|
||||
strcat(windexRandUpdate,"', worksafe = ");
|
||||
strcat(windexRandUpdate,worksafe);
|
||||
strcat(windexRandUpdate,", approver = '");
|
||||
|
@ -1404,7 +1404,7 @@ int main(int argc, char **argv)
|
|||
strcat(windexupdate,finalURLnoprefix);
|
||||
strcat(windexupdate,"', title = '");
|
||||
if(titlesize > 0 && emptytitle == 0){
|
||||
strcat(windexupdate,title);
|
||||
strcat(windexupdate,title_filtered);
|
||||
}
|
||||
else{
|
||||
if(finalURLsize < 111){
|
||||
|
@ -1419,9 +1419,9 @@ int main(int argc, char **argv)
|
|||
else{
|
||||
strcat(windexupdate,"', tags = NULL, description = '");
|
||||
}
|
||||
strcat(windexupdate,description);
|
||||
strcat(windexupdate,description_filtered);
|
||||
strcat(windexupdate,"', body = '");
|
||||
strcat(windexupdate,body);
|
||||
strcat(windexupdate,body_filtered);
|
||||
strcat(windexupdate,"', worksafe = ");
|
||||
strcat(windexupdate,worksafe);
|
||||
//strcat(windexupdate,", approver = '");
|
||||
|
|
|
@ -24,6 +24,7 @@ static char filename[] = "page.out";
|
|||
|
||||
char window[window_len],windowWithSpaces[window_len],charset[charset_len+1],mysqlcharset[mysqlcharset_len+1],title[title_len+1],keywords[keywords_len+1],description[description_len+1],robots[robots_len+1],body[body_len+1];
|
||||
char urlList[urlList_len+1],strURL[strURL_len+1],urlListShuffled[urlList_len+1],urlListHoldShuffled[urlList_len+1];
|
||||
// Copies of title, body and description with single-quote characters removed.
// Zeroed at the start of htmlparse() and filled by filtervars(); each has the
// same capacity as the buffer it is derived from.
char title_filtered[title_len+1], body_filtered[body_len+1], description_filtered[description_len+1];
|
||||
int titlefound=0,charsetfound=0,descriptionfound=0,keywordsfound=0,robotsfound=0,nofollow=0,noindex=0,scriptfound=0,stylefound=0,urlFound=0,urlTagFound=0,numURL=0,emptytitle=1,spaces=0,seeded=0,num_stylesheets=0,num_scripts=0,getURLs=1;
|
||||
long charsetsize=0,titlesize=0,keywordssize=0,descriptionsize=0,robotssize=0,bodysize=0;
|
||||
|
||||
|
@ -34,6 +35,7 @@ int canCrawl(int urlSize, char *urltocheck);
|
|||
void shuffleURLs(int iterations, long urlListSize);
|
||||
void sqlsafe();
|
||||
void charset2mysql();
|
||||
// Fills title_filtered, description_filtered and body_filtered from title,
// description and body, skipping every single-quote character.
void filtervars();
|
||||
|
||||
FILE *f;
|
||||
char *fileStr;
|
||||
|
@ -62,6 +64,9 @@ void htmlparse(){
|
|||
memset(strURL,0,strURL_len+1);
|
||||
memset(urlListShuffled,0,urlList_len+1);
|
||||
memset(urlListHoldShuffled,0,urlList_len+1);
|
||||
memset(title_filtered,0,title_len+1);
|
||||
memset(body_filtered,0,body_len+1);
|
||||
memset(description_filtered,0,description_len+1);
|
||||
printf("Parsing HTML... ");
|
||||
|
||||
//open html file and load into memory
|
||||
|
@ -312,6 +317,9 @@ void htmlparse(){
|
|||
//Convert charset to mysql equivalent
|
||||
charset2mysql();
|
||||
|
||||
//Filter additional characters *if* required
|
||||
filtervars();
|
||||
|
||||
//print body to file
|
||||
/* bodyfile = fopen("body.txt","wb");
|
||||
fputs(body,bodyfile);
|
||||
|
@ -576,3 +584,36 @@ int canCrawl(int urlSize, char *urltocheck){
|
|||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void filtervars(){
|
||||
//Creates a copy of title, description, body variables with single-qutoes filtered out
|
||||
//will be used for the shard tables, but not on the primary 'windex' table
|
||||
//allows a more restrictive query to be used. Is agnostic to searches containing single-quotes as a compromise
|
||||
|
||||
//filter title
|
||||
int j=0;
|
||||
for(int i=0;i<titlesize;i++){
|
||||
if(title[i]!=39){
|
||||
title_filtered[j]=title[i];
|
||||
j++;
|
||||
}
|
||||
}
|
||||
|
||||
//filter description
|
||||
j=0;
|
||||
for(int i=0;i<descriptionsize;i++){
|
||||
if(description[i]!=39){
|
||||
description_filtered[j]=description[i];
|
||||
j++;
|
||||
}
|
||||
}
|
||||
|
||||
//filter body
|
||||
j=0;
|
||||
for(int i=0;i<bodysize;i++){
|
||||
if(body[i]!=39){
|
||||
body_filtered[j]=body[i];
|
||||
j++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue