|
@@ -296,7 +296,7 @@ int main(int argc, char **argv)
|
|
memset(checkurl,0,checkurlsize);
|
|
memset(checkurl,0,checkurlsize);
|
|
if(task == 0 || task[0] == '2'){//index request did not come from refresh scheduler, or is an autocrawl url
|
|
if(task == 0 || task[0] == '2'){//index request did not come from refresh scheduler, or is an autocrawl url
|
|
//strcpy(checkurl,"SELECT id,updatable,title,enable,fault,url FROM windex WHERE url = 'http://"); //replace this with a simple check for url_noprefix column match
|
|
//strcpy(checkurl,"SELECT id,updatable,title,enable,fault,url FROM windex WHERE url = 'http://"); //replace this with a simple check for url_noprefix column match
|
|
- strcpy(checkurl,"SELECT id,updatable,title,enable,fault,url,shard FROM windex WHERE url_noprefix = '");
|
|
|
|
|
|
+ strcpy(checkurl,"SELECT id,updatable,title,enable,fault,url,url_noprefix,shard FROM windex WHERE url_noprefix = '");
|
|
if(slashfound==0)
|
|
if(slashfound==0)
|
|
{
|
|
{
|
|
strcat(checkurl,urlnoprefix);
|
|
strcat(checkurl,urlnoprefix);
|
|
@@ -346,6 +346,7 @@ int main(int argc, char **argv)
|
|
char *dbtitle;
|
|
char *dbtitle;
|
|
char *fault;
|
|
char *fault;
|
|
char *dburl;
|
|
char *dburl;
|
|
|
|
+ char *dburl_noprefix;
|
|
char *shard;
|
|
char *shard;
|
|
|
|
|
|
//Catalog the previous crawl attempts (to see if they are all for the same page - which would be a bad sign)
|
|
//Catalog the previous crawl attempts (to see if they are all for the same page - which would be a bad sign)
|
|
@@ -368,7 +369,8 @@ int main(int argc, char **argv)
|
|
enableOldDBval = row[3];
|
|
enableOldDBval = row[3];
|
|
fault = row[4];
|
|
fault = row[4];
|
|
dburl=row[5];
|
|
dburl=row[5];
|
|
- shard=row[6];
|
|
|
|
|
|
+ dburl_noprefix=row[6];
|
|
|
|
+ shard=row[7];
|
|
if(task != 0 && task[0]=='2')
|
|
if(task != 0 && task[0]=='2')
|
|
alreadydone=1;
|
|
alreadydone=1;
|
|
}
|
|
}
|
|
@@ -616,12 +618,12 @@ int main(int argc, char **argv)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
- prefixsize = httpswww+httpwww+https+http;
|
|
|
|
|
|
+ int finalURL_prefixsize = httpswww+httpwww+https+http;
|
|
urlcount=urlnoprefixcount=0;
|
|
urlcount=urlnoprefixcount=0;
|
|
|
|
|
|
//store the url without prefix to urlnoprefix
|
|
//store the url without prefix to urlnoprefix
|
|
while(finalURL[urlcount] != 0){
|
|
while(finalURL[urlcount] != 0){
|
|
- if(urlcount>prefixsize-1)
|
|
|
|
|
|
+ if(urlcount>finalURL_prefixsize-1)
|
|
{
|
|
{
|
|
finalURLnoprefix[urlnoprefixcount]=finalURL[urlcount];
|
|
finalURLnoprefix[urlnoprefixcount]=finalURL[urlcount];
|
|
urlnoprefixcount++;
|
|
urlnoprefixcount++;
|
|
@@ -635,7 +637,7 @@ int main(int argc, char **argv)
|
|
mysql_free_result(resulturlcheck);
|
|
mysql_free_result(resulturlcheck);
|
|
char doublecheckurl[finalURLsize+100];
|
|
char doublecheckurl[finalURLsize+100];
|
|
memset(doublecheckurl,0,finalURLsize+100);
|
|
memset(doublecheckurl,0,finalURLsize+100);
|
|
- strcpy(doublecheckurl,"SELECT id,updatable,title,enable,fault,url,shard FROM windex WHERE url = '");
|
|
|
|
|
|
+ strcpy(doublecheckurl,"SELECT id,updatable,title,enable,fault,url,url_noprefix,shard FROM windex WHERE url = '");
|
|
strcat(doublecheckurl,finalURL);
|
|
strcat(doublecheckurl,finalURL);
|
|
strcat(doublecheckurl,"';");
|
|
strcat(doublecheckurl,"';");
|
|
if (mysql_query(con, doublecheckurl))
|
|
if (mysql_query(con, doublecheckurl))
|
|
@@ -659,8 +661,9 @@ int main(int argc, char **argv)
|
|
enableOldDBval = row[3];
|
|
enableOldDBval = row[3];
|
|
fault = row[4];
|
|
fault = row[4];
|
|
dburl=row[5];
|
|
dburl=row[5];
|
|
- shard=row[6];
|
|
|
|
- if(task != 0 && task[0]=='2')
|
|
|
|
|
|
+ dburl_noprefix=row[6];
|
|
|
|
+ shard=row[7];
|
|
|
|
+ if((task != 0 && task[0]=='2') || updatableOldDBval[0] == '0')
|
|
alreadydone=1;
|
|
alreadydone=1;
|
|
foundindoublecheck=1;
|
|
foundindoublecheck=1;
|
|
}
|
|
}
|
|
@@ -908,27 +911,13 @@ int main(int argc, char **argv)
|
|
}
|
|
}
|
|
|
|
|
|
//check if original dburl is now getting redirected from finalurl (should be sent to review)
|
|
//check if original dburl is now getting redirected from finalurl (should be sent to review)
|
|
- int finalUrlsize_noprefix, dburlsize_noprefix = 0, finalURL_prefixsize = 0, dburl_prefixsize = 0,dburlsize=strlen(dburl);
|
|
|
|
- if(finalURL[4] == ':'){//if its just a switch from http to https, ignore
|
|
|
|
- finalUrlsize_noprefix = finalURLsize - 7;
|
|
|
|
- finalURL_prefixsize = 7;
|
|
|
|
- }else{
|
|
|
|
- finalUrlsize_noprefix = finalURLsize - 8;
|
|
|
|
- finalURL_prefixsize = 8;
|
|
|
|
- }
|
|
|
|
- if(dburl[4] == ':'){
|
|
|
|
- dburlsize_noprefix = dburlsize - 7;
|
|
|
|
- dburl_prefixsize = 7;
|
|
|
|
- }else{
|
|
|
|
- dburlsize_noprefix = dburlsize - 8;
|
|
|
|
- dburl_prefixsize = 8;
|
|
|
|
- }
|
|
|
|
- if(finalURLsize-finalURL_prefixsize != dburlsize-dburl_prefixsize){
|
|
|
|
|
|
+ int finalURLnoprefix_size = strlen(finalURLnoprefix), dburl_noprefix_size = strlen(dburl_noprefix);
|
|
|
|
+ if(finalURLnoprefix_size != dburl_noprefix_size){
|
|
redirected = 1;
|
|
redirected = 1;
|
|
printf("\nIndexed page is being redirected.");
|
|
printf("\nIndexed page is being redirected.");
|
|
}else{
|
|
}else{
|
|
- for(int i=0;i<finalUrlsize_noprefix;i++){
|
|
|
|
- if(dburl[i+dburl_prefixsize] != finalURL[i+finalURL_prefixsize]){
|
|
|
|
|
|
+ for(int i=0;i<finalURLnoprefix_size;i++){
|
|
|
|
+ if(dburl_noprefix[i] != finalURLnoprefix[i]){
|
|
redirected = 1;
|
|
redirected = 1;
|
|
printf("\nIndexed page is being redirected.");
|
|
printf("\nIndexed page is being redirected.");
|
|
break;
|
|
break;
|