Browse Source

Add files via upload

wibyweb, cách đây 2 năm
mục cha
commit
25a8e227ee
2 tập tin đã thay đổi với 155 bổ sung và 134 xóa
  1. 24 5
      c/checkrobots.h
  2. 131 129
      c/cr.c

+ 24 - 5
c/checkrobots.h

@@ -32,10 +32,11 @@ int checkrobots(char *rURLprefix, char *rDomain, char *rURLpath)
 	memset(rwindow,'?',rwindow_len);
 //	rwindow[rwindow_len]=0;
 	
-	//curl_global_init(CURL_GLOBAL_ALL);
+	curl_global_init(CURL_GLOBAL_DEFAULT);
 	CURL *curl;
 	FILE *fp;
 	CURLcode res;
+	curl = curl_easy_init();
 	memset(robotsurl,0,1011);
 	strcpy(robotsurl,rURLprefix);
 	strcat(robotsurl,rDomain);
@@ -45,10 +46,9 @@ int checkrobots(char *rURLprefix, char *rDomain, char *rURLpath)
 	strcpy(outfilename,"robots/");
 	strcat(outfilename,rDomain);
 	strcat(outfilename,".txt");
-	curl = curl_easy_init();
 	long fsize=0,response_code_checkrobots=0;	
 	char *finalURL_checkrobots = NULL;
-	int foundfile=0;
+	int foundfile=0,alloced=0;
 	char rb,rwb;
 	printf("\nChecking robots.txt: ");
 
@@ -59,6 +59,7 @@ int checkrobots(char *rURLprefix, char *rDomain, char *rURLpath)
 		fseek(robotsfile, 0, SEEK_SET);  /* same as rewind(f); */
 
 		robotsfilestr = malloc(fsize + 1);
+		alloced=1;
 		if(fread(robotsfilestr, 1, fsize, robotsfile)){}
 		fclose(robotsfile);
 
@@ -71,7 +72,7 @@ int checkrobots(char *rURLprefix, char *rDomain, char *rURLpath)
 		if(fp = fopen(outfilename,"wb")){
 			//set curl options
 			curl_easy_setopt(curl, CURLOPT_URL, robotsurl);// set URL to get here 
-			curl_easy_setopt(curl, CURLOPT_USERAGENT, "Mozilla/5.0 (compatible; compatible; WebCrawler; SearchEngine)"); 
+			curl_easy_setopt(curl, CURLOPT_USERAGENT, "Mozilla/5.0 (compatible; Wibybot; https://wiby.me/)"); 
 			curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_data_checkrobots);// send all data to this function  // 
 			curl_easy_setopt(curl, CURLOPT_WRITEDATA, fp);// write the page body to this file handle  
 			curl_easy_setopt(curl,CURLOPT_FOLLOWLOCATION,1L);//allow redirects
@@ -83,13 +84,15 @@ int checkrobots(char *rURLprefix, char *rDomain, char *rURLpath)
 			res = curl_easy_perform(curl);// get it! 
 			curl_easy_getinfo(curl, CURLINFO_EFFECTIVE_URL, &finalURL_checkrobots);
 			curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &response_code_checkrobots);
-			curl_easy_cleanup(curl);// always cleanup 
+			//curl_easy_cleanup(curl);// always cleanup (done further down)
 			fclose(fp);
 			if(response_code_checkrobots!=200){
 				fp = fopen(outfilename,"wb");
 				fclose(fp);
 			}
 		}else{
+			curl_easy_cleanup(curl);
+			curl_global_cleanup();
 			printf("\nFailed to create file: %s - proceeding anyway.",outfilename);
 			return 1;
 		}
@@ -101,6 +104,7 @@ int checkrobots(char *rURLprefix, char *rDomain, char *rURLpath)
 		fseek(robotsfile, 0, SEEK_SET);  // same as rewind(f); 
 
 		robotsfilestr = malloc(fsize + 1);
+		alloced=1;
 		if(fread(robotsfilestr, 1, fsize, robotsfile)){}
 		fclose(robotsfile);
 
@@ -190,6 +194,10 @@ int checkrobots(char *rURLprefix, char *rDomain, char *rURLpath)
 					}
 					if((i==fsize-1 && match==1) || ((rwb==10 || rwb==13) && match==1)){
 						printf("Permitted.");
+						curl_easy_cleanup(curl);
+						curl_global_cleanup();
+						if(alloced==1)
+							free(robotsfilestr);
 						return 1;
 					}
 					if(match==0)
@@ -214,13 +222,24 @@ int checkrobots(char *rURLprefix, char *rDomain, char *rURLpath)
 		
 		if(result==0){
 			printf("Denied.");
+			curl_easy_cleanup(curl);
+			curl_global_cleanup();
+			if(alloced==1)
+				free(robotsfilestr);
 			return 0;
 		}else{
 			printf("Permitted.");
+			curl_easy_cleanup(curl);
+			curl_global_cleanup();
+			if(alloced==1)
+				free(robotsfilestr);
 			return 1;
 		}
 	}
 	printf("Permitted.");
+	curl_easy_cleanup(curl);
+	if(alloced==1)
+		free(robotsfilestr);
 	return 1;
 }
 

+ 131 - 129
c/cr.c

@@ -23,9 +23,9 @@ char *shardfilestr;
 
 void finish_with_error(MYSQL *con)
 {
-  fprintf(stderr, "%s\n", mysql_error(con));
-  mysql_close(con);
-  exit(1);        
+	fprintf(stderr, "%s\n", mysql_error(con));
+	mysql_close(con);
+	exit(1);        
 }
 int isnum(char *source){
 	int sourcelength = strlen(source);
@@ -37,8 +37,8 @@ int isnum(char *source){
 	return 1;
 }
 size_t write_data(void *ptr, size_t size, size_t nmemb, FILE *stream) {
-    size_t written = fwrite(ptr, size, nmemb, stream);
-    return written;
+	size_t written = fwrite(ptr, size, nmemb, stream);
+	return written;
 }
 
 int main(int argc, char **argv)
@@ -60,7 +60,8 @@ int main(int argc, char **argv)
 
 	//check if there are shards to include
 	int nShards=0,fsize=0,shardnum=0;
-	char shardc, numshards[20], shardnumstr[20];
+	char numshards[20], shardnumstr[20];
+	memset(numshards,0,20);
 	memset(shardnumstr,0,20);
 	sprintf(shardnumstr,"0");
 	if(shardfile = fopen("shards", "r")){
@@ -72,16 +73,15 @@ int main(int argc, char **argv)
 			if(fread(shardfilestr, 1, fsize, shardfile)){}
 			shardfilestr[fsize] = 0;
 			for(int i=0;i<fsize;i++){
-				shardc = shardfilestr[i];
-				if(shardc != 10 && shardc != 13){
-					numshards[i]=shardc;
+				if(shardfilestr[i] > 47 && shardfilestr[i] < 58){
+					numshards[i]=shardfilestr[i];
 				}
 			}
 			//check if its a number
 			if(isnum(numshards)==1){
 				nShards = atoi(numshards);
 			}else{
-				printf("\nThe shard file contains gibberish: '%s'. Fix this to continue.",shardfilestr);
+				printf("\nThe shard file must contain a number. Indicate the number of available shards you are using or set it to 0 if you aren't.\n\n");
 				exit(0);
 			}
 			free(shardfilestr);
@@ -91,8 +91,8 @@ int main(int argc, char **argv)
 		}
 		fclose(shardfile);
 	}else{
-                printf("\nWarning: 'shards' file is missing. Create the file and indicate the number of available shards you are using or set it to 0 if you aren't.\n\n");
-        }
+		printf("\nWarning: 'shards' file is missing. Create the file and indicate the number of available shards you are using or set it to 0 if you aren't.\n\n");
+	}
 	if(nShards > 0){
 		srand(time(NULL));
 		shardnum = (rand() % nShards);
@@ -104,7 +104,7 @@ int main(int argc, char **argv)
 	{	
 		//printf("MySQL client version: %s\n", mysql_get_client_info());
 		int alreadydone = 0, permitted=1;
-		 //allocates or initialises a MYSQL object
+		//allocates or initialises a MYSQL object
 
 		MYSQL *con = mysql_init(NULL);
 
@@ -121,13 +121,13 @@ int main(int argc, char **argv)
 
 		if (mysql_query(con, "SET CHARSET utf8;")) 
 		{
-		    finish_with_error(con);
+			finish_with_error(con);
 		}
-		
+
 		if(id_assigned == 0){
 			if (mysql_query(con, "SELECT id, url, worksafe, approver, surprise, updatable, task, crawl_tree, crawl_family, crawl_depth, crawl_pages, crawl_type, crawl_repeat, force_rules FROM indexqueue limit 1;")) 
 			{
-			    	finish_with_error(con);
+				finish_with_error(con);
 			}
 		}else{
 			char indexqueuequery[2001];
@@ -137,13 +137,13 @@ int main(int argc, char **argv)
 			strcat(indexqueuequery,"' LIMIT 1;");
 			if (mysql_query(con, indexqueuequery)) 
 			{
-			    	finish_with_error(con);
+				finish_with_error(con);
 			}
 		}
 
 		//We get the result set using the mysql_store_result() function. The MYSQL_RES is a structure for holding a result set
 		MYSQL_RES *result = mysql_store_result(con);
-		
+
 		if(result == NULL)
 		{
 			finish_with_error(con);
@@ -154,17 +154,17 @@ int main(int argc, char **argv)
 
 		//We fetch the rows and print them to the screen. 
 		/*MYSQL_ROW row;
-		while (row = mysql_fetch_row(result))	
-		{
-			for(int i=0; i<num_fields; i++)
-			{
-				printf("%s ", row[i] ? row[i] : "NULL");
-			}
-			printf("\n");
-		}*/
+		  while (row = mysql_fetch_row(result))	
+		  {
+		  for(int i=0; i<num_fields; i++)
+		  {
+		  printf("%s ", row[i] ? row[i] : "NULL");
+		  }
+		  printf("\n");
+		  }*/
 
 		MYSQL_ROW row = mysql_fetch_row(result);
-		
+
 		int empty=0;
 		if(row == NULL){
 			//printf("\nQueue is empty\n");
@@ -181,9 +181,9 @@ int main(int argc, char **argv)
 			printf("-----------------------------------------------------------------------------------\nFetching:");
 			//grab the first entry (fifo)
 			/*for(int i=0; i<num_fields; i++)
-			{
-				printf("%s ", row[i] ? row[i] : "NULL");
-			}*/
+			  {
+			  printf("%s ", row[i] ? row[i] : "NULL");
+			  }*/
 
 			//Store data in first row into variables
 			char *url = row[1];
@@ -201,7 +201,7 @@ int main(int argc, char **argv)
 			char *crawl_type = row[11];
 			char *crawl_repeat = row[12];
 			char *force_rules = row[13];
-			
+
 			//convert crawl depth, pages to int
 			int n_crawl_depth=0, n_crawl_pages=0;
 			if(crawl_depth!=0){
@@ -242,7 +242,7 @@ int main(int argc, char **argv)
 			}
 
 			//set the prefix
-			
+
 			if(http > 0) strcat(prefix,"://");
 			else if(https > 0) strcat(prefix,"s://");
 			else if(httpwww > 0) strcat(prefix,"://www.");
@@ -321,7 +321,7 @@ int main(int argc, char **argv)
 
 			if (mysql_query(con, checkurl)) 
 			{
-			    finish_with_error(con);
+				finish_with_error(con);
 			}
 
 			//We get the result set using the mysql_store_result() function. The MYSQL_RES is a structure for holding a result set
@@ -331,7 +331,7 @@ int main(int argc, char **argv)
 			{
 				finish_with_error(con);
 			}
-	
+
 			//grab the first entry (fifo)
 			printf("Found ID ");
 			row = mysql_fetch_row(resulturlcheck);
@@ -342,7 +342,7 @@ int main(int argc, char **argv)
 			char *fault;
 			char *dburl;
 			char *shard;
-			
+
 			//Catalog the previous crawl attempts (to see if they are all for the same page - which would be a bad sign)
 			previousID[4] = previousID[3];
 			previousID[3] = previousID[2];
@@ -405,13 +405,13 @@ int main(int argc, char **argv)
 				}else{
 					sanity = 1;
 				}
-				
+
 			}else{
 				sanity = 1;
 			}
 
 			//printf("\n\n%ld, %ld, %ld, %ld, %ld\n",previousID[0],previousID[1],previousID[2],previousID[3],previousID[4]);
-			
+
 			//see if the server will accept http only connections on older browsers, change url to HTTP only:
 			char urlHTTP[strlen(url)+100];
 			memset(urlHTTP,0,strlen(url)+100);
@@ -429,7 +429,7 @@ int main(int argc, char **argv)
 				printf("\nAttempt HTTP connection: %s",urlHTTP);
 				printf("\nDownloading page... ");
 				//===============do the curl (download the webpage)=====================
-				//curl_global_init(CURL_GLOBAL_ALL);
+				curl_global_init(CURL_GLOBAL_DEFAULT);
 				CURL *curl;
 				FILE *fp;
 				CURLcode res;
@@ -467,7 +467,7 @@ int main(int argc, char **argv)
 					}
 
 					//curl_easy_cleanup(curl); //cleanup moved further down because finalURL is needed at insert
-					
+
 					//get file size
 					fseek(fp, 0L, SEEK_END);
 					size = ftell(fp);
@@ -495,7 +495,7 @@ int main(int argc, char **argv)
 					else if(http > 0 || httpwww > 0){
 						httpAllow[0] = '1';
 					}
-					
+
 					//Remove the prefix from the final URL, to store into url_noprefix
 					//find out if its http or https or http://www. or https://www.
 					httpwww=httpswww=http=https=0;
@@ -541,7 +541,7 @@ int main(int argc, char **argv)
 						strcat(doublecheckurl,"';");
 						if (mysql_query(con, doublecheckurl)) 
 						{
-						    finish_with_error(con);
+							finish_with_error(con);
 						}
 						resulturlcheck = mysql_store_result(con);
 						if(resulturlcheck == NULL)
@@ -586,7 +586,7 @@ int main(int argc, char **argv)
 					if(alreadydone==0 && id_assigned==1 && idexistsalready==1){
 						if (mysql_query(con, "use wibytemp;")) 
 						{
-						    finish_with_error(con);
+							finish_with_error(con);
 						}
 						memset(idReserve,0,100);
 						strcpy(idReserve,"INSERT into reserve_id (id) VALUES (");
@@ -605,7 +605,7 @@ int main(int argc, char **argv)
 						//back to wiby database
 						if (mysql_query(con, "use wiby;")) 
 						{
-						    finish_with_error(con);
+							finish_with_error(con);
 						}
 						updatereserve=1;
 						if(alreadydone==0){
@@ -646,7 +646,7 @@ int main(int argc, char **argv)
 							//query db
 							if (mysql_query(con, checkurl)) 
 							{
-							    finish_with_error(con);
+								finish_with_error(con);
 							}
 							MYSQL_RES *resulturlcheck = mysql_store_result(con);
 							if(resulturlcheck == NULL)
@@ -719,18 +719,18 @@ int main(int argc, char **argv)
 					windexupdate = (char*)calloc(finalURLsize+urlnoprefixcount+bodysize+descriptionsize+keywordssize+titlesize+1001,sizeof(char));
 					windexRandUpdate = (char*)calloc(finalURLsize+urlnoprefixcount+bodysize+descriptionsize+keywordssize+titlesize+1001,sizeof(char));
 					titlecheckinsert = (char*)calloc(finalURLsize+titlesize+1001,sizeof(char));
-					
+
 					/*if(title == NULL || keywords == NULL || description == NULL || page == NULL || windexinsert == NULL || windexupdate == NULL)
-					{
-						printf("\nError allocating memory for webpage");
-						//cleanup sql stuff
-						mysql_free_result(resulturlcheck);
-						mysql_free_result(result);
-						mysql_close(con);
-						exit(0);
+					  {
+					  printf("\nError allocating memory for webpage");
+					//cleanup sql stuff
+					mysql_free_result(resulturlcheck);
+					mysql_free_result(result);
+					mysql_close(con);
+					exit(0);
 					}*/
 
-				
+
 					//Check if this is a new page: check if the title found in windex is the same as the parsed title. If not, put the page back into review.
 					int dbtitlesize = 0,titlecheckTitleSize = 0, dbNoTitle=0,extrapos=0;				
 					if(idexistsalready==1)
@@ -742,12 +742,12 @@ int main(int argc, char **argv)
 
 						if (mysql_query(con, "use wibytemp;")) 
 						{
-						    finish_with_error(con);
+							finish_with_error(con);
 						}
 						//set charset based on crawled page charset tag
 						if (mysql_query(con, mysqlcharset))
 						{
-						    finish_with_error(con);
+							finish_with_error(con);
 						}
 						//insert title into wibytemp for comparison
 						strcpy(titlecheckinsert,"INSERT INTO titlecheck (url,title) VALUES ('");
@@ -757,11 +757,11 @@ int main(int argc, char **argv)
 						strcat(titlecheckinsert,"');");
 						if (mysql_query(con, titlecheckinsert)) 
 						{
-						    finish_with_error(con);
+							finish_with_error(con);
 						}
 						if (mysql_query(con, "SET CHARSET utf8;")) 
 						{
-						    finish_with_error(con);
+							finish_with_error(con);
 						}
 						//now read back the title from the database
 						char checktitle[finalURLsize+dbtitlesize+1000];
@@ -771,14 +771,14 @@ int main(int argc, char **argv)
 						//query db
 						if (mysql_query(con, checktitle)) 
 						{
-						    finish_with_error(con);
+							finish_with_error(con);
 						}
 						MYSQL_RES *resulttitlecheck = mysql_store_result(con);
 						if(resulttitlecheck == NULL)
 						{
 							finish_with_error(con);
 						}
-	
+
 						//grab the first entry (fifo)
 						MYSQL_ROW rowTitleCheck = mysql_fetch_row(resulttitlecheck);
 						char *titlecheckTitle;
@@ -793,13 +793,13 @@ int main(int argc, char **argv)
 						strcat(titlecheckremove,finalURL);strcat(titlecheckremove,"';");
 						if (mysql_query(con, titlecheckremove)) 
 						{
-						    finish_with_error(con);
+							finish_with_error(con);
 						}
 
 						//back to wiby database
 						if (mysql_query(con, "use wiby;")) 
 						{
-						    finish_with_error(con);
+							finish_with_error(con);
 						}
 
 						//check if original dburl is now getting redirected from finalurl (should be sent to review)
@@ -868,7 +868,7 @@ int main(int argc, char **argv)
 									dbNoTitle=0;
 							}
 						}
-						
+
 						//if((dbNoTitle == 0 && dbtitlesize != (titlesize-extrapos)) || (dbNoTitle == 1 && titlesize > 0 && emptytitle == 0))  //previous, before db wibytemp titlecheck method
 						if((dbNoTitle == 0 && dbtitlesize != titlecheckTitleSize) || (dbNoTitle == 1 && titlesize > 0 && emptytitle == 0) || (URL_is_dbtitle == 1 && dbtitlesize != titlecheckTitleSize && titlesize > 0 && emptytitle == 0))
 						{
@@ -886,12 +886,12 @@ int main(int argc, char **argv)
 
 						if (mysql_query(con, mysqlcharset))//set charset based on page charset tag
 						{
-						    finish_with_error(con);
+							finish_with_error(con);
 						}
 
 						//strcpy(windexinsert,"INSERT INTO windex (url,title,tags,description,body,worksafe,enable,date,approver,surprise,updatable) VALUES ('");
 						strcpy(windexinsert,"INSERT INTO windex (url,url_noprefix,title,description,body,worksafe,enable,date,approver,surprise,http,updatable,crawl_tree,crawl_family,crawl_pages,crawl_type,crawl_repeat,shard) VALUES ('");
-						
+
 						strcpy(windexupdate,"UPDATE windex SET url = '");
 
 						int copiedRandom = 0;
@@ -914,12 +914,12 @@ int main(int argc, char **argv)
 
 							if (mysql_query(con, "SELECT id, shard, url_noprefix FROM windex WHERE enable = 1 ORDER BY rand() LIMIT 1;")) 
 							{
-							    finish_with_error(con);
+								finish_with_error(con);
 							}						
 							resultRandID = mysql_store_result(con);
 							if (resultRandID==NULL) 
 							{
-							    finish_with_error(con);
+								finish_with_error(con);
 							}
 							MYSQL_ROW row = mysql_fetch_row(resultRandID);
 							if(row != NULL){
@@ -933,7 +933,7 @@ int main(int argc, char **argv)
 							if(row != NULL && id_assigned==1){
 								if (mysql_query(con, "use wibytemp;")) 
 								{
-								    finish_with_error(con);
+									finish_with_error(con);
 								}
 								memset(randomreserve,0,100);
 								strcpy(randomreserve,"INSERT into reserve_id (id) VALUES (");
@@ -952,7 +952,7 @@ int main(int argc, char **argv)
 								//back to wiby database
 								if (mysql_query(con, "use wiby;")) 
 								{
-								    finish_with_error(con);
+									finish_with_error(con);
 								}								
 							}
 
@@ -1013,7 +1013,7 @@ int main(int argc, char **argv)
 								strcat(windexinsert,")");
 								if (mysql_query(con, windexinsert)) 
 								{
-								    finish_with_error(con);
+									finish_with_error(con);
 								}
 
 								//insert into the shard table for the new row
@@ -1023,30 +1023,30 @@ int main(int argc, char **argv)
 									strcat(windexinsert,shardnumstr);
 									strcat(windexinsert," (id,url,url_noprefix,title,tags,description,body,surprise,http,updatable,worksafe,crawl_tree,crawl_family,crawl_pages,crawl_type,crawl_repeat,force_rules,enable,date,updated,approver,fault,shard) SELECT id,url,url_noprefix,title,tags,description,body,surprise,http,updatable,worksafe,crawl_tree,crawl_family,crawl_pages,crawl_type,crawl_repeat,force_rules,enable,date,updated,approver,fault,shard FROM windex WHERE id = LAST_INSERT_ID();");
 									/*//get the last ID
-									MYSQL_RES *resultIDnum;
-									char *lastIDnum;
-
-									if (mysql_query(con, "SELECT LAST_INSERT_ID() FROM windex limit 1")) 
-									{
-									    finish_with_error(con);
-									}	
-									MYSQL_ROW rowLastID = mysql_fetch_row(resultIDnum);
-									if(rowLastID != NULL){
-										lastIDnum = rowLastID[0];
-									}						
-
-									strcpy(shardinsert,"INSERT INTO ws");
-									strcat(shardinsert,shardnumstr);
-									strcat(shardinsert," (id,url,url_noprefix,title,tags,description,body,surprise,http,updatable,worksafe,crawl_tree,crawl_family,crawl_pages,crawl_type,crawl_repeat,force_rules,enable,date,updated,approver,fault,shard) SELECT id,url,url_noprefix,title,tags,description,body,surprise,http,updatable,worksafe,crawl_tree,crawl_family,crawl_pages,crawl_type,crawl_repeat,force_rules,enable,date,updated,approver,fault,shard FROM windex WHERE id = ");
-									strcat(shardinsert,lastIDnum);
-									if (mysql_query(con, shardinsert)) 
-									{
-									    finish_with_error(con);
-									}
-									mysql_free_result(resultIDnum);	*/
+									  MYSQL_RES *resultIDnum;
+									  char *lastIDnum;
+
+									  if (mysql_query(con, "SELECT LAST_INSERT_ID() FROM windex limit 1")) 
+									  {
+									  finish_with_error(con);
+									  }	
+									  MYSQL_ROW rowLastID = mysql_fetch_row(resultIDnum);
+									  if(rowLastID != NULL){
+									  lastIDnum = rowLastID[0];
+									  }						
+
+									  strcpy(shardinsert,"INSERT INTO ws");
+									  strcat(shardinsert,shardnumstr);
+									  strcat(shardinsert," (id,url,url_noprefix,title,tags,description,body,surprise,http,updatable,worksafe,crawl_tree,crawl_family,crawl_pages,crawl_type,crawl_repeat,force_rules,enable,date,updated,approver,fault,shard) SELECT id,url,url_noprefix,title,tags,description,body,surprise,http,updatable,worksafe,crawl_tree,crawl_family,crawl_pages,crawl_type,crawl_repeat,force_rules,enable,date,updated,approver,fault,shard FROM windex WHERE id = ");
+									  strcat(shardinsert,lastIDnum);
+									  if (mysql_query(con, shardinsert)) 
+									  {
+									  finish_with_error(con);
+									  }
+									  mysql_free_result(resultIDnum);	*/
 									if (mysql_query(con, windexinsert)) 
 									{
-									    finish_with_error(con);
+										finish_with_error(con);
 									}
 								}			
 							}
@@ -1056,7 +1056,7 @@ int main(int argc, char **argv)
 								strcat(windexRandUpdate,randID);
 								if (mysql_query(con, windexRandUpdate))
 								{
-								    finish_with_error(con);
+									finish_with_error(con);
 								}
 								if(nShards>0){//Also copy that new row into a new row of the same ID in the round-robin assigned shard table
 									//update the shard id in windex
@@ -1066,7 +1066,7 @@ int main(int argc, char **argv)
 									strcat(windexRandUpdate," WHERE id = LAST_INSERT_ID()");
 									if (mysql_query(con, windexRandUpdate))
 									{
-									    finish_with_error(con);
+										finish_with_error(con);
 									}
 									//insert that row into the next shard
 									memset(windexRandUpdate,0,strlen(windexRandUpdate));
@@ -1075,7 +1075,7 @@ int main(int argc, char **argv)
 									strcat(windexRandUpdate," (id,url,url_noprefix,title,tags,description,body,surprise,http,updatable,worksafe,crawl_tree,crawl_family,crawl_pages,crawl_type,crawl_repeat,force_rules,enable,date,updated,approver,fault,shard) SELECT id,url,url_noprefix,title,tags,description,body,surprise,http,updatable,worksafe,crawl_tree,crawl_family,crawl_pages,crawl_type,crawl_repeat,force_rules,enable,date,updated,approver,fault,shard FROM windex WHERE id = LAST_INSERT_ID()");
 									if (mysql_query(con, windexRandUpdate))
 									{
-									    finish_with_error(con);
+										finish_with_error(con);
 									}
 
 									//Overwrite the randomly selected row with the contents of the newly crawled webpage
@@ -1133,9 +1133,9 @@ int main(int argc, char **argv)
 									strcat(windexRandUpdate,randID);
 									if (mysql_query(con, windexRandUpdate))
 									{
-									    finish_with_error(con);
+										finish_with_error(con);
 									}
-																
+
 									//Finally, update the corresponding shard table row
 									if(randshard != 0){
 										memset(windexRandUpdate,0,strlen(windexRandUpdate));
@@ -1194,7 +1194,7 @@ int main(int argc, char **argv)
 										strcat(windexRandUpdate,randID);
 										if (mysql_query(con, windexRandUpdate))
 										{
-										    finish_with_error(con);
+											finish_with_error(con);
 										}	
 									}
 								}
@@ -1263,7 +1263,7 @@ int main(int argc, char **argv)
 							strcat(windexupdate,idexistsvalue);//will be same as randID if a new page is replacing that row
 							if (mysql_query(con, windexupdate)) 
 							{
-							    finish_with_error(con);
+								finish_with_error(con);
 							}
 
 							//update shard
@@ -1317,7 +1317,7 @@ int main(int argc, char **argv)
 								strcat(windexupdate,idexistsvalue);//will be same as randID if a new page is replacing that row
 								if (mysql_query(con, windexupdate)) 
 								{
-								    finish_with_error(con);
+									finish_with_error(con);
 								}
 							}
 						}
@@ -1326,7 +1326,7 @@ int main(int argc, char **argv)
 						if(id_assigned==1 && idexistsalready==0 && reserveFail==0){
 							if (mysql_query(con, "use wibytemp;")) 
 							{
-							    finish_with_error(con);
+								finish_with_error(con);
 							}
 							memset(randomreserve,0,100);
 							strcpy(randomreserve,"DELETE FROM reserve_id where id = ");
@@ -1339,14 +1339,14 @@ int main(int argc, char **argv)
 							//back to wiby database
 							if (mysql_query(con, "use wiby;")) 
 							{
-							    finish_with_error(con);
+								finish_with_error(con);
 							}							
 						}
 						//unreserve ID if doing an update 
 						if(id_assigned==1 && updatereserve==1){
 							if (mysql_query(con, "use wibytemp;")) 
 							{
-							    finish_with_error(con);
+								finish_with_error(con);
 							}
 							memset(idReserve,0,100);
 							strcpy(idReserve,"DELETE FROM reserve_id where id = ");
@@ -1359,14 +1359,14 @@ int main(int argc, char **argv)
 							//back to wiby database
 							if (mysql_query(con, "use wiby;")) 
 							{
-							    finish_with_error(con);
+								finish_with_error(con);
 							}
 						}
 						//free result
 						if(idexistsalready == 0){
 							mysql_free_result(resultRandID);							
 						}
-						
+
 						//===================remove the entry from the indexqueue===============
 						//printf("\nRemoving from queue...");
 						char sqlqueryremove[200];
@@ -1375,9 +1375,9 @@ int main(int argc, char **argv)
 						strcat(sqlqueryremove,id);strcat(sqlqueryremove,";");
 						if (mysql_query(con, sqlqueryremove)) 
 						{
-						    finish_with_error(con);
+							finish_with_error(con);
 						}
-	
+
 						printf("\n\nSuccess!");
 					}
 					//clear page from memory
@@ -1405,10 +1405,10 @@ int main(int argc, char **argv)
 					memset(sqlqueryremove,0,200);
 					strcpy(sqlqueryremove,"DELETE FROM indexqueue WHERE id=");
 					strcat(sqlqueryremove,id);strcat(sqlqueryremove,";");
-					
+
 					if (mysql_query(con, sqlqueryremove)) 
 					{
-					    finish_with_error(con);
+						finish_with_error(con);
 					}
 					if(alreadydone==0){
 						if(idexistsalready == 1 && fault[0] == '1')
@@ -1427,7 +1427,7 @@ int main(int argc, char **argv)
 							strcat(sqlqueryremove,idexistsvalue);
 							if (mysql_query(con, sqlqueryremove)) 
 							{
-							    finish_with_error(con);
+								finish_with_error(con);
 							}
 							if(nShards > 0 && shard != 0){
 								memset(sqlqueryremove,0,200);
@@ -1437,7 +1437,7 @@ int main(int argc, char **argv)
 								strcat(sqlqueryremove,idexistsvalue);
 								if (mysql_query(con, sqlqueryremove)) 
 								{
-								    finish_with_error(con);
+									finish_with_error(con);
 								}
 							}
 							if(crawl_family == 0 || (crawl_family != 0 && crawl_family[0] =='0')){
@@ -1448,7 +1448,7 @@ int main(int argc, char **argv)
 								strcat(sqlqueryreview,worksafe);strcat(sqlqueryreview,");");	
 								if (mysql_query(con, sqlqueryreview)) 
 								{
-								    finish_with_error(con);
+									finish_with_error(con);
 								}
 							}
 						}
@@ -1461,7 +1461,7 @@ int main(int argc, char **argv)
 							strcat(sqlqueryfault,idexistsvalue);
 							if (mysql_query(con, sqlqueryfault)) 
 							{
-							    finish_with_error(con);
+								finish_with_error(con);
 							}
 							if(nShards>0 && shard != 0){
 								memset(sqlqueryfault,0,450);
@@ -1471,7 +1471,7 @@ int main(int argc, char **argv)
 								strcat(sqlqueryfault,idexistsvalue);
 								if (mysql_query(con, sqlqueryfault)) 
 								{
-								    finish_with_error(con);
+									finish_with_error(con);
 								}
 							}				
 						}
@@ -1481,16 +1481,16 @@ int main(int argc, char **argv)
 							fputs ("\r\n",abandoned);
 							fclose(abandoned);
 						}
-				}
+					}
 
-				//check if link crawling is specified
-				//make sure duplicates don't get crawled more than once
-				//check db if its already indexed too - do this at beginning instead?
+					//check if link crawling is specified
+					//make sure duplicates don't get crawled more than once
+					//check db if its already indexed too - do this at beginning instead?
 
-				//crawl links if refresh is from link crawler, or from regular refresh while crawl_repeat is on, or during manual submission when appropriate limits are set
+					//crawl links if refresh is from link crawler, or from regular refresh while crawl_repeat is on, or during manual submission when appropriate limits are set
 				}else if(nofollow==0 && getURLs==1 && alreadydone==0){
 					//cycle through url list, then construct an sql string around it, then insert it to indexqueue;	
-					
+
 					//force crawl depth of 1 during a refresh if crawl_repeat is set
 					if(crawl_repeat != 0 && crawl_repeat[0]=='1' && task != 0 && task[0]=='1'){
 						n_crawl_depth=1;
@@ -1514,7 +1514,7 @@ int main(int argc, char **argv)
 					while(urlListShuffled[loopcount]!=0){
 						switch(urlListShuffled[loopcount]){
 							case '\n' ://see if url can be indexed, if so, add to sql insert statement
-	
+
 								urlparse(url_fromlist);
 
 								//check if internal or external url
@@ -1643,7 +1643,7 @@ int main(int argc, char **argv)
 									}
 									strcat(url_insert,")");										
 								}
-								
+
 								memset(url_fromlist,0,url_fromlist_arraylen);
 								elementnum=0;
 								loopcount++;
@@ -1662,12 +1662,14 @@ int main(int argc, char **argv)
 						//insert into db
 						if (mysql_query(con, url_insert)) 
 						{
-						    finish_with_error(con);
+							finish_with_error(con);
 						}
 					}
 				}
-				if (curl)
+				if (curl){
 					curl_easy_cleanup(curl);// cleanup curl (finalURL used at inserts, thats why we cleanup and the end here 
+					curl_global_cleanup();	
+				} 
 			}else{
 				if(alreadydone == 0){
 					printf("\nPage was flagged as unable to crawl or banned.");
@@ -1681,7 +1683,7 @@ int main(int argc, char **argv)
 				strcat(sqlqueryremove,id);
 				if (mysql_query(con, sqlqueryremove)) 
 				{
-				    finish_with_error(con);
+					finish_with_error(con);
 				}
 				if(idexistsalready==1 && permitted==0){
 					printf(" Removing from index...");
@@ -1691,7 +1693,7 @@ int main(int argc, char **argv)
 					strcat(sqlqueryremove," AND updatable != '0'");
 					if (mysql_query(con, sqlqueryremove)) 
 					{
-					    finish_with_error(con);
+						finish_with_error(con);
 					}	
 					if(nShards>0 && shard != 0){
 						memset(sqlqueryremove,0,200);
@@ -1702,7 +1704,7 @@ int main(int argc, char **argv)
 						strcat(sqlqueryremove," AND updatable != '0'");
 						if (mysql_query(con, sqlqueryremove)) 
 						{
-						    finish_with_error(con);
+							finish_with_error(con);
 						}
 					}				
 				}
@@ -1721,8 +1723,7 @@ int main(int argc, char **argv)
 					shardnum=0;
 				sprintf(shardnumstr,"%d",shardnum);
 			}
-
-			printf(" Awaiting next page in queue...\n\n");
+				printf(" Awaiting next page in queue...\n\n");
 		}
 		//cleanup more sql stuff
 		mysql_free_result(result);
@@ -1731,5 +1732,6 @@ int main(int argc, char **argv)
 		if(empty==1)
 			sleep(5);//sleep 5 seconds
 	}
-  	exit(0);
+	exit(0);
 }
+