Initial commit.

This commit is contained in:
Wiby 2022-07-07 23:48:28 -04:00
commit ae06ca73ad
182 changed files with 33804 additions and 0 deletions

4
README.md Executable file
View file

@ -0,0 +1,4 @@
These are the source files for the Wiby search engine.
Refer to the installation guide located in /html/about/guide.html
You can also access it at http://wiby.me/about/guide.html

0
c/abandoned.txt Executable file
View file

238
c/checkrobots.h Executable file
View file

@ -0,0 +1,238 @@
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
//#include </usr/include/curl/curl.h> //ubuntu 16
//#include </usr/include/curl/easy.h> //ubuntu 16
#include </usr/include/x86_64-linux-gnu/curl/curl.h> //ubuntu 20
#include </usr/include/x86_64-linux-gnu/curl/easy.h> //ubuntu 20
//gcc checkrobots.c -o checkrobots -lcurl
//Number of bytes in the rolling window that checkrobots() slides over the robots.txt text.
#define rwindow_len 100
//File handle used when the on-disk copy of robots.txt (robots/<domain>.txt) is read back into memory.
FILE *robotsfile;
//robotsfilestr: heap buffer holding the robots.txt contents being parsed.
//robotsurl: URL the robots.txt is downloaded from (prefix + domain + "/robots.txt").
//rwindow: the rolling window itself; it has exactly rwindow_len bytes (valid indexes 0..rwindow_len-1) and is compared by position, not as a NUL-terminated string.
char *robotsfilestr,robotsurl[1011],rwindow[rwindow_len];
//char rURLpath[] = "/dumpop/";
/*
 * Write callback handed to libcurl via CURLOPT_WRITEFUNCTION.
 * Forwards the received chunk (nmemb items of `size` bytes at `ptr`) to `stream`
 * and reports how many items fwrite() stored.
 */
size_t write_data_checkrobots(void *ptr, size_t size, size_t nmemb, FILE *stream) {
	return fwrite(ptr, size, nmemb, stream);
}
int locateInRWindow(char *window, char *birdLower, char *birdUpper, int length);
//int main(int argc, char **argv)
int checkrobots(char *rURLprefix, char *rDomain, char *rURLpath)
{
if(rURLprefix[0]==0 || rDomain[0]==0 || rURLpath[0]==0)
return 1;
if(strlen(rDomain)>253)
return 0;
if(strlen(rURLpath)>500)
return 0;
memset(rwindow,'?',rwindow_len);
rwindow[rwindow_len]=0;
//curl_global_init(CURL_GLOBAL_ALL);
CURL *curl;
FILE *fp;
CURLcode res;
memset(robotsurl,0,1011);
strcpy(robotsurl,rURLprefix);
strcat(robotsurl,rDomain);
strcat(robotsurl,"/robots.txt");
char outfilename[300];
memset(outfilename,0,300);
strcpy(outfilename,"robots/");
strcat(outfilename,rDomain);
strcat(outfilename,".txt");
curl = curl_easy_init();
long fsize=0,response_code_checkrobots=0;
char *finalURL_checkrobots = NULL;
int foundfile=0;
char rb,rwb;
printf("\nChecking robots.txt: ");
//open robots.txt file and load into memory, or download it if it doesn't exist
if(robotsfile = fopen(outfilename, "rb")){
fseek(robotsfile, 0, SEEK_END);
fsize = ftell(robotsfile);
fseek(robotsfile, 0, SEEK_SET); /* same as rewind(f); */
robotsfilestr = malloc(fsize + 1);
if(fread(robotsfilestr, 1, fsize, robotsfile)){}
fclose(robotsfile);
robotsfilestr[fsize] = 0;
//printf("%ld",fsize);
foundfile=1;
}else if (curl) {
printf("Downloading... ");
if(fp = fopen(outfilename,"wb")){
//set curl options
curl_easy_setopt(curl, CURLOPT_URL, robotsurl);// set URL to get here
curl_easy_setopt(curl, CURLOPT_USERAGENT, "Mozilla/5.0 (compatible; compatible; WebCrawler; SearchEngine)");
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_data_checkrobots);// send all data to this function //
curl_easy_setopt(curl, CURLOPT_WRITEDATA, fp);// write the page body to this file handle
curl_easy_setopt(curl,CURLOPT_FOLLOWLOCATION,1L);//allow redirects
curl_easy_setopt(curl, CURLOPT_TIMEOUT, 60L);
curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT, 55L);
curl_easy_setopt(curl, CURLOPT_MAXREDIRS, 5L);//max num of redirects
curl_easy_setopt(curl, CURLOPT_MAXFILESIZE, 1000000L);//don't download if over 1MB
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0L);//0 or 1 to verify ssl
res = curl_easy_perform(curl);// get it!
curl_easy_getinfo(curl, CURLINFO_EFFECTIVE_URL, &finalURL_checkrobots);
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &response_code_checkrobots);
curl_easy_cleanup(curl);// always cleanup
fclose(fp);
if(response_code_checkrobots!=200){
fp = fopen(outfilename,"wb");
fclose(fp);
}
}else{
printf("\nFailed to create file: %s - proceeding anyway.",outfilename);
return 1;
}
}
if(response_code_checkrobots==200 && foundfile==0){
robotsfile = fopen(outfilename, "rb");
fseek(robotsfile, 0, SEEK_END);
fsize = ftell(robotsfile);
fseek(robotsfile, 0, SEEK_SET); // same as rewind(f);
robotsfilestr = malloc(fsize + 1);
if(fread(robotsfilestr, 1, fsize, robotsfile)){}
fclose(robotsfile);
robotsfilestr[fsize] = 0;
//printf("%ld",fsize);
}
//parse the robots.txt file
if(response_code_checkrobots==200 || foundfile==1 && fsize > 11){
int foundUserAgent=0,foundDisallow=0,foundAllow=0,comment=0,match=0;
int k=0,lenurlpath=strlen(rURLpath),rwupdated=0,result=1;
for(int i=0;i<fsize;i++){
rb = robotsfilestr[i];
//use a rolling window of 100 bytes to detect elements, ignore space/null/tab
if(rb != 32 && rb != 0 && rb != 9){
for(int j=0;j<rwindow_len-1;j++){
rwindow[j] = rwindow[j+1];
}
rwindow[rwindow_len-1] = rwb = rb;
rwupdated=1;
}
if(rwb==35){
comment=1;
}
if(rwb==10 || rwb==13){
comment=0;
}
if(comment==0){
//get my specific user-agent
//change this to something else if you want
//robots.txt file would need to call this ahead of the '*' user-agent or else will get ignored.
if(foundUserAgent==0 && locateInRWindow(rwindow,"user-agent:wibybot","USER-AGENT:WIBYBOT",18)==1){
foundUserAgent=1;
//printf("\nfound user agent!");
}
//get universal user-agent //change this to something else if you want
if(foundUserAgent==0 && locateInRWindow(rwindow,"user-agent:*","USER-AGENT:*",12)==1){
foundUserAgent=1;
//printf("\nfound user agent!");
}
//if another user-agent detected after, end loop
if(foundUserAgent==1 && locateInRWindow(rwindow,"user-agent:","USER-AGENT:",11)==1){
break;
}
//end if 'Disallow: /'
if(foundUserAgent==1 && locateInRWindow(rwindow,"disallow:/\n","DISALLOW:/\n",11)==1){
result=0;
}
if(foundUserAgent==1 && locateInRWindow(rwindow,"disallow:/\r","DISALLOW:/\r",11)==1){
result=0;
}
if(i==fsize-1 && foundUserAgent==1 && locateInRWindow(rwindow,"disallow:/","DISALLOW:/",10)==1){
result=0;
}
//check if path is disallowed in url
if(rwupdated==1 && foundDisallow==1){
if(rwb!=10 && rwb!=13){
//get path
if(k<lenurlpath && rwb==rURLpath[k])
match=1;
if(k<lenurlpath && rwb!=rURLpath[k])
match=0;
if(k>=lenurlpath)
match=0;
k++;
}
if((i==fsize-1 && match==1) || ((rwb==10 || rwb==13) && match==1)){
result=0;
foundDisallow=0;
}
if(match==0)
foundDisallow=k=0;
}
//check if path is allowed in url
if(rwupdated==1 && foundAllow==1){
if(rwb!=10 && rwb!=13){
//get path
if(k<lenurlpath && rwb==rURLpath[k])
match=1;
if(k<lenurlpath && rwb!=rURLpath[k])
match=0;
if(k>=lenurlpath)
match=0;
k++;
}
if((i==fsize-1 && match==1) || ((rwb==10 || rwb==13) && match==1)){
printf("Permitted.");
return 1;
}
if(match==0)
foundAllow=k=0;
}
if(foundUserAgent==1 && rwupdated && locateInRWindow(rwindow,"disallow:","DISALLOW:",9)==1){
foundDisallow=1;
foundAllow=0;
k=0;
//printf("\nfound disallow");
}
if(foundUserAgent==1 && rwupdated && locateInRWindow(rwindow,"\nallow:","\nALLOW:",7)==1){
foundDisallow=0;
foundAllow=1;
k=0;
//printf("\nfound allow");
}
}
rwupdated=0;
}
if(result==0){
printf("Denied.");
return 0;
}else{
printf("Permitted.");
return 1;
}
}
printf("Permitted.");
return 1;
}
/*
 * Test whether the last `length` bytes of the rolling window spell a keyword.
 * Each byte may match either the lower-case spelling (birdLower) or the
 * upper-case spelling (birdUpper) at the same position.
 * Returns 1 on a match, 0 otherwise.
 */
int locateInRWindow(char *window, char *birdLower, char *birdUpper, int length)
{
	int first = rwindow_len-length;//index in the window where the keyword would begin
	int n = 0;
	while(n < length){
		char seen = window[first+n];
		if(seen != birdLower[n] && seen != birdUpper[n])
			return 0;
		n++;
	}
	return 1;
}

1388
c/cr.c Executable file

File diff suppressed because it is too large Load diff

574
c/htmlparse.h Executable file
View file

@ -0,0 +1,574 @@
//HTMLparse
//Separates text from an HTML file
//Remember to also set sql_mode = "NO_BACKSLASH_ESCAPES" in my.cnf
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
//Size in bytes of the two rolling windows htmlparse() uses to detect markup.
#define window_len 100
//Capacities (excluding the terminating NUL) of the buffers htmlparse() fills.
#define charset_len 100
#define mysqlcharset_len 100
#define title_len 152
#define keywords_len 1024
#define description_len 182
#define robots_len 100
#define body_len 81920
#define urlList_len 102400
#define strURL_len 102400
//File handles for writing the parsed fields to disk (presumably for debugging - TODO confirm they are still needed).
FILE *bodyfile,*titlefile, *keywordsfile, *descriptionfile, *noindexfile, *nofollowfile, *charsetfile, *urlfile, *shuffledurlfile;
//Name of the HTML file htmlparse() reads.
static char filename[] = "page.out";
//window/windowWithSpaces: rolling windows over the input (without/with spaces); both are exactly window_len bytes and are not NUL-terminated.
//The remaining buffers receive the extracted charset, the MySQL charset statement, title, keywords, description, robots directive and body text.
char window[window_len],windowWithSpaces[window_len],charset[charset_len+1],mysqlcharset[mysqlcharset_len+1],title[title_len+1],keywords[keywords_len+1],description[description_len+1],robots[robots_len+1],body[body_len+1];
//urlList: newline-separated URLs collected from the page; strURL: the URL currently being collected;
//urlListShuffled: urlList after shuffleURLs(); urlListHoldShuffled: scratch copy used while shuffling.
char urlList[urlList_len+1],strURL[strURL_len+1],urlListShuffled[urlList_len+1],urlListHoldShuffled[urlList_len+1];
//Parser state flags and counters. htmlparse() resets most of them on entry; emptytitle, urlFound, urlTagFound, seeded,
//num_stylesheets, num_scripts and getURLs are not reset there (NOTE(review): presumably managed by the caller - confirm).
int titlefound=0,charsetfound=0,descriptionfound=0,keywordsfound=0,robotsfound=0,nofollow=0,noindex=0,scriptfound=0,stylefound=0,urlFound=0,urlTagFound=0,numURL=0,emptytitle=1,spaces=0,seeded=0,num_stylesheets=0,num_scripts=0,getURLs=1;
//Number of bytes currently stored in the corresponding buffers.
long charsetsize=0,titlesize=0,keywordssize=0,descriptionsize=0,robotssize=0,bodysize=0;
int matchMySQLcharset(int html_charset_length, char *html_charset, int html_match_length, char *html_lowercase_match, char *html_uppercase_match);
int locateInWindow(char *window, char *birdLower, char *birdUpper, int length);
int locateInURL(char *url, char *birdLower, char *birdUpper, int length, int urlSize);
int canCrawl(int urlSize);
void shuffleURLs(int iterations, long urlListSize);
void sqlsafe();
void charset2mysql();
//f: handle used to read the HTML file; fileStr: heap copy of its contents; c: the byte currently being examined by htmlparse().
FILE *f;
char *fileStr;
char c;
/*
 * Parse the HTML file named by `filename` in a single pass and fill the global
 * result buffers: charset/mysqlcharset, title, keywords, description, robots
 * (plus the noindex/nofollow flags), body (text with tags, scripts, styles and
 * comments removed) and, when getURLs is 1, urlList/urlListShuffled.
 *
 * Single quotes are doubled in title, description, keywords and body so the
 * text can be embedded in SQL string literals.
 *
 * If the file cannot be opened or read, the page is treated as empty: all
 * buffers stay zeroed and mysqlcharset still receives its default.
 */
void htmlparse(){
	long urlListSize=0;
	numURL=0;
	int intag=0,incomment=0,inscript=0,instyle=0,inlink=0,putspace=0,spacecount=0;
	int urlSize=0,dqcount=0;
	titlefound=charsetfound=descriptionfound=keywordsfound=robotsfound=nofollow=noindex=scriptfound=stylefound=0;
	charsetsize=titlesize=keywordssize=descriptionsize=robotssize=bodysize=0;

	//the windows are compared by position only; they hold exactly window_len bytes,
	//so no terminator is written (index window_len is past the end of the array)
	memset(window,'#',window_len);
	memset(windowWithSpaces,'#',window_len);
	memset(charset,0,charset_len+1);
	memset(mysqlcharset,0,mysqlcharset_len+1);
	memset(title,0,title_len+1);
	memset(keywords,0,keywords_len+1);
	memset(description,0,description_len+1);
	memset(robots,0,robots_len+1);
	memset(body,0,body_len+1);
	memset(urlList,0,urlList_len+1);
	memset(strURL,0,strURL_len+1);
	memset(urlListShuffled,0,urlList_len+1);
	memset(urlListHoldShuffled,0,urlList_len+1);

	printf("Parsing HTML... ");

	//open html file and load into memory
	long fsize = 0;
	fileStr = NULL;
	f = fopen(filename, "rb");
	if(f == NULL){
		printf("\nFailed to open file: %s - treating page as empty.",filename);
	}else{
		if(fseek(f, 0, SEEK_END) == 0){
			long len = ftell(f);
			if(len > 0 && fseek(f, 0, SEEK_SET) == 0){
				fileStr = malloc((size_t)len + 1);
				if(fileStr != NULL){
					//only parse the bytes fread actually delivered
					fsize = (long)fread(fileStr, 1, (size_t)len, f);
					fileStr[fsize] = 0;
				}
			}
		}
		fclose(f);
	}

	//Locate the charset, title, description, keywords, robots, body
	//must accomodate human error in markup
	//must double all single quotes for mysql safety
	//dont allow extra whitespace, ignore cr/lf/tabs
	//complete it all in one pass
	for(long i=0;i<fsize;i++){
		c = fileStr[i];

		//use a rolling window of 100 bytes to detect elements, ignore lf/cr/space/null/tab
		if(c!= 10 && c != 13 && c != 32 && c != 0 && c != 9){
			memmove(window,window+1,window_len-1);
			window[window_len-1] = c;
		}

		//use a rolling window of 100 bytes to detect elements, but permit space, ignore lf/cr/null/tab
		if(c!= 10 && c != 13 && c != 0 && c != 9){
			memmove(windowWithSpaces,windowWithSpaces+1,window_len-1);
			windowWithSpaces[window_len-1] = c;
		}

		//Get Title
		if(titlefound == 2){
			if(titlesize < (title_len-2) && c!= 10 && c != 13 && c != 0 && c != 9){
				title[titlesize]=c;
				titlesize++;
				if(c == 39){//check for single quotes and double them up for sql safety
					title[titlesize]=c;
					titlesize++;
				}
				if(c != 32 && c != 13 && c != 12 && c != 10 && c != 9){//some titles are just a bunch of spaces or garbage, need to check for that
					emptytitle = 0;
				}
			}
			if(locateInWindow(window,"</title>","</TITLE>",8)==1){
				titlefound = 3;
				//remove </title> from end of title by inserting null at location of <
				titlesize -= 8;
				title[titlesize] = 0;
			}
		}
		if(titlefound == 1 && c=='>')//in case of this situation: <title some_nonsense>
			titlefound=2;
		if(titlefound == 0 && locateInWindow(window,"<title","<TITLE",6)==1){
			titlefound = 1;
		}

		//Get Charset
		if(charsetfound == 1){
			if(c == '>' || c == '/'){
				charsetfound = 2;
			}
			if(charsetfound == 1 && charsetsize < charset_len && c != '"' && c != '\''){
				charset[charsetsize]=c;
				charsetsize++;
			}
		}
		if(charsetfound == 0 && locateInWindow(window,"charset=","CHARSET=",8)==1){
			charsetfound = 1;
		}

		//Get Description
		if(descriptionfound == 1){
			if(c == '>' || c == '/'){
				descriptionfound = 2;
			}
			if(descriptionfound == 1 && descriptionsize < (description_len-2) && c != '"'){
				description[descriptionsize]=c;
				descriptionsize++;
				if(c == 39){//check for single quotes and double them up for sql safety
					description[descriptionsize]=c;
					descriptionsize++;
				}
			}
		}
		if(descriptionfound == 0 && locateInWindow(window,"description\"content=","DESCRIPTION\"CONTENT=",20)==1){
			descriptionfound = 1;
		}

		//Get Keywords
		if(keywordsfound == 1){
			if(c == '>' || c == '/'){
				keywordsfound = 2;
			}
			if(keywordsfound == 1 && keywordssize < (keywords_len-2) && c != '"'){
				keywords[keywordssize]=c;
				keywordssize++;
				if(c == 39){//check for single quotes and double them up for sql safety
					keywords[keywordssize]=c;
					keywordssize++;
				}
			}
		}
		if(keywordsfound == 0 && locateInWindow(window,"keywords\"content=","KEYWORDS\"CONTENT=",17)==1){
			keywordsfound = 1;
		}

		//Get Robots (nofollow, noindex)
		if(robotsfound == 1){
			if(c == '>' || c == '/'){
				robotsfound = 2;
				if(locateInWindow(window,"nofollow","NOFOLLOW",8)==1)
					nofollow=1;
				if(locateInWindow(window,"noindex","NOINDEX",7)==1 || locateInWindow(window,"none","NONE",4)==1)
					noindex=nofollow=1;
			}
			if(robotsfound == 1 && robotssize < robots_len && c != '"' && c != '\''){
				robots[robotssize]=c;
				robotssize++;
			}
		}
		if(robotsfound == 0 && locateInWindow(window,"robots\"content=","ROBOTS\"CONTENT=",15)==1){
			robotsfound = 1;
		}

		if(titlefound != 1){
			//Ignore between scripts, styles, and remove all tags, repeated spaces, tabs, cr, lf, null, add a space at end of every tag
			if(c=='<'){
				intag = 1;
			}else if(c=='>'){
				intag = 0;
				putspace = 1;
			}
			if(locateInWindow(window,"<!--","<!--",4)==1){
				incomment = 1;
			}else if(locateInWindow(window,"-->","-->",3)==1){
				incomment = 0;
			}
			if(locateInWindow(window,"<script","<SCRIPT",7)==1){
				inscript = 1;
				num_scripts++;
			}else if(locateInWindow(window,"</script>","</SCRIPT>",9)==1){
				inscript = 0;
			}
			if(locateInWindow(window,"<style","<STYLE",6)==1){
				instyle = 1;
			}else if(locateInWindow(window,"</style>","</STYLE>",8)==1){
				instyle = 0;
			}
			if(locateInWindow(window,"<link","<LINK",5)==1){
				inlink = 1;
			}else if(inlink==1 && locateInWindow(window,">",">",1)==1){
				inlink = 0;
			}
			if(inlink==1){
				if(locateInWindow(window,".css",".CSS",4)==1)
					num_stylesheets++;
			}

			//Get Body
			//exclude remaining tags, comments, scripts, styles, cr, lf, null, tab, add a space after a '>' but only allow one
			if(intag == 0 && incomment == 0 && inscript == 0 && instyle == 0 && inlink == 0 && c!= 13 && c != 10 && c != 0 && c != 9 && bodysize < (body_len-2)){
				if(putspace == 1){
					if(spacecount == 0){
						body[bodysize]=32;
						bodysize++;
					}
					spacecount++;
					putspace=0;
				}else{
					if(c==32)
						spacecount++;
					else spacecount = 0;

					if(spacecount < 2){
						body[bodysize]=c;
						bodysize++;
						if(c == 39){//check for single quotes and double them up for sql safety
							body[bodysize]=c;
							bodysize++;
						}
					}
				}
			}
		}

		//Get URL's
		if(getURLs==1){
			if(urlFound == 1 && incomment==0 && instyle==0 && inscript==0 && inlink == 0){
				if(c=='"' || c=='\'')
					dqcount++;
				//give up on fragment-only links, empty quotes and a space right after href=
				if((c == '#' && urlSize==0) || (dqcount == 2 && urlSize == 0) || (c == ' ' && urlSize == 0))
					urlFound=urlTagFound=dqcount=0;
				if((c == '>' || c == ' ') && urlFound == 1){
					if(canCrawl(urlSize)==0 || (urlSize+urlListSize) >= (urlList_len-1)){
						memset(strURL,0,strURL_len+1);
					}else{
						strcat(urlList,strURL);
						strcat(urlList,"\n");
						urlListSize+=urlSize+1;
						memset(strURL,0,strURL_len+1);
						numURL++;
					}
					urlFound = urlTagFound = urlSize = dqcount = 0;
				}
				//cr/lf/tab/null never enter the window, so skip them here too; otherwise the
				//previous character (still at the end of the window) would be stored a second time
				if(urlFound == 1 && urlListSize < (urlList_len-2) && c != '"' && c != '\'' && c != 10 && c != 13 && c != 9 && c != 0 && urlSize < (strURL_len-2)){
					strURL[urlSize]=c;
					urlSize++;
				}
				if(urlSize==11){
					if(locateInWindow(window,"javascript:","JAVASCRIPT:",11)==1){
						urlFound=urlTagFound=urlSize=dqcount=0;
						memset(strURL,0,strURL_len+1);
					}
				}
			}
			if(urlFound == 0 && urlTagFound == 0 && incomment == 0 && instyle == 0 && inscript == 0 && inlink == 0 && locateInWindow(windowWithSpaces,"<a ","<A ",3)==1){//sometimes there is something between "<a" and "href"
				urlTagFound = 1;
			}
			if(urlFound == 0 && urlTagFound == 1 && incomment == 0 && instyle == 0 && inscript == 0 && inlink == 0 && locateInWindow(window,"href=","HREF=",5)==1){
				urlFound = 1;
			}
		}
	}

	//Convert charset to mysql equivalent
	charset2mysql();

	if(getURLs==1){
		//shuffle order of collected URLs list
		shuffleURLs(10,urlListSize);
	}

	free(fileStr);
	fileStr = NULL;
	printf("\nbody: %ld, title: %ld, charset: %ld, description: %ld, keywords: %ld, noindex: %d, nofollow: %d",bodysize,titlesize,charsetsize,descriptionsize,keywordssize,noindex,nofollow);
}
void shuffleURLs(int iterations, long urlListSize)
{
if(seeded==0){
srand(time(NULL));
seeded=1;
}
int r1,r2,r1to2;
int urlCount,i,j,k,l;
if(numURL>2){
strcpy(urlListHoldShuffled,urlList);
for(int loops=0;loops<iterations;loops++){
r1 = r1to2 = (rand() % numURL) + 1;
r2 = (rand() % numURL) + 1;
if(r1>r2){
r1=r2;
r2=r1to2;
}
if(r1==r2){
continue;
}
urlCount=i=j=k=l=0;
//skip to url number r1
while(urlCount < r1 /*&& i<urlList_len*/){
if(urlListHoldShuffled[i]=='\n')
urlCount++;
i++;
}
j=i;
//copy to urlListShuffled starting at j until reaching r2 location
while(urlCount<r2 /*&& j<urlList_len*/){
urlListShuffled[k]=urlListHoldShuffled[j];
if(urlListHoldShuffled[j]=='\n')
urlCount++;
j++;
k++;
}
//concat url's before i
while(l<i /*&& k<urlList_len*/){
urlListShuffled[k]=urlListHoldShuffled[l];
l++;
k++;
}
//concat url's after k
while(k<urlListSize /*&& k<urlList_len*/){
urlListShuffled[k]=urlListHoldShuffled[k];
k++;
}
strcpy(urlListHoldShuffled,urlListShuffled);
}
}else{
strcpy(urlListShuffled,urlList);
}
}
/*
 * Translate the charset name collected from the page (charset/charsetsize)
 * into a MySQL "SET CHARSET ...;" statement stored in mysqlcharset, and print it.
 * Falls back to utf8 when the page declared no charset or an unrecognised one.
 */
void charset2mysql()
{
	//recognised html charset names (length, lower/upper spelling) and the statement each one selects; first match wins
	static const struct {
		int len;
		char *lower;
		char *upper;
		char *statement;
	} known[] = {
		{ 5,"utf-8","UTF-8","SET CHARSET utf8mb4;"},
		{ 6,"latin1","LATIN1","SET CHARSET latin1;"},
		{ 9,"shift-jis","SHIFT-JIS","SET CHARSET cp932;"},
		{ 6,"x-sjis","X-SJIS","SET CHARSET cp932;"},
		{10,"iso-8859-1","ISO-8859-1","SET CHARSET latin1;"},
		{12,"windows-1252","WINDOWS-1252","SET CHARSET latin1;"},
		{12,"windows-1251","WINDOWS-1251","SET CHARSET cp1251;"},
		{ 6,"koi8-r","KOI8-R","SET CHARSET cp1251;"},
		{ 6,"euc-kr","EUC-KR","SET CHARSET euckr;"},
		{ 4,"big5","BIG5","SET CHARSET big5;"},
	};

	//if no charset specified, use utf8
	if(charsetsize == 0){
		strcpy(mysqlcharset,"SET CHARSET utf8;");
		printf("No Charset found. %s",mysqlcharset);
		return;
	}

	//else, match charset with a proper mysql charset
	for(size_t n=0;n<sizeof known/sizeof known[0];n++){
		if(matchMySQLcharset(charsetsize,charset,known[n].len,known[n].lower,known[n].upper)==1){
			strcpy(mysqlcharset,known[n].statement);
			printf("%s",mysqlcharset);
			return;
		}
	}

	strcpy(mysqlcharset,"SET CHARSET utf8;");
	printf("Charset mismatch. %s",mysqlcharset);
}
/*
 * Check whether the charset name found in the page starts with a known name.
 *
 * html_charset_length:  number of valid bytes in html_charset.
 * html_charset:         charset name taken from the page.
 * html_match_length:    length of the name to compare against.
 * html_lowercase_match: that name in lower case.
 * html_uppercase_match: that name in upper case.
 *
 * A position is accepted without comparison when either side holds '-' or '_',
 * so the two separators are interchangeable. Every other position must equal
 * the lower- or upper-case spelling. Only the first html_match_length bytes
 * are examined, and bytes beyond html_charset_length are never read.
 *
 * Returns 1 on a match, 0 otherwise (including when html_match_length is 0).
 */
int matchMySQLcharset(int html_charset_length, char *html_charset, int html_match_length, char *html_lowercase_match, char *html_uppercase_match)
{
	if(html_match_length <= 0)
		return 0;
	for(int i=0;i<html_match_length;i++){
		//the page's charset name is shorter than the name being matched
		if(i >= html_charset_length){
			return 0;
		}
		char have = html_charset[i];
		char want = html_lowercase_match[i];
		int separator = (have == '_' || have == '-' || want == '_' || want == '-');
		if(!separator && have != want && have != html_uppercase_match[i]){
			return 0;
		}
	}
	return 1;
}
/*
 * Test whether the last `length` bytes of a rolling window spell a keyword.
 * Each byte may match either the lower-case spelling (birdLower) or the
 * upper-case spelling (birdUpper) at the same position.
 * Returns 1 on a match, 0 otherwise.
 */
int locateInWindow(char *window, char *birdLower, char *birdUpper, int length)
{
	int first = window_len-length;//index in the window where the keyword would begin
	int n = 0;
	while(n < length){
		char seen = window[first+n];
		if(seen != birdLower[n] && seen != birdUpper[n])
			return 0;
		n++;
	}
	return 1;
}
/*
 * Test whether `url` (urlSize bytes long) ends with a given suffix.
 *
 * birdLower/birdUpper: the suffix in lower and upper case, `length` bytes each.
 * Each byte of the URL's tail may match either spelling, so mixed case is accepted.
 *
 * Returns 1 when the suffix matches, 0 when it does not or when the URL is
 * shorter than the suffix.
 */
int locateInURL(char *url, char *birdLower, char *birdUpper, int length, int urlSize)
{
	if(urlSize < length)
		return 0;
	long start = urlSize-length;
	for(int i=0;i<length;i++){
		//both spellings are compared against the url itself
		if(url[start+i] != birdLower[i] && url[start+i] != birdUpper[i]){
			return 0;
		}
	}
	return 1;
}
//Check if the url held in strURL can be indexed (relative links and html/htm/txt/php/asp files are allowed).
//Removing this check will add to the queue everything listed, including external links.
//urlSize is the number of bytes of strURL to examine. Returns 1 if the url may be queued, 0 if not.
int canCrawl(int urlSize){
	int dots=0,slashes=0;
	int hasExt=0,extAt=0,hasPrefix=0;

	for(int pos=0;pos<urlSize;pos++){
		char ch = strURL[pos];

		//a ':' past the fourth character is only accepted as part of an http/https prefix
		if(urlSize>5 && ch==':' && pos>3){
			int looksHttp = (strURL[0]=='h' || strURL[0]=='H')
				&& (strURL[1]=='t' || strURL[1]=='T')
				&& (strURL[2]=='t' || strURL[2]=='T')
				&& (strURL[3]=='p' || strURL[3]=='P')
				&& (strURL[4]=='s' || strURL[4]=='S' || strURL[4]==':')
				&& (strURL[5]==':' || strURL[5]=='/');
			if(!looksHttp)
				return 0;
			hasPrefix=1;
		}

		//query strings and backslashes are never queued
		if(ch=='?' || ch=='\\'){
			return 0;
		}

		if(ch=='.'){
			//remember the most recent dot as a possible file extension
			dots++;
			hasExt=1;
			extAt=pos;
		}else if(ch=='/'){
			slashes++;
			//a slash after the dot means the dot belonged to a directory or host name
			if(hasExt==1 && pos>extAt){
				hasExt=0;
			}
		}

		//with a prefix, dots seen before a third slash are not file extensions
		if(hasPrefix==1 && slashes-2<=0){
			hasExt=0;
		}
	}

	if(dots==0 || hasExt==0){
		return 1;
	}

	//restrict file extensions to these
	if(locateInURL(strURL,".html",".HTML",5,urlSize)==1 || locateInURL(strURL,".htm",".HTM",4,urlSize)==1 || locateInURL(strURL,".txt",".TXT",4,urlSize)==1 || locateInURL(strURL,".php",".PHP",4,urlSize)==1 || locateInURL(strURL,".asp",".ASP",4,urlSize)==1){
		return 1;
	}
	return 0;
}

226
c/rs.c Executable file
View file

@ -0,0 +1,226 @@
//wiby refresh scheduler
#include <mysql.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <ctype.h>
//Print the last error reported for the connection handle to stderr,
//close the handle and terminate the program with exit status 1.
void finish_with_error(MYSQL *con)
{
	const char *reason = mysql_error(con);
	fprintf(stderr, "%s\n", reason);
	mysql_close(con);
	exit(1);
}
//Print the usage text to stdout and terminate the program with exit status 0.
void help(){
	//one adjacent literal per paragraph; the compiler joins them into a single string
	static const char usage[] =
		"\nWiby Refresh Scheduler\n\n"
		"Usage: re Batch_Limit Total_Crawlers\n\n"
		"The refresh scheduler finds pages that need to be refreshed and adds them to the indexqueue to be crawled. It will wait for the batch to complete before adding more.\n\n"
		"There are two arguments you can set, the max number of pages to grab for each batch, and the total number of crawlers running.\n\n"
		"If you set no arguments, it assumes you have one crawler running with an unassigned ID and will set a limit of one page per batch, rechecking if it finishes every 5 seconds. This slow paced default is fine for an index of 100k pages or so and will not use much CPU.\n\n"
		"If you have two crawlers running and a batch limit of 100 pages, this is how you would run the scheduler:\n\n"
		"./re 100 2\n\n"
		"In that example, each crawler will be assigned 50 pages. Once all 100 have been crawled, another batch will be assigned.\n\n"
		"You can also specify only a batch limit and omit the total number of crawlers, it will then assume one crawler with an unassigned ID by default.\n\n"
		"If you do not specify the number of crawlers, do not assign a number (ID) to the crawler that you have running and do not run more than one crawler.\n\n"
		"The program will sleep for 60 seconds if there are no stale pages found.\n\n";
	fputs(usage, stdout);
	exit(0);
}
//Return 1 when every character of source is a decimal digit ('0'..'9'), 0 otherwise.
//An empty string contains no offending character and therefore yields 1.
int isnum(char *source){
	for(const char *p = source; *p != '\0'; p++){
		if(*p < '0' || *p > '9')
			return 0;
	}
	return 1;
}
//Append text to the NUL-terminated string in dst, whose total capacity is cap bytes.
//Returns 0 on success, -1 (dst unchanged) when the result would not fit.
static int sqlAppend(char *dst, size_t cap, const char *text)
{
	size_t used = strlen(dst);
	size_t add = strlen(text);
	if(add >= cap - used)
		return -1;
	memcpy(dst + used, text, add + 1);
	return 0;
}

//Append value to dst as a single-quoted SQL string literal, doubling every embedded single quote.
//When value is NULL, the raw text ifNull is appended instead.
//NOTE(review): doubling quotes is a complete escape only when the server runs with
//sql_mode NO_BACKSLASH_ESCAPES - confirm the deployment sets it.
//Returns 0 on success, -1 (dst unchanged) when the result would not fit in cap bytes.
static int sqlAppendValue(char *dst, size_t cap, const char *value, const char *ifNull)
{
	if(value == NULL)
		return sqlAppend(dst, cap, ifNull);
	size_t used = strlen(dst);
	size_t need = 2;//opening and closing quote
	for(const char *p = value; *p; p++)
		need += (*p == '\'') ? 2 : 1;
	if(need >= cap - used)
		return -1;
	char *out = dst + used;
	*out++ = '\'';
	for(const char *p = value; *p; p++){
		if(*p == '\'')
			*out++ = '\'';
		*out++ = *p;
	}
	*out++ = '\'';
	*out = 0;
	return 0;
}

//Refresh scheduler entry point.
//argv[1] (optional): maximum number of pages per batch; argv[2] (optional): number of crawlers.
//Loops forever: while rows with task = 1 remain in indexqueue it waits; otherwise it selects
//stale windex rows (up to the batch limit) and inserts them into indexqueue, assigning crawler
//IDs round robin when a crawler count was given. Sleeps 5 seconds per cycle plus 60 seconds
//when nothing was found.
int main(int argc, char **argv)
{
	int wait_batch = 0,n_lim=1,num_cr=0,cr_count=1;
	char lim[100] = "1";

	if(argc == 3 && isnum(argv[2])==1 && isnum(argv[1])==1){
		num_cr = atoi(argv[2]);
		n_lim = atoi(argv[1]);
	}else if(argc == 2 && isnum(argv[1])==1){
		n_lim = atoi(argv[1]);
	}else if(argc > 1){
		help();
	}
	if(n_lim > 0 && argc > 1){
		//a limit that does not fit the buffer is not a usable number; show the usage text instead of overflowing
		if(strlen(argv[1]) >= sizeof lim)
			help();
		snprintf(lim,sizeof lim,"%s",argv[1]);
	}

	while(1)
	{
		//allocates or initialises a MYSQL object
		MYSQL *con = mysql_init(NULL);
		if (con == NULL)
		{
			finish_with_error(con);
		}
		//establish a connection to the database. We provide connection handler, host name, user name and password parameters to the function. The other four parameters are the database name, port number, unix socket and finally the client flag
		if (mysql_real_connect(con, "localhost", "crawler", "seekout", NULL, 0, NULL, 0) == NULL)
		{
			finish_with_error(con);
		}
		if (mysql_query(con, "use wiby"))
		{
			finish_with_error(con);
		}
		//check if indexqueue has rows from a previous batch sent by the scheduler (should not insert more until it's empty)
		if (mysql_query(con, "SELECT id FROM indexqueue WHERE task = 1"))
		{
			finish_with_error(con);
		}
		//We get the result set using the mysql_store_result() function. The MYSQL_RES is a structure for holding a result set
		MYSQL_RES *result = mysql_store_result(con);
		if(result == NULL)
		{
			finish_with_error(con);
		}
		int num_rows = 0;
		int re_rows = (int)mysql_num_rows(result);
		mysql_free_result(result);

		if(re_rows > 0){
			mysql_close(con);
			if(wait_batch == 0){
				printf("\nWaiting for batch to complete...\n\n");
			}
			wait_batch = 1;
		}else{
			wait_batch = 0;
			char querywindex[1000];
			snprintf(querywindex,sizeof querywindex,"SELECT id,url,worksafe,approver,surprise,updatable,crawl_tree,crawl_family,crawl_pages,crawl_type,crawl_repeat,force_rules FROM windex WHERE (CASE WHEN updatable = 1 THEN updated < NOW() - INTERVAL 1 WEEK WHEN updatable = 2 THEN updated < NOW() - INTERVAL 1 DAY WHEN updatable = 3 THEN updated < NOW() - INTERVAL 12 HOUR WHEN updatable = 4 THEN updated < NOW() - INTERVAL 6 HOUR WHEN updatable = 5 THEN updated < NOW() - INTERVAL 3 HOUR WHEN updatable = 6 THEN updated < NOW() - INTERVAL 1 HOUR END) AND updatable != 0 AND enable = 1 LIMIT %s;",lim);

			//Get aging windex entries
			if (mysql_query(con,querywindex))
			{
				finish_with_error(con);
			}
			result = mysql_store_result(con);
			if(result == NULL)
			{
				finish_with_error(con);
			}
			num_rows = (int)mysql_num_rows(result);

			MYSQL_ROW row;
			while((row = mysql_fetch_row(result))){
				printf("----------------------------------------------------------\nRefresh:");
				//columns in the order of the SELECT above
				char *id = row[0];
				char *url = row[1];
				char *worksafe = row[2];
				char *approver = row[3];
				char *surprise = row[4];
				char *updatable = row[5];
				char *crawl_tree = row[6];
				char *crawl_family = row[7];
				char *crawl_pages = row[8];
				char *crawl_type = row[9];
				char *crawl_repeat = row[10];
				char *force_rules = row[11];

				printf("\nURL: %s\nID: %s\nWorksafe: %s\nSurprise: %s\nApprover: %s\nUpdatable: %s", url, id, worksafe, surprise, approver, updatable);
				if(num_cr > 0){
					printf("\nCrawler ID: %d",cr_count);
				}else{
					printf("\nCrawler ID: (null)");
				}

				//values in INSERT column order, with the raw SQL used when a column is NULL
				const char *values[] = {url,worksafe,approver,surprise,updatable,crawl_tree,crawl_family,crawl_pages,crawl_type,crawl_repeat,force_rules};
				static const char *const ifNull[] = {"NULL","NULL","NULL","NULL","NULL","NULL","NULL","'0'","'0'","'0'","'0'"};
				size_t nvalues = sizeof values / sizeof values[0];

				//size the statement for the worst case: every character doubled, plus quotes and a comma per value
				size_t cap = 512;
				for(size_t n=0;n<nvalues;n++)
					cap += (values[n] != NULL ? 2*strlen(values[n]) : 4) + 3;
				char *sqlqueryinsertindexqueue = malloc(cap);
				if(sqlqueryinsertindexqueue == NULL){
					fprintf(stderr, "Out of memory while building indexqueue insert\n");
					mysql_free_result(result);
					mysql_close(con);
					exit(1);
				}
				sqlqueryinsertindexqueue[0] = 0;

				int bad = 0;
				if(num_cr == 0){
					bad |= sqlAppend(sqlqueryinsertindexqueue,cap,"INSERT INTO indexqueue (url,worksafe,approver,surprise,updatable,crawl_tree,crawl_family,crawl_pages,crawl_type,crawl_repeat,force_rules,task) VALUES (");
				}else{
					bad |= sqlAppend(sqlqueryinsertindexqueue,cap,"INSERT INTO indexqueue (url,worksafe,approver,surprise,updatable,crawl_tree,crawl_family,crawl_pages,crawl_type,crawl_repeat,force_rules,task,crawler_id) VALUES (");
				}
				for(size_t n=0;n<nvalues;n++){
					bad |= sqlAppendValue(sqlqueryinsertindexqueue,cap,values[n],ifNull[n]);
					bad |= sqlAppend(sqlqueryinsertindexqueue,cap,",");
				}
				//task is always 1 for rows queued by the scheduler
				bad |= sqlAppend(sqlqueryinsertindexqueue,cap,"'1'");
				if(num_cr > 0){
					char str_cr_count[32];
					snprintf(str_cr_count,sizeof str_cr_count,",'%d'",cr_count);
					bad |= sqlAppend(sqlqueryinsertindexqueue,cap,str_cr_count);
				}
				bad |= sqlAppend(sqlqueryinsertindexqueue,cap,");");
				if(bad){
					fprintf(stderr, "Failed to build indexqueue insert\n");
					free(sqlqueryinsertindexqueue);
					mysql_free_result(result);
					mysql_close(con);
					exit(1);
				}

				printf("\nInserting into indexqueue...\n");
				if(mysql_query(con,sqlqueryinsertindexqueue))
				{
					free(sqlqueryinsertindexqueue);
					finish_with_error(con);
				}
				free(sqlqueryinsertindexqueue);

				//Assign to crawlers in round robin fashion if user indicated more than one crawler.
				if(cr_count < num_cr && num_cr > 0){
					cr_count++;
				}else if(num_cr > 0){
					cr_count=1;
				}
			}

			//cleanup sql stuff
			mysql_free_result(result);
			mysql_close(con);

			if(num_rows > 0){
				printf("\nAwaiting next set of pages...\n\n");
			}
		}

		sleep(5);//sleep 5 seconds
		if(num_rows==0 && re_rows == 0)//sleep if no rows were found
			sleep(60);//sleep 60 seconds
	}
	exit(0);
}

306
c/rt.c Executable file
View file

@ -0,0 +1,306 @@
//Wiby slave replication server tracker
//Admin creates file 'servers.csv' containing only IP and database name, one per line
//Tracker will check status of slave databases by attempting to connect to all listed every few seconds
//Tracker will create a copy of this file called 'res.csv' and display only the confirmed online servers
//as well as ID ranges divided across all servers so each has the same number of rows.
#include <mysql.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/time.h>
//servers: handle for the 'servers.csv' input file; error: handle for the 'rtlog' error log opened by handle_error();
//res: presumably the handle for the 'res.csv' output described in the file header (its use is outside this excerpt - confirm).
FILE *servers;
FILE *error;
FILE *res;
//c: character most recently read from servers.csv; d: the character read before it.
int c,d;
//ip/db: address and database name of each server listed in servers.csv, one row per server (up to 1000 entries of up to 100 bytes).
char ip[1000][100];
char db[1000][100];
//NOTE(review): the buffers below are used beyond this excerpt; by their names they hold the servers confirmed online,
//the ID range assigned to each, and related query text - confirm against the rest of main().
char ipOK[1000][100];
char dbOK[1000][100];
char startID[1000][100];
char endID[1000][100];
char firstOnlineServerIP[100];
char firstOnlineServerDB[100];
char *resfiletext;
char totalRows[50];
char lastID[50];
char strSQL[200];
//timestamps taken before and after a connection attempt to report its latency
struct timeval stop, start;
//Report the last error of the connection handle on stdout, append it to the
//'rtlog' file when that file can be opened, and close the handle.
//Unlike a fatal error handler this returns to the caller.
void handle_error(MYSQL *con)
{
	const char *message = mysql_error(con);
	printf("%s\n", message);
	error = fopen("rtlog", "a");
	//logging is best effort: an unwritable log must not take the tracker down
	if(error != NULL){
		fprintf(error, "%s\n", message);
		fclose(error);
	}
	mysql_close(con);
}
int main(int argc, char **argv)
{
int timetest=0,reportinit=0,running=0;
printf("\nStarting Replication Tracker:\n\nConnection Latency\n--------------------------------\n");
while(1)
{
long bytecount=0;
int serverCount=0, onlineServers=0, i=0, ipcnt=0, dbcnt=0, errcount=0, foundfirst=0,timeout=5,ignore = 0;
int ipORdb = 0; //0 = ip, 1 = space
servers = fopen("servers.csv", "rb");
if (servers==NULL)
{
printf("Error opening 'servers.csv' file.\n");
exit(0);
}
//parse server list
while((c = fgetc(servers)) != EOF)
{
if(c == 35)//check if line is commented out (#)
ignore = 1;
if(c != 10 && c != 13 && c != 32 && c != 44 && ipORdb == 0 && ignore == 0){//if no cr/lf, commas, spaces, or comments, gather ip
ip[serverCount][i] = c;
ipcnt++;
}
if(c==44 && ignore == 0){//if comma detected, switch to gather db name
ipORdb = 1;
i = -1;
}
if(c != 10 && c != 13 && c != 32 && c != 44 && ipORdb == 1 && ignore == 0){//if no cr/lf, commas, spaces, or comments, gather db
db[serverCount][i] = c;
dbcnt++;
}
if(c == 10){//count replication slaves
ipORdb = 0;
ip[serverCount][ipcnt] = 0;//null terminate string
db[serverCount][dbcnt] = 0;
if(ipcnt && dbcnt > 0)
serverCount++;
ipcnt = dbcnt = 0;
i = -1;
ignore = 0;
}
if(c != 13){
i++;
bytecount++;
}
d=c;
}
if(i>0 && d != 10)
serverCount++;
fclose(servers);
//Allocate bytes for the res file text
// resfiletext = (char*)calloc(bytecount+1000+(i*50),sizeof(char));
char resfiletext[10000];
memset(resfiletext,0,10000);
//conect to each listed server and verify it works
for (i=0;i<serverCount;i++){
int err = 0;
MYSQL *con = mysql_init(NULL);
if (con == NULL)
{
handle_error(con);
exit(0);
}
mysql_options(con,MYSQL_OPT_CONNECT_TIMEOUT,&timeout);
if(timetest==0){
gettimeofday(&start, NULL);
}
if (mysql_real_connect(con, ip[i], "remote_guest", "d0gemuchw0w", db[i], 0, NULL, 0) == NULL)
{
handle_error(con);
err=1;
}
if(timetest==0){
gettimeofday(&stop, NULL);
printf("%s %s | %lums", ip[i], db[i], ((stop.tv_sec - start.tv_sec) * 1000000 + stop.tv_usec - start.tv_usec)/1000);
if(err==1)
printf(" (Fail)");
printf("\n");
}
if(err==0){//append successful connection info to res string
strcpy(ipOK[onlineServers],ip[i]);
strcpy(dbOK[onlineServers],db[i]);
onlineServers++;
mysql_close(con);
}
}
timetest=1;
//get more database info needed for distributed queries
//--------------------------------------------------------------------------------------------------------------------
// connect to first available slave server and get info needed for all available slaves to handle a distributed query
int initialinfo = 0, nRows=0;
for (i=0;i<onlineServers;i++){
int err = 0, startIDint=0;
long long int numrows=0;
MYSQL *con = mysql_init(NULL);
if (con == NULL)
{
handle_error(con);
exit(0);
}
mysql_options(con,MYSQL_OPT_CONNECT_TIMEOUT,&timeout);
if (mysql_real_connect(con, ipOK[0], "remote_guest", "d0gemuchw0w", dbOK[0], 0, NULL, 0) == NULL) //connect to the same server each iteration
{
handle_error(con);
err=1;
}
if(err==0){
if(i==0){//get initial info
//Get total number of rows
if (mysql_query(con, "SELECT COUNT(id) FROM windex;"))
{
handle_error(con);
}
MYSQL_RES *result = mysql_store_result(con);
if(result == NULL)
{
handle_error(con);
exit(0);
}
MYSQL_ROW row = mysql_fetch_row(result);
nRows = atoi(row[0]);
//free old result data or else you'll get a memory leak
mysql_free_result(result);
//Get the last row id number
if (mysql_query(con, "SELECT id FROM windex ORDER BY id DESC LIMIT 1;"))
{
handle_error(con);
}
result = mysql_store_result(con);
if(result == NULL)
{
handle_error(con);
exit(0);
}
row = mysql_fetch_row(result);
memset(lastID, 0, 50);
strcpy(lastID,row[0]);
//free old result data or else you'll get a memory leak
mysql_free_result(result);
if(reportinit==0)
printf("\nCurrent ID Ranges (Rows: %d)\n--------------------------------",nRows);
}
//Get id of last row of the % of the db you want to search (depending on # of slaves)
numrows = (nRows / onlineServers * i) + (nRows / onlineServers) - 1;
//printf("\n%lld",numrows);fflush(stdout);
sprintf(totalRows, "%lld", numrows);//convert int to string
strcpy(strSQL,"SELECT id FROM windex ORDER BY id LIMIT ");
strcat(strSQL,totalRows);
strcat(strSQL,",1;");
//SELECT id FROM windex ORDER BY id LIMIT n-1,1;
if (mysql_query(con, strSQL))
{
handle_error(con);
}
MYSQL_RES *result2 = mysql_store_result(con);
if(result2 == NULL)
{
handle_error(con);
exit(0);
}
MYSQL_ROW row = mysql_fetch_row(result2);
//store endID and startID
if(i+1 != onlineServers)
strcpy(endID[i],row[0]);
else
strcpy(endID[i],lastID);
//strcpy(endID[i],row[0]);
if(i==0){
strcpy(startID[i],"0");
}else{
startIDint = atoi(endID[i-1])+1;
sprintf(startID[i], "%d", startIDint);
}
if(reportinit==0){
printf("\n%s %s | %s %s",ipOK[i],dbOK[i],startID[i],endID[i]);
if(i+1 == onlineServers)
printf("\n\n");
fflush(stdout);
}
//free old result data or else you'll get a memory leak
mysql_free_result(result2);
mysql_close(con);
//update res file
if(i>0)
strcat(resfiletext,"\n");
strcat(resfiletext,ipOK[i]);
strcat(resfiletext,",");
strcat(resfiletext,dbOK[i]);
strcat(resfiletext,",");
strcat(resfiletext,startID[i]);
strcat(resfiletext,",");
strcat(resfiletext,endID[i]);
}
}
//--------------------------------------------------------------------------------------------------------------------
//get resfiletext length
long resfiletextlen = strlen(resfiletext);
res = fopen("res.csv","rb");
if (res==NULL)
{
printf("Error opening 'res.csv' file. Will create a new one.\n");
res = fopen("res","w+");
if (res==NULL)
{
printf("Error creating 'res.csv' file.\n");
exit(0);
}
}
//Get file size
fseek(res, 0L, SEEK_END);
bytecount = ftell(res);
rewind(res);
//check if res file is different from resfiletext string.
i=0;
int changed=0;
if(bytecount == resfiletextlen){
while((c = fgetc(res)) != EOF)
{
if(c != resfiletext[i]){
changed = 1;
}
i++;
}
fclose(res);
}else{
changed = 1;
}
reportinit = 1;
//store available servers in res file
if(changed == 1){
res = fopen("res.csv", "w");
fprintf(res, "%s", resfiletext);
fclose(res);
reportinit = 0;
}
if(running == 0){
printf("Running\n");
fflush(stdout);
running = 1;
}
//fflush(stdout);
//free(resfiletext);
sleep(5);
}
}

4
c/servers_example.csv Executable file
View file

@ -0,0 +1,4 @@
192.168.0.101,wiby
192.168.0.102,wiby
192.168.0.103,wiby
192.168.0.104,wiby
1 192.168.0.101 wiby
2 192.168.0.102 wiby
3 192.168.0.103 wiby
4 192.168.0.104 wiby

296
c/urlparse.h Executable file
View file

@ -0,0 +1,296 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
//char url[] = "index.htm\0";
//Results of the most recent urlparse() call. All of them are overwritten on every call.
//Working copy of the url, with a trailing index.html / index.htm file name removed
char urlcopy[1000];
//Host part of the url (the text before the first '/')
char domain[1000];
//Comma-separated list of two-part TLDs that urlparse() searches for when deriving rootdomain
char tldlist[] = "co.uk,co.jp\0";
//Scratch space holding the tldlist entry currently being examined
char buffer[1000];
//Domain derived from the host, normally the host without its first label (see urlparse())
char rootdomain[1000];
//Path part of the url starting at the first '/'; "/" when there is none
char urlPath[1000];
//Directory part of the path; "/" when there is none
char folderPath[1000];
//The url without prefix and without path
char urlnopathnoprefix_fromlist[1000];
//The url without its http(s)://(www.) prefix
char urlnoprefix_fromlist[10000];
//Prefix recognised by urlparse(), e.g. "http://" or "https://www."; emptied when none is recognised
char prefix_fromlist[14];
//NOTE(review): urlparse() declares a local variable with this same name, so this global is never written by it
int prefixsize_fromlist=0;
//Returns 1 when the last substrLen characters of 'domain' match substrLower/substrUpper character by character, else 0
int checkDomain(char *domain, char *substrLower, char *substrUpper, int domainLen, int substrLen);
//Splits 'url' into the pieces used by the crawler and stores them in the globals of this header:
//  urlcopy                     copy of url; a trailing "index.html"/"index.htm" file name is cut off
//  prefix_fromlist             "http://", "https://", "http://www." or "https://www." (also used for
//                              ww1./ww2./ww3. hosts); empty when no prefix is recognised
//  urlnoprefix_fromlist        urlcopy without the prefix
//  urlnopathnoprefix_fromlist  the part of urlnoprefix_fromlist before the first '/'
//  domain                      host part (text before the first '/')
//  urlPath / folderPath        path from the first '/', and that path up to its last '/'
//  rootdomain                  host without its first label when the host has more than one dot;
//                              the whole host when it has exactly one dot, is all digits and dots,
//                              or has at most two dots and the url contains an entry of tldlist
//A url without prefix is only treated as "host/path" when a '.' occurs before the first '/' and the
//host does not end in .html/.htm/.txt; otherwise all outputs except urlcopy stay empty ("/" for the paths).
//Urls of 998 characters or more are not parsed: every buffer is cleared and prefix_fromlist is left as "http".
void urlparse(char* url){
	int foundDot=0,foundDotInPath=0,foundSlash=0,foundColon=0,slashPos=0,lastSlashPos=0,folderPathLength=0,pathlen=0;
	int rootdomaincount=0;
	int isIPv4=1,isIPv6=1;
	memset(buffer,0,1000);
	memset(urlcopy,0,1000);
	memset(domain,0,1000);
	memset(rootdomain,0,1000);
	memset(urlPath,0,1000);
	memset(folderPath,0,1000);
	memset(urlnoprefix_fromlist,0,1000);
	memset(urlnopathnoprefix_fromlist,0,1000);
	//find out if its http or https or http://www. or https://www.
	int httpwww=0, httpswww=0, http=0, https=0;
	memset(prefix_fromlist,0,14);
	strcpy(prefix_fromlist,"http");
	int urlsize = strlen(url);
	if(urlsize<998){
		//copy url (variable from crawler)
		strcpy(urlcopy,url);
		//truncate any "index.html" files and just use the directory path
		if(urlsize == 10){
			if(checkDomain(urlcopy,"index.html","INDEX.HTML",urlsize,10)==1){
				urlcopy[0]=0;
				urlsize=0;
			}
		}else if(urlsize == 9){
			if(checkDomain(urlcopy,"index.htm","INDEX.HTM",urlsize,9)==1){
				urlcopy[0]=0;
				urlsize=0;
			}
		}
		if(urlsize > 10){
			if(checkDomain(urlcopy,"/index.html","/INDEX.HTML",urlsize,11)==1){
				urlcopy[urlsize-10]=0;//keep the slash, drop the file name
				urlsize-=10;
			}
		}
		if(urlsize > 9){
			if(checkDomain(urlcopy,"/index.htm","/INDEX.HTM",urlsize,10)==1){
				urlcopy[urlsize-9]=0;
				urlsize-=9;
			}
		}
		//the numbers assigned below are the lengths of the matched prefixes
		if(urlsize > 4){
			if(url[4]==':' && (url[3]=='p' || url[3]=='P'))
				http = 7;
		}
		if(urlsize > 5){
			if(url[5]==':' && (url[4]=='s' || url[4]=='S'))
				https = 8;
		}
		if(urlsize > 11){
			//http://www. (the third letter may also be 1, 2 or 3)
			if((url[7]=='w' || url[7]=='W') && (url[8]=='w' || url[8]=='W') && ((url[9]=='w' || url[9]=='W') || url[9]=='1' || url[9]=='2' || url[9]=='3') && url[10]=='.' ){
				httpwww = 11;
				http = https = 0;
			}
			//https://www. - same test shifted by one; the third letter is url[10]
			if(url[7]=='/' && (url[8]=='w' || url[8]=='W') && (url[9]=='w' || url[9]=='W') && ((url[10]=='w' || url[10]=='W') || url[10]=='1' || url[10]=='2' || url[10]=='3') && url[11]=='.' ){
				httpswww = 12;
				http = https = 0;
			}
		}
		//set the prefix
		if(http > 0) strcat(prefix_fromlist,"://");
		else if(https > 0) strcat(prefix_fromlist,"s://");
		else if(httpwww > 0) strcat(prefix_fromlist,"://www.");
		else if(httpswww > 0) strcat(prefix_fromlist,"s://www.");
		//NOTE(review): this local hides the global prefixsize_fromlist, which therefore keeps its old value.
		//Left as is because it is not visible here whether any caller relies on that - confirm.
		int prefixsize_fromlist = httpswww+httpwww+https+http;
		int urlcount=0,urlnoprefixcount=0,urlnopathnoprefix_done=0,urlnopathnoprefix_len=0;
		//if no prefix, see if it might be a domain
		int noprebutisdomain=0;
		if(prefixsize_fromlist==0){
			memset(prefix_fromlist,0,14);
			while(urlcount < urlsize+1)
			{
				if(urlcopy[urlcount]=='.' && urlcount>0)
				{
					noprebutisdomain=1;
					break;
				}
				if(urlcopy[urlcount]=='/')
				{
					noprebutisdomain=0;
					break;
				}
				urlcount++;
			}
		}
		//store the url without prefix to urlnoprefix
		urlcount=0;
		if(prefixsize_fromlist!=0 || noprebutisdomain==1){
			while(urlcount < urlsize)
			{
				if(urlcount>prefixsize_fromlist-1)
				{
					urlnoprefix_fromlist[urlnoprefixcount]=urlcopy[urlcount];
					//get urlnopath
					if(urlcopy[urlcount] != '/' && urlnopathnoprefix_done==0){
						urlnopathnoprefix_fromlist[urlnoprefixcount]=urlcopy[urlcount];
						urlnopathnoprefix_len++;
					}else{
						urlnopathnoprefix_done=1;
					}
					urlnoprefixcount++;
				}
				urlcount++;
			}
		}
		//check for file extension like html/htm/txt if no prefix in url
		//(the length tests keep checkDomain() from looking in front of a host shorter than the extension)
		if(noprebutisdomain==1 && urlsize>4){
			if((urlnopathnoprefix_len>=5 && checkDomain(urlnopathnoprefix_fromlist,".html",".HTML",urlnopathnoprefix_len,5)==1)
			|| (urlnopathnoprefix_len>=4 && checkDomain(urlnopathnoprefix_fromlist,".htm",".HTM",urlnopathnoprefix_len,4)==1)
			|| (urlnopathnoprefix_len>=4 && checkDomain(urlnopathnoprefix_fromlist,".txt",".TXT",urlnopathnoprefix_len,4)==1)){
				//it is a file name, not a host: discard what was gathered
				memset(domain,0,1000);
				memset(urlnoprefix_fromlist,0,1000);
				memset(urlnopathnoprefix_fromlist,0,1000);
				urlnoprefixcount=0;
			}
		}
		//get domain name
		int lenurl=strlen(urlnoprefix_fromlist);
		int numDots=0;
		int i=0;
		for(i=0;i<lenurl;i++){
			const char ch=urlnoprefix_fromlist[i];
			//to get folder path, locate final slash position
			if(ch=='/')
				lastSlashPos=i;
			//the first slash is never copied, so domain ends there (it was zeroed above)
			if(ch!='/')
				domain[i]=ch;
			if(ch=='.' && foundSlash==0)
				numDots++;
			//get path after hostname
			if(ch=='/' && foundSlash==0){
				foundSlash=1;
				slashPos=i-1;
				pathlen++;
			}
			if(foundSlash==1){
				urlPath[i-slashPos-1]=ch;
				pathlen++;
				if(ch=='.')
					foundDotInPath=1;
			}
			if(ch==':')
				foundColon=1;
			//Check if hostname is an IPv4 address (digits and dots only)
			if(((ch<48 && ch != '.') || (ch>57)) && foundSlash==0)
				isIPv4=0;
			//Check if hostname is an IPv6 address
			//NOTE(review): each range test below asks for a value that is both below and above the range,
			//so this never clears isIPv6; in effect any url containing ':' is treated as IPv6.
			//Not changed here because hosts with a port number currently depend on it - confirm intent.
			if(((ch<48 && ch > 57) || (ch<65 && ch>70) || (ch<97 && ch>102)) && foundSlash==0)
				isIPv6=0;
		}
		if(foundColon==0)
			isIPv6=0;
		if(isIPv6==1)//if ipv6, force it into working
			numDots=1;
		if(foundDotInPath==0 && pathlen>1){
			//no file name in the path: the whole path is the folder
			lastSlashPos=lenurl;
		}
		//get folder path
		folderPathLength=lastSlashPos-slashPos;
		for(i=0;i<folderPathLength;i++){
			folderPath[i]=urlnoprefix_fromlist[i+slashPos+1];
		}
		if(numDots==0 && isIPv6==0){
			//no dot in the host part: treat everything as a path
			memset(urlPath,0,1000);
			memset(folderPath,0,1000);
			strcpy(urlPath,urlnoprefix_fromlist);
			strcpy(folderPath,urlnoprefix_fromlist);
		}
		//make sure the folder path ends with a slash (i still equals folderPathLength here)
		if(folderPathLength>2 && folderPath[i-2] != 0 && folderPath[i-2] != '/')
			folderPath[i-1]='/';
		if(urlPath[0]==0)
			urlPath[0]='/';
		if(folderPath[0]==0)
			folderPath[0]='/';
		int lendomain=strlen(domain);
		//get tld: walk tldlist entry by entry, collecting each entry in buffer
		int lentldlist=strlen(tldlist);
		int foundDoubleDotTLD=0, k=0, firstSlash=0;
		for(i=0;i<=lentldlist;i++){
			if(tldlist[i] != ',' && tldlist[i] != 0){
				buffer[k]=tldlist[i];
				k++;
			}else if(foundDoubleDotTLD==0 && (tldlist[i] == ',' || tldlist[i] == 0)){
				//entry complete; note that it is searched for in the whole url, path included
				if(strstr(urlnoprefix_fromlist,buffer)!=NULL)
					foundDoubleDotTLD=1;
				if(numDots <=2 && foundDoubleDotTLD==1)
					strcpy(rootdomain,domain);
				if(numDots > 2 && foundDoubleDotTLD==1){
					//copy the host without its first label
					int j=0;
					for(j=0;j<lenurl;j++){
						if(foundDot==1){
							if(urlnoprefix_fromlist[j]=='/')
								firstSlash=1;
							if(firstSlash==0){
								rootdomain[rootdomaincount]=urlnoprefix_fromlist[j];
								rootdomaincount++;
							}
						}
						if(urlnoprefix_fromlist[j]=='.')
							foundDot=1;
					}
				}
				if (tldlist[i] == ','){
					memset(buffer,0,1000);
					k=0;
				}
			}else if(foundDoubleDotTLD==1){
				break;
			}
		}
		if(foundDoubleDotTLD==0){
			foundDot=rootdomaincount=0;
			if(numDots==1){
				strcpy(rootdomain,domain);
			}else if(numDots>1){
				//skip text before first dot (an IPv4 address is kept whole)
				for(i=0;i<lendomain;i++){
					if(foundDot==1 || isIPv4==1){
						rootdomain[rootdomaincount]=domain[i];
						rootdomaincount++;
					}
					if(domain[i]=='.')
						foundDot=1;
				}
			}
		}
	}
}
//Case-insensitive suffix test. Returns 1 when each of the last substrLen characters of 'domain'
//(a string of length domainLen) equals the character at the same position of substrLower or of
//substrUpper, otherwise 0. substrLower and substrUpper must both hold at least substrLen characters.
int checkDomain(char *domain, char *substrLower, char *substrUpper, int domainLen, int substrLen){
	//a suffix longer than the string can never match; without this test the scan would start before domain[0]
	if(domainLen < substrLen)
		return 0;
	for(int i=domainLen-substrLen, j=0;i<domainLen;i++,j++){
		if(domain[i]!=substrLower[j] && domain[i]!=substrUpper[j])
			return 0;
	}
	return 1;
}

214
db/wiby.sql Executable file
View file

@ -0,0 +1,214 @@
-- MySQL dump 10.13 Distrib 8.0.18, for Linux (x86_64)
--
-- Host: localhost Database: wiby
-- ------------------------------------------------------
-- Server version 8.0.18
/*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */;
/*!40101 SET @OLD_CHARACTER_SET_RESULTS=@@CHARACTER_SET_RESULTS */;
/*!40101 SET @OLD_COLLATION_CONNECTION=@@COLLATION_CONNECTION */;
/*!50503 SET NAMES utf8mb4 */;
/*!40103 SET @OLD_TIME_ZONE=@@TIME_ZONE */;
/*!40103 SET TIME_ZONE='+00:00' */;
/*!40014 SET @OLD_UNIQUE_CHECKS=@@UNIQUE_CHECKS, UNIQUE_CHECKS=0 */;
/*!40014 SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0 */;
/*!40101 SET @OLD_SQL_MODE=@@SQL_MODE, SQL_MODE='NO_AUTO_VALUE_ON_ZERO' */;
/*!40111 SET @OLD_SQL_NOTES=@@SQL_NOTES, SQL_NOTES=0 */;
--
-- Table structure for table `accounts`
--
DROP TABLE IF EXISTS `accounts`;
/*!40101 SET @saved_cs_client = @@character_set_client */;
/*!50503 SET character_set_client = utf8mb4 */;
-- One row per account, keyed by `name`.
-- `updated` is set on insert and refreshed automatically on every change of the row.
-- NOTE(review): `hash`, `level` and `attempts` presumably hold the password hash, the
-- privilege level and a failed-login counter; confirm against the code that uses this table.
CREATE TABLE `accounts` (
`name` varchar(50) NOT NULL,
`hash` text,
`level` text,
`attempts` int(11) DEFAULT '0',
`updated` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
PRIMARY KEY (`name`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1;
/*!40101 SET character_set_client = @saved_cs_client */;
--
-- Dumping data for table `accounts`
--
LOCK TABLES `accounts` WRITE;
/*!40000 ALTER TABLE `accounts` DISABLE KEYS */;
/*!40000 ALTER TABLE `accounts` ENABLE KEYS */;
UNLOCK TABLES;
--
-- Table structure for table `feedback`
--
DROP TABLE IF EXISTS `feedback`;
/*!40101 SET @saved_cs_client = @@character_set_client */;
/*!50503 SET character_set_client = utf8mb4 */;
-- Free-text messages, stored as utf8mb4.
-- Note that `time` is refreshed on every update of a row, not only when it is inserted.
CREATE TABLE `feedback` (
`id` bigint(20) NOT NULL AUTO_INCREMENT,
`message` text CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci,
`time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
/*!40101 SET character_set_client = @saved_cs_client */;
--
-- Dumping data for table `feedback`
--
LOCK TABLES `feedback` WRITE;
/*!40000 ALTER TABLE `feedback` DISABLE KEYS */;
/*!40000 ALTER TABLE `feedback` ENABLE KEYS */;
UNLOCK TABLES;
--
-- Table structure for table `graveyard`
--
DROP TABLE IF EXISTS `graveyard`;
/*!40101 SET @saved_cs_client = @@character_set_client */;
/*!50503 SET character_set_client = utf8mb4 */;
-- Same column layout as `reviewqueue` minus its `time` column.
-- `reservetime` is refreshed automatically on every change of the row.
-- CHARSET=utf8 is the 3-byte utf8mb3 character set in MySQL 8.0.
-- NOTE(review): presumably holds pages taken out of the index, with `reserved` naming the
-- reviewer who claimed the row; confirm against the code that uses this table.
CREATE TABLE `graveyard` (
`id` bigint(20) NOT NULL AUTO_INCREMENT,
`url` text,
`worksafe` tinyint(1) DEFAULT NULL,
`reserved` text,
`reservetime` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
/*!40101 SET character_set_client = @saved_cs_client */;
--
-- Dumping data for table `graveyard`
--
LOCK TABLES `graveyard` WRITE;
/*!40000 ALTER TABLE `graveyard` DISABLE KEYS */;
/*!40000 ALTER TABLE `graveyard` ENABLE KEYS */;
UNLOCK TABLES;
--
-- Table structure for table `indexqueue`
--
DROP TABLE IF EXISTS `indexqueue`;
/*!40101 SET @saved_cs_client = @@character_set_client */;
/*!50503 SET character_set_client = utf8mb4 */;
-- Queue of URLs to be indexed; the C tools insert rows here ("Inserting into indexqueue").
-- `url` and `approver` are latin1 while the table default is utf8 (utf8mb3).
-- The crawl_* and force_rules columns have counterparts of the same name in `windex`.
-- NOTE(review): `crawler_id` appears to select which crawler picks the row up when several
-- crawlers run (rows are assigned round robin by the inserting program); confirm.
CREATE TABLE `indexqueue` (
`id` bigint(20) NOT NULL AUTO_INCREMENT,
`url` text CHARACTER SET latin1 COLLATE latin1_swedish_ci,
`worksafe` tinyint(1) DEFAULT NULL,
`approver` text CHARACTER SET latin1 COLLATE latin1_swedish_ci,
`surprise` tinyint(1) DEFAULT NULL,
`updatable` int(11) DEFAULT '1',
`task` tinyint(4) DEFAULT NULL,
`crawl_tree` text,
`crawl_family` text,
`crawl_depth` int(11) DEFAULT NULL,
`crawl_pages` int(11) DEFAULT NULL,
`crawl_type` int(11) DEFAULT NULL,
`crawl_repeat` tinyint(4) DEFAULT NULL,
`force_rules` tinyint(1) DEFAULT NULL,
`crawler_id` int(11) DEFAULT NULL,
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
/*!40101 SET character_set_client = @saved_cs_client */;
--
-- Dumping data for table `indexqueue`
--
LOCK TABLES `indexqueue` WRITE;
/*!40000 ALTER TABLE `indexqueue` DISABLE KEYS */;
/*!40000 ALTER TABLE `indexqueue` ENABLE KEYS */;
UNLOCK TABLES;
--
-- Table structure for table `reviewqueue`
--
DROP TABLE IF EXISTS `reviewqueue`;
/*!40101 SET @saved_cs_client = @@character_set_client */;
/*!50503 SET character_set_client = utf8mb4 */;
-- `time` records when the row was inserted; `reservetime` is additionally refreshed on
-- every change of the row.
-- NOTE(review): presumably submitted URLs waiting for a reviewer, with `reserved` naming
-- the reviewer who claimed the row; confirm against the code that uses this table.
CREATE TABLE `reviewqueue` (
`id` bigint(20) NOT NULL AUTO_INCREMENT,
`url` text,
`worksafe` tinyint(1) DEFAULT NULL,
`reserved` text,
`reservetime` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
`time` datetime DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1;
/*!40101 SET character_set_client = @saved_cs_client */;
--
-- Dumping data for table `reviewqueue`
--
LOCK TABLES `reviewqueue` WRITE;
/*!40000 ALTER TABLE `reviewqueue` DISABLE KEYS */;
/*!40000 ALTER TABLE `reviewqueue` ENABLE KEYS */;
UNLOCK TABLES;
--
-- Table structure for table `windex`
--
DROP TABLE IF EXISTS `windex`;
/*!40101 SET @saved_cs_client = @@character_set_client */;
/*!50503 SET character_set_client = utf8mb4 */;
-- The search index. The replication tracker (c/rt.c) counts the rows of this table and
-- splits the `id` range evenly across the online replicas.
-- The utf8mb4_0900_ai_ci collation requires MySQL 8.0 or later.
-- The zero default of `date` is rejected when sql_mode contains NO_ZERO_DATE together with
-- strict mode; this dump loads because it sets SQL_MODE='NO_AUTO_VALUE_ON_ZERO' beforehand.
-- FULLTEXT index `main` spans tags, title, body, description and url; the other three
-- FULLTEXT indexes cover a single column each.
CREATE TABLE `windex` (
`id` bigint(20) NOT NULL AUTO_INCREMENT,
`url` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci,
`url_noprefix` text,
`title` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci,
`tags` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci,
`description` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci,
`body` longtext CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci,
`language` text,
`surprise` tinyint(1) DEFAULT NULL,
`http` tinyint(1) DEFAULT NULL,
`updatable` int(11) DEFAULT '1',
`worksafe` tinyint(1) DEFAULT NULL,
`crawl_tree` text,
`crawl_family` text,
`crawl_pages` int(11) DEFAULT NULL,
`crawl_type` int(11) DEFAULT NULL,
`crawl_repeat` tinyint(1) DEFAULT NULL,
`force_rules` tinyint(1) DEFAULT NULL,
`enable` tinyint(1) DEFAULT NULL,
`date` datetime NOT NULL DEFAULT '0000-00-00 00:00:00',
`updated` datetime DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
`approver` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci,
`fault` tinyint(1) DEFAULT '0',
PRIMARY KEY (`id`),
FULLTEXT KEY `main` (`tags`,`title`,`body`,`description`,`url`),
FULLTEXT KEY `title` (`title`),
FULLTEXT KEY `url` (`url`),
FULLTEXT KEY `url_noprefix` (`url_noprefix`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci;
/*!40101 SET character_set_client = @saved_cs_client */;
--
-- Dumping data for table `windex`
--
LOCK TABLES `windex` WRITE;
/*!40000 ALTER TABLE `windex` DISABLE KEYS */;
/*!40000 ALTER TABLE `windex` ENABLE KEYS */;
UNLOCK TABLES;
/*!40103 SET TIME_ZONE=@OLD_TIME_ZONE */;
/*!40101 SET SQL_MODE=@OLD_SQL_MODE */;
/*!40014 SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS */;
/*!40014 SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS */;
/*!40101 SET CHARACTER_SET_CLIENT=@OLD_CHARACTER_SET_CLIENT */;
/*!40101 SET CHARACTER_SET_RESULTS=@OLD_CHARACTER_SET_RESULTS */;
/*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */;
/*!40111 SET SQL_NOTES=@OLD_SQL_NOTES */;
-- Dump completed on 2022-07-05 1:23:45

99
db/wibytemp.sql Executable file
View file

@ -0,0 +1,99 @@
-- MySQL dump 10.13 Distrib 8.0.18, for Linux (x86_64)
--
-- Host: localhost Database: wibytemp
-- ------------------------------------------------------
-- Server version 8.0.18
/*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */;
/*!40101 SET @OLD_CHARACTER_SET_RESULTS=@@CHARACTER_SET_RESULTS */;
/*!40101 SET @OLD_COLLATION_CONNECTION=@@COLLATION_CONNECTION */;
/*!50503 SET NAMES utf8mb4 */;
/*!40103 SET @OLD_TIME_ZONE=@@TIME_ZONE */;
/*!40103 SET TIME_ZONE='+00:00' */;
/*!40014 SET @OLD_UNIQUE_CHECKS=@@UNIQUE_CHECKS, UNIQUE_CHECKS=0 */;
/*!40014 SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0 */;
/*!40101 SET @OLD_SQL_MODE=@@SQL_MODE, SQL_MODE='NO_AUTO_VALUE_ON_ZERO' */;
/*!40111 SET @OLD_SQL_NOTES=@@SQL_NOTES, SQL_NOTES=0 */;
--
-- Table structure for table `rejected`
--
DROP TABLE IF EXISTS `rejected`;
/*!40101 SET @saved_cs_client = @@character_set_client */;
/*!50503 SET character_set_client = utf8mb4 */;
-- `date` has no default, so the inserting code has to supply it.
-- NOTE(review): presumably a log of rejected URLs and the user who rejected them; confirm.
CREATE TABLE `rejected` (
`id` bigint(20) NOT NULL AUTO_INCREMENT,
`url` text,
`user` text,
`date` datetime DEFAULT NULL,
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci;
/*!40101 SET character_set_client = @saved_cs_client */;
--
-- Dumping data for table `rejected`
--
LOCK TABLES `rejected` WRITE;
/*!40000 ALTER TABLE `rejected` DISABLE KEYS */;
/*!40000 ALTER TABLE `rejected` ENABLE KEYS */;
UNLOCK TABLES;
--
-- Table structure for table `reserve_id`
--
DROP TABLE IF EXISTS `reserve_id`;
/*!40101 SET @saved_cs_client = @@character_set_client */;
/*!50503 SET character_set_client = utf8mb4 */;
-- `id` is not AUTO_INCREMENT: the inserting code chooses the value, and the primary key
-- rejects a second insert of the same id. `time` records when the row was inserted.
-- NOTE(review): presumably used to reserve a windex id before the row is written; confirm.
CREATE TABLE `reserve_id` (
`id` bigint(20) NOT NULL,
`time` datetime DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci;
/*!40101 SET character_set_client = @saved_cs_client */;
--
-- Dumping data for table `reserve_id`
--
LOCK TABLES `reserve_id` WRITE;
/*!40000 ALTER TABLE `reserve_id` DISABLE KEYS */;
/*!40000 ALTER TABLE `reserve_id` ENABLE KEYS */;
UNLOCK TABLES;
--
-- Table structure for table `titlecheck`
--
DROP TABLE IF EXISTS `titlecheck`;
/*!40101 SET @saved_cs_client = @@character_set_client */;
/*!50503 SET character_set_client = utf8mb4 */;
-- Pairs of url and title.
-- NOTE(review): presumably scratch data for detecting pages with duplicate titles; confirm.
CREATE TABLE `titlecheck` (
`id` bigint(20) NOT NULL AUTO_INCREMENT,
`url` text,
`title` text,
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci;
/*!40101 SET character_set_client = @saved_cs_client */;
--
-- Dumping data for table `titlecheck`
--
LOCK TABLES `titlecheck` WRITE;
/*!40000 ALTER TABLE `titlecheck` DISABLE KEYS */;
/*!40000 ALTER TABLE `titlecheck` ENABLE KEYS */;
UNLOCK TABLES;
/*!40103 SET TIME_ZONE=@OLD_TIME_ZONE */;
/*!40101 SET SQL_MODE=@OLD_SQL_MODE */;
/*!40014 SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS */;
/*!40014 SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS */;
/*!40101 SET CHARACTER_SET_CLIENT=@OLD_CHARACTER_SET_CLIENT */;
/*!40101 SET CHARACTER_SET_RESULTS=@OLD_CHARACTER_SET_RESULTS */;
/*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */;
/*!40111 SET SQL_NOTES=@OLD_SQL_NOTES */;
-- Dump completed on 2022-07-05 1:23:55

View file

@ -0,0 +1,270 @@
##
# You should look at the following URL's in order to grasp a solid understanding
# of Nginx configuration files in order to fully unleash the power of Nginx.
# https://www.nginx.com/resources/wiki/start/
# https://www.nginx.com/resources/wiki/start/topics/tutorials/config_pitfalls/
# https://wiki.debian.org/Nginx/DirectoryStructure
#
# In most cases, administrators will remove this file from sites-enabled/ and
# leave it as reference inside of sites-available where it will continue to be
# updated by the nginx packaging team.
#
# This file will automatically load configuration files provided by other
# applications, such as Drupal or Wordpress. These applications will be made
# available underneath a path with that package name, such as /drupal8.
#
# Please see /usr/share/doc/nginx-doc/examples/ for more detailed examples.
##
# Default server configuration
#
# Cache for PHP responses (zone MYAPP, used by the "location ~ \.php$" blocks):
# up to 1g on disk, entries not requested for 5 minutes are removed.
fastcgi_cache_path /etc/nginx/phpcache levels=1:2 max_size=1g keys_zone=MYAPP:100m inactive=5m;
fastcgi_cache_key "$scheme$request_method$host$request_uri";
# Cache for responses of the proxied core app (zone main_cache), same limits.
# The key also contains the value of the "ws" cookie, so requests with different
# values of that cookie are cached separately.
proxy_cache_path /etc/nginx/cache levels=1:2 keys_zone=main_cache:100m max_size=1g inactive=5m;
proxy_cache_key "$scheme$request_method$host$request_uri$cookie_ws";
#server { #redirect http to https
# listen 80 default_server;
# listen [::]:80 default_server ipv6only=on;
# server_name wiby.me;
# return 301 https://$host$request_uri;
#}
# Servers running the core app. The "location = /" and "location = /json/" blocks proxy to
# this group; /settings/ and /surprise/ proxy to 127.0.0.1:8080 directly.
# To spread searches over several machines, uncomment the remote servers below; the
# commented "backup" line would then use the local instance only when the others are unavailable.
upstream remote_core {
# server 10.8.0.101:8080;
# server 10.8.0.102:8080;
# server 10.8.0.103:8080;
# server 10.8.0.104:8080;
# server 127.0.0.1:8080 backup;
server 127.0.0.1:8080;
}
server { #handles http requests. Allows for legacy browsers or else redirects to https
	listen 80 default_server;
	# listen [::]:80 default_server ipv6only=off; #this prevented nginx from starting on my vps, said port was in use
	server_name wiby.me www.wiby.me;
	if ( $http_user_agent ~ (Chrome)) { #redirect to https for old chrome devices (matches any user agent containing "Chrome")
		return 301 https://$host$request_uri;
	}
	root /var/www/html;
	# Add index.php to the list if you are using PHP
	index index.php index.html index.htm;
	#comment all "core app" location entries to revert wiby search to php
	location = / { #core app
		proxy_cache main_cache;
		proxy_cache_valid 5m;
		proxy_cache_bypass $no_cache;
		proxy_no_cache $no_cache;
		try_files $uri $uri/ =404;
		proxy_set_header X-Real-IP $remote_addr;
		#proxy_pass http://127.0.0.1:8080/;
		proxy_pass http://remote_core/;
	}
	location /settings/ { #core app
		try_files $uri $uri/ =404;
		proxy_set_header X-Real-IP $remote_addr;
		proxy_pass http://127.0.0.1:8080/settings/;
		#proxy_pass http://remote_core/settings/;
	}
	location = /json/ { #core app
		proxy_cache main_cache;
		proxy_cache_valid 5m;
		proxy_cache_bypass $no_cache;
		proxy_no_cache $no_cache;
		try_files $uri $uri/ =404;
		proxy_set_header X-Real-IP $remote_addr;
		#proxy_pass http://127.0.0.1:8080/json/;
		proxy_pass http://remote_core/json/;
	}
	location = /surprise/ { #core app
		# try_files $uri $uri/ =404;
		proxy_set_header X-Real-IP $remote_addr;
		proxy_pass http://127.0.0.1:8080/surprise/;
		#proxy_pass http://remote_core/surprise/;
	}
	location / {
		# First attempt to serve request as file, then
		# as directory, then fall back to displaying a 404.
		try_files $uri $uri/ =404;
	}
	# pass PHP scripts to FastCGI server
	#
	location ~ \.php$ {
		# try_files $uri =404;
		fastcgi_split_path_info ^(.+\.php)(/.+)$;
		include snippets/fastcgi-php.conf;
		include fastcgi_params;
		#
		# # With php-fpm (or other unix sockets):
		fastcgi_param SCRIPT_FILENAME $document_root$fastcgi_script_name;
		fastcgi_pass unix:/var/run/php/php7.4-fpm.sock;
		# fastcgi_index index.php;
		# # With php-cgi (or other tcp sockets):
		# fastcgi_pass 127.0.0.1:9000;
		# #for microcaching
		fastcgi_cache MYAPP;
		fastcgi_cache_valid 5m;
		fastcgi_cache_bypass $no_cache;
		fastcgi_no_cache $no_cache;
	}
	# deny access to .htaccess files, if Apache's document root
	# concurs with nginx's one
	#
	#location ~ /\.ht {
	# deny all;
	#}
	# Caching is on by default. Giving $no_cache a value for every request also avoids the
	# "using uninitialized variable" warning nginx logs when none of the rules below match.
	set $no_cache 0;
	# Don't cache the following URLs
	# Each pattern is an alternation: the first rule, for example, matches any URI that
	# contains "/review/" OR "/index.php".
	if ($request_uri ~* "/(review/|index.php)") { set $no_cache 1; }
	if ($request_uri ~* "/(review/|login.html.php)") { set $no_cache 1; }
	# NOTE(review): "inndexqueue" looks like a typo; harmless here because everything below /review/ already matches
	if ($request_uri ~* "/(review/|inndexqueue.html.php)") { set $no_cache 1; }
	if ($request_uri ~* "/(review/|review.php)") { set $no_cache 1; }
	if ($request_uri ~* "/(review/|error.html.php)") { set $no_cache 1; }
	if ($request_uri ~* "/(insert/|index.php)") { set $no_cache 1; }
	if ($request_uri ~* "/(insert/|login.html.php)") { set $no_cache 1; }
	if ($request_uri ~* "/(insert/|form.html.php)") { set $no_cache 1; }
	if ($request_uri ~* "/(insert/|insert.php)") { set $no_cache 1; }
	if ($request_uri ~* "/(insert/|error.html.php)") { set $no_cache 1; }
	if ($request_uri ~* "/(insert/|insert.html.php)") { set $no_cache 1; }
	if ($request_uri ~* "/(surprise/|index.php)") { set $no_cache 1; }
	if ($request_uri ~* "/(submit/|index.php)") { set $no_cache 1; }
	if ($request_uri ~* "/(submit/|form.html.php)") { set $no_cache 1; }
	if ($request_uri ~* "/(submit/|submit.html.php)") { set $no_cache 1; }
	if ($request_uri ~* "/(submit/|error.html.php)") { set $no_cache 1; }
}
server { #handles https requests
	#listen 80 default_server; #comment this out if you dont want http
	#listen [::]:80 default_server; #comment this out if you dont want http
	# SSL configuration
	#
	listen 443 ssl default_server;
	#listen [::]:443 ssl default_server; #nginx wasnt starting on my vps with this
	ssl_certificate /etc/nginx/ssl/YOUR_SSL_CERT.crt;
	ssl_certificate_key /etc/nginx/ssl/YOUR_SSL_KEY.key;
	#
	# Note: You should disable gzip for SSL traffic.
	# See: https://bugs.debian.org/773332
	#
	# Read up on ssl_ciphers to ensure a secure configuration.
	# See: https://bugs.debian.org/765782
	#
	# Self signed certs generated by the ssl-cert package
	# Don't use them in a production server!
	#
	# include snippets/snakeoil.conf;
	root /var/www/html;
	# Add index.php to the list if you are using PHP
	index index.php index.html index.htm;
	server_name wiby.me www.wiby.me;
	#comment all "core app" location entries to revert wiby search to php
	location = / { #core app
		proxy_cache main_cache;
		proxy_cache_valid 5m;
		proxy_cache_bypass $no_cache;
		proxy_no_cache $no_cache;
		try_files $uri $uri/ =404;
		proxy_set_header X-Real-IP $remote_addr;
		#proxy_pass http://127.0.0.1:8080/;
		proxy_pass http://remote_core/;
	}
	location /settings/ { #core app
		try_files $uri $uri/ =404;
		proxy_set_header X-Real-IP $remote_addr;
		proxy_pass http://127.0.0.1:8080/settings/;
		#proxy_pass http://remote_core/settings/;
	}
	location = /json/ { #core app
		proxy_cache main_cache;
		proxy_cache_valid 5m;
		proxy_cache_bypass $no_cache;
		proxy_no_cache $no_cache;
		try_files $uri $uri/ =404;
		proxy_set_header X-Real-IP $remote_addr;
		#proxy_pass http://127.0.0.1:8080/json/;
		proxy_pass http://remote_core/json/;
	}
	location = /surprise/ { #core app
		# try_files $uri $uri/ =404;
		proxy_set_header X-Real-IP $remote_addr;
		proxy_pass http://127.0.0.1:8080/surprise/;
		#proxy_pass http://remote_core/surprise/;
	}
	location / {
		# First attempt to serve request as file, then
		# as directory, then fall back to displaying a 404.
		try_files $uri $uri/ =404;
	}
	# pass PHP scripts to FastCGI server
	#
	location ~ \.php$ {
		# try_files $uri =404;
		fastcgi_split_path_info ^(.+\.php)(/.+)$;
		include snippets/fastcgi-php.conf;
		include fastcgi_params;
		#
		# # With php-fpm (or other unix sockets):
		fastcgi_param SCRIPT_FILENAME $document_root$fastcgi_script_name;
		fastcgi_pass unix:/var/run/php/php7.4-fpm.sock;
		# fastcgi_index index.php;
		# # With php-cgi (or other tcp sockets):
		# fastcgi_pass 127.0.0.1:9000;
		# #for microcaching
		fastcgi_cache MYAPP;
		fastcgi_cache_valid 5m;
		fastcgi_cache_bypass $no_cache;
		fastcgi_no_cache $no_cache;
	}
	# deny access to .htaccess files, if Apache's document root
	# concurs with nginx's one
	#
	#location ~ /\.ht {
	# deny all;
	#}
	# Caching is on by default. Giving $no_cache a value for every request also avoids the
	# "using uninitialized variable" warning nginx logs when none of the rules below match.
	set $no_cache 0;
	# Don't cache the following URLs
	# Each pattern is an alternation: the first rule, for example, matches any URI that
	# contains "/review/" OR "/index.php".
	if ($request_uri ~* "/(review/|index.php)") { set $no_cache 1; }
	if ($request_uri ~* "/(review/|login.html.php)") { set $no_cache 1; }
	# NOTE(review): "inndexqueue" looks like a typo; harmless here because everything below /review/ already matches
	if ($request_uri ~* "/(review/|inndexqueue.html.php)") { set $no_cache 1; }
	if ($request_uri ~* "/(review/|review.php)") { set $no_cache 1; }
	if ($request_uri ~* "/(review/|error.html.php)") { set $no_cache 1; }
	if ($request_uri ~* "/(insert/|index.php)") { set $no_cache 1; }
	if ($request_uri ~* "/(insert/|login.html.php)") { set $no_cache 1; }
	if ($request_uri ~* "/(insert/|form.html.php)") { set $no_cache 1; }
	if ($request_uri ~* "/(insert/|insert.php)") { set $no_cache 1; }
	if ($request_uri ~* "/(insert/|error.html.php)") { set $no_cache 1; }
	if ($request_uri ~* "/(insert/|insert.html.php)") { set $no_cache 1; }
	if ($request_uri ~* "/(surprise/|index.php)") { set $no_cache 1; }
	if ($request_uri ~* "/(submit/|index.php)") { set $no_cache 1; }
	if ($request_uri ~* "/(submit/|form.html.php)") { set $no_cache 1; }
	if ($request_uri ~* "/(submit/|submit.html.php)") { set $no_cache 1; }
	if ($request_uri ~* "/(submit/|error.html.php)") { set $no_cache 1; }
}

976
go/core/1core.go Executable file
View file

@ -0,0 +1,976 @@
package main
import (
"database/sql"
_ "github.com/go-sql-driver/mysql"
// "fmt"
"html"
"html/template"
"log"
"net/http"
"net/url"
"strconv"
"strings"
"unicode/utf8"
// "time"
)
// Template payload types used by the handlers in this file.
type (
	// indexPage carries no data; it is passed to templates that need none.
	indexPage struct{}

	// errorReport carries an error message to the error template.
	errorReport struct {
		Error string
	}

	// surpriseURL carries the URL selected by the surprise handler.
	surpriseURL struct {
		Url string
	}

	// settingsPage carries the current state of the two settings flags.
	settingsPage struct {
		Worksafe    bool
		FilterHTTPS bool
	}

	// MySQLResults is one search result row (columns of the windex table).
	MySQLResults struct {
		Id          string
		Url         string
		Title       string
		Description string
		Body        string
	}

	// PageData is the payload of the results templates.
	PageData struct {
		DBResults  []MySQLResults
		Query      string
		Totalcount string
		FindMore   bool
	}
)
// main registers the search, JSON, surprise and settings routes on the
// default mux and serves them on localhost:8080. ListenAndServe only returns
// on failure, in which case the error is logged and the process exits.
func main() {
	routes := []struct {
		pattern string
		serve   func(http.ResponseWriter, *http.Request)
	}{
		{"/", handler},
		{"/json", handler},
		{"/json/", handler},
		{"/surprise", surprise},
		{"/surprise/", surprise},
		{"/settings/", settings},
		{"/settings", settings},
	}
	for _, route := range routes {
		http.HandleFunc(route.pattern, route.serve)
	}
	log.Fatal(http.ListenAndServe("localhost:8080", nil))
}
//https://golang.org/pkg/net/http/#Request
func handler(w http.ResponseWriter, r *http.Request) {
//fmt.Fprintf(w, "%s %s \n", r.Method, r.URL)
//fmt.Fprintf(w, "%s \n", r.URL.RawQuery)
//check if worksafe+https cookie enabled.
filterHTTPS := false
worksafe := true
worksafeHTTPSCookie, err := r.Cookie("ws")
if err != nil {
worksafe = true
filterHTTPS = false
} else if worksafeHTTPSCookie.Value == "0" {
worksafe = false
filterHTTPS = false
} else if worksafeHTTPSCookie.Value == "1" {
worksafe = true
filterHTTPS = false
} else if worksafeHTTPSCookie.Value == "2" {
worksafe = false
filterHTTPS = true
} else if worksafeHTTPSCookie.Value == "3" {
worksafe = true
filterHTTPS = true
}
//setup for error report
error := errorReport{}
//Get the raw query
m, _ := url.ParseQuery(r.URL.RawQuery)
//Get the query parameters (q and o)
//fmt.Fprintf(w,"%s\n%s\n", m["q"][0], m["o"][0])
json := false
if strings.Contains(r.URL.Path, "/json") {
json = true
if _, ok := m["nsfw"]; ok { //check if &nsfw added to json url
worksafe = false
}
}
query := ""
queryNoQuotes := ""
queryNoQuotes_SQLsafe := ""
offset := "0"
//Check if query and offset params exist
if _, ok := m["q"]; ok {
query = strings.Replace(m["q"][0], "'", "''", -1)
queryNoQuotes = m["q"][0]
}
if _, ok := m["o"]; ok {
offset = strings.Replace(m["o"][0], "'", "''", -1)
}
lim := "12"
if query == "" { //what do if no query found?
//load index if no query detected
if r.URL.Path == "/" {
p := indexPage{}
t, _ := template.ParseFiles("coreassets/form.html.go")
t.Execute(w, p)
} else if strings.Contains(r.URL.Path, "/json") { //load json info page if json selected
p := indexPage{}
t, _ := template.ParseFiles("coreassets/json/json.html.go")
t.Execute(w, p)
} else {
p := indexPage{}
t, _ := template.ParseFiles("coreassets/form.html.go")
t.Execute(w, p)
}
} else {
//Make sure offset is a number
offsetInt, err := strconv.Atoi(offset)
if err != nil {
offset = "0"
offsetInt = 0
}
//Convert lim to number also
limInt, _ := strconv.Atoi(lim)
//get some details from the raw query
var additions string
querylen := len(query)
//see if a search redirect (! or &) is used for a different search engine
if json == false && (strings.Contains(m["q"][0],"!") || strings.Contains(m["q"][0],"&")){
searchredirect(w, r, m["q"][0])
}
//phone users
if query[querylen-1] == ' '{
query = query[:querylen-1]
queryNoQuotes = queryNoQuotes[:len(queryNoQuotes)-1]
querylen = len(query)
}
//check if user wants to limit search to a specific website
sitePos := -1
siteEnd := 0
siteURL := ""
if strings.Index(strings.ToLower(query), "site:") > -1 {
//get url user wants to search and remove it from the query stringre
sitePos = strings.Index(strings.ToLower(query), "site:")
siteEnd = strings.Index(query[sitePos:], " ")
//fmt.Printf("\n%d\n%d\n",sitePos,siteEnd)
if siteEnd > -1 && sitePos > 1 { //site is not last part of query
siteURL = query[sitePos+5 : siteEnd+sitePos]
query = query[:sitePos-1] + query[siteEnd+sitePos:]
queryNoQuotes = queryNoQuotes[:sitePos-1] + queryNoQuotes[siteEnd+sitePos:]
additions = additions + "AND url LIKE '%" + siteURL + "%' "
} else if siteEnd > -1 && sitePos == 0 { //site is at beginning
siteURL = query[sitePos+5 : siteEnd]
query = query[siteEnd+1:]
queryNoQuotes = queryNoQuotes[siteEnd+1:]
additions = additions + "AND url LIKE '%" + siteURL + "%' "
} else if siteEnd < 0 && sitePos > 1 { //site is at end
siteURL = query[sitePos+5:]
query = query[:sitePos-1]
queryNoQuotes = queryNoQuotes[:sitePos-1]
additions = additions + "AND url LIKE '%" + siteURL + "%' "
}else if querylen > 5{
query = query[5:]
}
querylen = len(query)
}
//fmt.Printf("Addition: \n%s\nQuery: '%s'\n",additions,query)
//see if user uses -https flag (instead of cookie settings option)
if querylen > 7 && strings.ToLower(query[querylen-7:querylen]) == " -https" {
filterHTTPS = true
query = query[0 : querylen-7]
querylen = len(query)
}
//check if user wants to search within a time window (day,week,month)
option := ""
//fmt.Printf("\n'%s'\n",query)
location := strings.Index(query, " !")
if location == -1 {
location = strings.Index(query, " &")
}
if location > -1 && strings.Index(query[location+1:querylen], " ") == -1 { //option is at end of query
option = query[location+2 : querylen]
query = query[:location]
queryNoQuotes = queryNoQuotes[:location]
querylen = len(query)
}else if querylen > 0 && (query[0] == '!' || query[0] == '&') && strings.Index(query, " ") > -1{ //option is at start of query
option = query[1:strings.Index(query, " ")]
query = query[strings.Index(query, " ")+1:]
queryNoQuotes = queryNoQuotes[strings.Index(queryNoQuotes, " ")+1:]
querylen = len(query)
}
option = strings.ToLower(option)
if option != "" {
if option == "td" { //day
additions = additions + "AND date > NOW() - INTERVAL 1 DAY "
} else if option == "tw" { //week
additions = additions + "AND date > NOW() - INTERVAL 7 DAY "
} else if option == "tm" { //month
additions = additions + "AND date > NOW() - INTERVAL 30 DAY "
} else if option == "ty" { //year
additions = additions + "AND date > NOW() - INTERVAL 365 DAY "
}
}
//check if worksafe and filterHTTPS flags set
if worksafe == true {
additions = additions + "AND worksafe = '1' "
}
if filterHTTPS == true {
additions = additions + "AND http = '1' "
}
//if query is just 1 or 2 letters, help make it work. Also CIA :D
if len(query) < 3 || query == "cia" || query == "CIA" {
queryfix := " " + query + " *"
query = queryfix
queryNoQuotes = queryfix
}
//search if query has quotes and remove them (so we can find the longest word in the query)
exactMatch := false
//queryNoQuotes := query
if strings.Contains(query, "\"") {
exactMatch = true
queryNoQuotes = strings.TrimLeft(queryNoQuotes, "\"")
getlastquote := strings.Split(queryNoQuotes, "\"")
queryNoQuotes = getlastquote[0]
//fmt.Printf("%s \n", queryNoQuotes)
}
//Prepare to find longest word in query
words := strings.Split(queryNoQuotes, " ")
longestWordLength := 0
longestWord := ""
wordcount := 0
longestwordelementnum := 0
queryNoQuotesOrFlags := ""
requiredword := ""
//queryNoFlags := ""
//first remove any flags inside var queryNoQuotes, also grab any required words (+ prefix)
if strings.Contains(queryNoQuotes, "-") || strings.Contains(queryNoQuotes, "+") {
for i, wordNoFlags := range words {
if i > 0 && strings.HasPrefix(wordNoFlags, "-") == false && strings.HasPrefix(wordNoFlags, "+") == false { //add a space after
queryNoQuotesOrFlags += " "
}
if strings.HasPrefix(wordNoFlags, "-") == false && strings.HasPrefix(wordNoFlags, "+") == false {
queryNoQuotesOrFlags += wordNoFlags
}
if strings.HasPrefix(wordNoFlags, "+") == true && len(wordNoFlags) > 1 { //get requiredword
requiredword = wordNoFlags[1:len(wordNoFlags)]
}
}
queryNoQuotes = queryNoQuotesOrFlags
}
//now find longest word
words = strings.Split(queryNoQuotes, " ")
if exactMatch == false {
for _, word := range words {
if len(word) > longestWordLength {
longestWordLength = len(word)
longestWord = word
longestwordelementnum = wordcount
}
wordcount++
}
}
//remove the '*' if contained anywhere in queryNoQuotes
if strings.Contains(queryNoQuotes, "*") && exactMatch == false {
queryNoQuotes = strings.Replace(queryNoQuotes, "*", "", -1)
}
//get sql safe querynoquotes
queryNoQuotes_SQLsafe = strings.Replace(queryNoQuotes, "'", "''", -1)
//fmt.Printf("\nquery: %s\nquerynoquotes: %s\nquerynoquotes_sqlsafe: %s\n",query,queryNoQuotes,queryNoQuotes_SQLsafe)
//fmt.Fprintf(w,"%s\n%s\n", query,offset)
//fmt.Printf("hai\n")
//get copy of original query because we might have to modify it further
queryOriginal := query
tRes := MySQLResults{}
var res = PageData{}
//init the db and set charset
db, err := sql.Open("mysql", "guest:qwer@/wiby?charset=utf8mb4")
if err != nil {
p := indexPage{}
t, _ := template.ParseFiles("coreassets/error.html.go")
t.Execute(w, p)
}
defer db.Close()
// Open doesn't open a connection. Validate DSN data:
err = db.Ping()
if err != nil {
error.Error = err.Error()
t, _ := template.ParseFiles("coreassets/error.html.go")
t.Execute(w, error)
}
//Check if query is a url.
urlDetected := false
isURL := ""
if strings.Index(query, " ") == -1 && strings.Index(query, "\"") == -1 && strings.Index(query, ".") > -1 { //note this will also flag on file extensions
if len(query) > 6 && (query[0:7] == "http://" || query[0:7] == "HTTP://") {
query = query[7:]
} else if len(query) > 7 && (query[0:8] == "https://" || query[0:8] == "HTTPS://") {
query = query[8:]
}
if len(queryNoQuotes_SQLsafe) > 6 && (queryNoQuotes_SQLsafe[0:7] == "http://" || queryNoQuotes_SQLsafe[0:7] == "HTTP://") {
queryNoQuotes_SQLsafe = queryNoQuotes_SQLsafe[7:]
} else if len(queryNoQuotes_SQLsafe) > 7 && (queryNoQuotes_SQLsafe[0:8] == "https://" || queryNoQuotes_SQLsafe[0:8] == "HTTPS://") {
queryNoQuotes_SQLsafe = queryNoQuotes_SQLsafe[8:]
}
query = "\"" + query + "\""
urlDetected = true
isURL = "WHEN LOCATE('" + queryNoQuotes_SQLsafe + "',url)>0 THEN 25"
}
//Check if query contains a hyphenated word. Will wrap quotes around hyphenated words that aren't part of a string which is already wraped in quotes.
if (strings.Contains(queryNoQuotes_SQLsafe, "-") || strings.Contains(queryNoQuotes_SQLsafe, "+")) && urlDetected == false {
if query == "c++" || query == "C++" { //shitty but works for now
query = "c++ programming"
}
hyphenwords := strings.Split(query, " ")
query = ""
quotes := 0
for i, word := range hyphenwords {
if strings.Contains(word, "\"") {
quotes++
}
if ((strings.Contains(word, "-") && word[0] != '-') || (strings.Contains(word, "+") && word[0] != '+')) && quotes%2 == 0 { //if hyphen or plus exists, not a flag, not wrapped in quotes already
word = "\"" + word + "\""
}
if i > 0 {
query += " "
}
query += word
}
}
//fmt.Printf(">%s<\n", query)
//perform full text search FOR InnoDB STORAGE ENGINE or MyISAM
var sqlQuery, id, url, title, description, body string
sqlQuery = "SELECT id, url, title, description, body FROM windex WHERE Match(tags, body, description, title, url) Against('" + query + "' IN BOOLEAN MODE) AND enable = '1' " + additions + "ORDER BY CASE WHEN LOCATE('" + queryNoQuotes_SQLsafe + "', tags)>0 THEN 30 " + isURL + " WHEN LOCATE('" + queryNoQuotes_SQLsafe + "', title)>0 AND Match(title) AGAINST('" + query + "' IN BOOLEAN MODE) THEN 20 WHEN LOCATE('" + queryNoQuotes_SQLsafe + "', title)>0 THEN 16 WHEN LOCATE('" + queryNoQuotes_SQLsafe + "', body)>0 THEN 15 WHEN Match(title) AGAINST('" + query + "' IN BOOLEAN MODE) THEN Match(title) AGAINST('" + query + "' IN BOOLEAN MODE) END DESC LIMIT " + lim + " OFFSET " + offset + ""
//sqlQuery = "SELECT id, url, title, description, body FROM windex WHERE Match(tags, body, description, title, url) Against('" + query + "' IN BOOLEAN MODE) AND enable = '1' " + additions + "ORDER BY CASE WHEN LOCATE('" + queryNoQuotes_SQLsafe + "', tags)>0 THEN 30 " + isURL + " WHEN LOCATE('" + queryNoQuotes_SQLsafe + "', title)>0 AND Match(title) AGAINST('" + query + "' IN BOOLEAN MODE) THEN 20 WHEN LOCATE('" + queryNoQuotes_SQLsafe + "', title)>0 THEN 16 WHEN Match(title) AGAINST('" + query + "' IN BOOLEAN MODE) THEN Match(title) AGAINST('" + query + "' IN BOOLEAN MODE) WHEN LOCATE('" + queryNoQuotes_SQLsafe + "', body)>0 THEN 15 END DESC LIMIT " + lim + " OFFSET " + offset + ""
//sqlQuery = "SELECT id, url, title, description, body FROM windex WHERE Match(tags, body, description, title, url) Against('" + query + "' IN BOOLEAN MODE) AND enable = '1' " + additions + "ORDER BY CASE WHEN LOCATE('" + queryNoQuotes_SQLsafe + "', tags)>0 THEN 30 " + isURL + " WHEN LOCATE('" + queryNoQuotes_SQLsafe + "', title)>0 AND Match(title) AGAINST('" + query + "' IN BOOLEAN MODE) THEN 20 WHEN LOCATE('" + queryNoQuotes_SQLsafe + "', title)>0 THEN 16 WHEN LOCATE('" + queryNoQuotes_SQLsafe + "', body)>0 THEN 15 WHEN Match(title) AGAINST('" + query + "' IN BOOLEAN MODE) THEN 14 END DESC LIMIT " + lim + " OFFSET " + offset + ""
rows, err := db.Query(sqlQuery)
if err != nil {
res.Totalcount = strconv.Itoa(0)
res.Query = m["q"][0] //get original unsafe query
if json {
w.Header().Set("Content-Type", "application/json")
t, _ := template.ParseFiles("coreassets/json/results.json.go")
t.Execute(w, res)
} else {
t, _ := template.ParseFiles("coreassets/results.html.go")
t.Execute(w, res)
}
//p := indexPage{}
//t, _ := template.ParseFiles("coreassets/form.html.go")
//t.Execute(w, p)
return
}
if urlDetected == true {
query = queryOriginal
}
count := 0
for rows.Next() {
count++
//this will get set if position of longest word of query is found within body
pos := -1
err := rows.Scan(&id, &url, &title, &description, &body)
if err != nil {
error.Error = err.Error()
t, _ := template.ParseFiles("coreassets/error.html.go")
t.Execute(w, error)
}
//find query inside body of page
if exactMatch == false {
/* //remove the '*' if contained anywhere in query
if strings.Contains(queryNoQuotes,"*"){
queryNoQuotes = strings.Replace(queryNoQuotes, "*", "", -1)
} */
if len(requiredword) > 0 { //search for position of required word if any, else search for position of whole query
pos = strings.Index(strings.ToLower(body), strings.ToLower(requiredword))
} else if pos == -1 {
pos = strings.Index(strings.ToLower(body), strings.ToLower(queryNoQuotes))
}
if pos == -1 { //prepare to find position of longest query word (or required word) within body
//remove the '*' at the end of the longest word if present
if strings.Contains(longestWord, "*") {
longestWord = strings.Replace(longestWord, "*", "", -1)
}
//search within body for position of longest query word.
pos = strings.Index(strings.ToLower(body), strings.ToLower(longestWord))
//not found?, set position to a different word, make sure there's no wildcard on it
if pos == -1 && wordcount > 1 {
if longestwordelementnum > 0 {
words[0] = strings.Replace(words[0], "*", "", -1)
pos = strings.Index(strings.ToLower(body), strings.ToLower(words[0]))
}
if longestwordelementnum == 0 {
words[1] = strings.Replace(words[1], "*", "", -1)
pos = strings.Index(strings.ToLower(body), strings.ToLower(words[1]))
}
}
}
} else { //if exact match, find position of query within body
pos = strings.Index(strings.ToLower(body), strings.ToLower(queryNoQuotes))
}
//still not found?, set position to 0
if pos == -1 {
pos = 0
}
//Adjust position for runes within body
pos = utf8.RuneCountInString(body[:pos])
starttext := 0
//ballpark := 0
ballparktext := ""
//figure out how much preceding text to use
if pos < 32 {
starttext = 0
} else if pos > 25 {
starttext = pos - 25
} else if pos > 20 {
starttext = pos - 15
}
//total length of the ballpark
textlength := 180
//populate the ballpark
if pos >= 0 {
ballparktext = substr(body, starttext, starttext+textlength)
} //else{ ballpark = 0}//looks unused
//find position of nearest Period
//foundPeriod := true
posPeriod := strings.Index(ballparktext, ". ") + starttext + 1
//find position of nearest Space
//foundSpace := true
posSpace := strings.Index(ballparktext, " ") + starttext
//if longest word in query is after a period+space within ballpark, reset starttext to that point
if (pos - starttext) > posPeriod {
starttext = posPeriod
//populate the bodymatch
if (pos - starttext) >= 0 {
body = substr(body, starttext, starttext+textlength)
} else {
body = ""
}
} else if pos > posSpace { //else if longest word in query is after a space within ballpark, reset starttext to that point
//else if(pos-starttext) > posSpace//else if longest word in query is after a space within ballpark, reset starttext to that point
starttext = posSpace
//populate the bodymatch
if (pos - starttext) >= 0 {
body = substr(body, starttext, starttext+textlength)
} else {
body = ""
}
} else //else just set the bodymatch to the ballparktext
{
//populate the bodymatch
if (pos - starttext) >= 0 {
body = ballparktext
} else {
body = ""
}
}
tRes.Id = id
tRes.Url = url
tRes.Title = html.UnescapeString(title)
tRes.Description = html.UnescapeString(description)
tRes.Body = html.UnescapeString(body)
if json == true {
tRes.Title = JSONRealEscapeString(tRes.Title)
tRes.Description = JSONRealEscapeString(tRes.Description)
tRes.Body = JSONRealEscapeString(tRes.Body)
}
res.DBResults = append(res.DBResults, tRes)
}
defer rows.Close()
rows.Close()
//================================================================================================================================
//no results found (count==0), so do a wildcard search (repeat the above process)
addWildcard := false
if count == 0 && offset == "0" && urlDetected == false && exactMatch == false {
addWildcard = true
query = strings.Replace(query, "\"", "", -1) //remove some things innodb gets fussy over
query = strings.Replace(query, "*", "", -1)
query = strings.Replace(query, "'", "", -1)
queryNoQuotes_SQLsafe = strings.Replace(queryNoQuotes_SQLsafe, "\"", "", -1)
queryNoQuotes_SQLsafe = strings.Replace(queryNoQuotes_SQLsafe, "*", "", -1)
queryNoQuotes_SQLsafe = strings.Replace(queryNoQuotes_SQLsafe, "'", "", -1)
query = query + "*"
sqlQuery = "SELECT id, url, title, description, body FROM windex WHERE Match(tags, body, description, title, url) Against('" + query + "' IN BOOLEAN MODE) AND enable = '1' " + additions + "ORDER BY CASE WHEN LOCATE('" + queryNoQuotes_SQLsafe + "', tags)>0 THEN 30 WHEN LOCATE('" + queryNoQuotes_SQLsafe + "', title)>0 AND Match(title) AGAINST('" + query + "' IN BOOLEAN MODE) THEN 20 WHEN LOCATE('" + queryNoQuotes_SQLsafe + "', title)>0 THEN 16 WHEN LOCATE('" + queryNoQuotes_SQLsafe + "', body)>0 THEN 15 WHEN Match(title) AGAINST('" + query + "' IN BOOLEAN MODE) THEN Match(title) AGAINST('" + query + "' IN BOOLEAN MODE) END DESC LIMIT " + lim + " OFFSET " + offset + ""
rows2, err := db.Query(sqlQuery)
if err != nil {
res.Totalcount = strconv.Itoa(0)
res.Query = m["q"][0] //get original unsafe query
if json {
w.Header().Set("Content-Type", "application/json")
t, _ := template.ParseFiles("coreassets/json/results.json.go")
t.Execute(w, res)
} else {
t, _ := template.ParseFiles("coreassets/results.html.go")
t.Execute(w, res)
}
//p := indexPage{}
//t, _ := template.ParseFiles("coreassets/form.html.go")
//t.Execute(w, p)
return
}
for rows2.Next() {
count++
//this will get set if position of longest word of query is found within body
pos := -1
err := rows2.Scan(&id, &url, &title, &description, &body)
if err != nil {
error.Error = err.Error()
t, _ := template.ParseFiles("coreassets/error.html.go")
t.Execute(w, error)
}
//find query inside body of page
if exactMatch == false {
//remove the '*' if contained anywhere in query
/*if strings.Contains(queryNoQuotes,"*"){
queryNoQuotes = strings.Replace(queryNoQuotes, "*", "", -1)
}*/
if len(requiredword) > 0 { //search for position of required word if any, else search for position of whole query
pos = strings.Index(strings.ToLower(body), strings.ToLower(requiredword))
} else if pos == -1 {
pos = strings.Index(strings.ToLower(body), strings.ToLower(queryNoQuotes))
}
if pos == -1 { //Not found? prepare to find position of longest query word within body
//remove the '*' at the end of the longest word if present
if strings.Contains(longestWord, "*") {
longestWord = strings.Replace(longestWord, "*", "", -1)
}
//search within body for position of longest query word.
pos = strings.Index(strings.ToLower(body), strings.ToLower(longestWord))
//not found?, set position to a different word, make sure there's no wildcard on it
if pos == -1 && wordcount > 1 {
if longestwordelementnum > 0 {
words[0] = strings.Replace(words[0], "*", "", -1)
pos = strings.Index(strings.ToLower(body), strings.ToLower(words[0]))
}
if longestwordelementnum == 0 {
words[1] = strings.Replace(words[1], "*", "", -1)
pos = strings.Index(strings.ToLower(body), strings.ToLower(words[1]))
}
}
}
} else { //if exact match, find position of query within body
pos = strings.Index(strings.ToLower(body), strings.ToLower(queryNoQuotes))
}
//still not found?, set position to 0
if pos == -1 {
pos = 0
}
//Adjust position for runes within body
pos = utf8.RuneCountInString(body[:pos])
starttext := 0
//ballpark := 0
ballparktext := ""
//figure out how much preceding text to use
if pos < 32 {
starttext = 0
} else if pos > 25 {
starttext = pos - 25
} else if pos > 20 {
starttext = pos - 15
}
//total length of the ballpark
textlength := 180
//populate the ballpark
if pos >= 0 {
ballparktext = substr(body, starttext, starttext+textlength)
} //else{ ballpark = 0}//looks unused
//find position of nearest Period
//foundPeriod := true
posPeriod := strings.Index(ballparktext, ". ") + starttext + 1
//find position of nearest Space
//foundSpace := true
posSpace := strings.Index(ballparktext, " ") + starttext
//if longest word in query is after a period+space within ballpark, reset starttext to that point
if (pos - starttext) > posPeriod {
starttext = posPeriod
//populate the bodymatch
if (pos - starttext) >= 0 {
body = substr(body, starttext, starttext+textlength)
} else {
body = ""
}
} else if pos > posSpace { //else if longest word in query is after a space within ballpark, reset starttext to that point
//else if(pos-starttext) > posSpace//else if longest word in query is after a space within ballpark, reset starttext to that point
starttext = posSpace
//populate the bodymatch
if (pos - starttext) >= 0 {
body = substr(body, starttext, starttext+textlength)
} else {
body = ""
}
} else //else just set the bodymatch to the ballparktext
{
//populate the bodymatch
if (pos - starttext) >= 0 {
body = ballparktext
} else {
body = ""
}
}
tRes.Id = id
tRes.Url = url
tRes.Title = html.UnescapeString(title)
tRes.Description = html.UnescapeString(description)
tRes.Body = html.UnescapeString(body)
if json == true {
tRes.Title = JSONRealEscapeString(tRes.Title)
tRes.Description = JSONRealEscapeString(tRes.Description)
tRes.Body = JSONRealEscapeString(tRes.Body)
}
res.DBResults = append(res.DBResults, tRes)
}
defer rows2.Close()
rows2.Close()
}
//=======================================================================================================================
//http://go-database-sql.org/retrieving.html
//Close DB
db.Close()
//If results = lim, allow the find more link
if count >= limInt && addWildcard == false {
res.FindMore = true
} else {
res.FindMore = false
}
totalCountInt := count + offsetInt
res.Totalcount = strconv.Itoa(totalCountInt)
res.Query = m["q"][0] //get original unsafe query
if json {
w.Header().Set("Content-Type", "application/json")
t, _ := template.ParseFiles("coreassets/json/results.json.go")
t.Execute(w, res)
} else {
t, _ := template.ParseFiles("coreassets/results.html.go")
t.Execute(w, res)
}
}
}
func settings(w http.ResponseWriter, r *http.Request) {
//setup for error report
error := errorReport{}
//check if worksafe (adult content) cookie enabled.
filterHTTPS := false
worksafe := true
worksafewasoff := false
worksafeHTTPSCookie, err := r.Cookie("ws")
if err != nil {
worksafe = true
filterHTTPS = false
} else if worksafeHTTPSCookie.Value == "0" {
worksafe = false
filterHTTPS = false
worksafewasoff = true
} else if worksafeHTTPSCookie.Value == "1" {
worksafe = true
filterHTTPS = false
} else if worksafeHTTPSCookie.Value == "2" {
worksafe = false
filterHTTPS = true
worksafewasoff = true
} else if worksafeHTTPSCookie.Value == "3" {
worksafe = true
filterHTTPS = true
}
//check if and what is the user posting
switch r.Method {
case "POST":
if err := r.ParseForm(); err != nil {
error.Error = err.Error()
t, _ := template.ParseFiles("coreassets/error.html.go")
t.Execute(w, error)
}
worksafebox := r.Form.Get("worksafe")
agreecheck := r.Form.Get("agree")
agreesubmit := r.Form.Get("agreesubmit")
httpsbox := r.Form.Get("filterHTTPS")
//if user agrees to terms to disable adult content, set cookie and return to index
if agreecheck == "on" {
worksafe = false
//expiration := time.Now().Add(365 * 24 * time.Hour)
if filterHTTPS == false {
cookie := http.Cookie{Name: "ws", Value: "0", Path: "/"}
http.SetCookie(w, &cookie)
} else {
cookie := http.Cookie{Name: "ws", Value: "2", Path: "/"}
http.SetCookie(w, &cookie)
}
p := indexPage{}
t, _ := template.ParseFiles("coreassets/settings/gohome.html")
t.Execute(w, p)
//else if worksafebox is checked, return to index with worksafe on
} else if worksafebox == "on" || agreesubmit == "on" {
//expiration := time.Now().Add(365 * 24 * time.Hour)
if httpsbox != "on" {
cookie := http.Cookie{Name: "ws", Value: "1", Path: "/"}
http.SetCookie(w, &cookie)
} else {
cookie := http.Cookie{Name: "ws", Value: "3", Path: "/"}
http.SetCookie(w, &cookie)
}
p := indexPage{}
t, _ := template.ParseFiles("coreassets/settings/gohome.html")
t.Execute(w, p)
//else if worksafebox unchecked and no cookie, go to content agreement section
} else if worksafebox != "on" && worksafewasoff == false && agreesubmit != "on" {
p := indexPage{}
if httpsbox == "on" {
cookie := http.Cookie{Name: "ws", Value: "3", Path: "/"}
http.SetCookie(w, &cookie)
} else {
cookie := http.Cookie{Name: "ws", Value: "1", Path: "/"}
http.SetCookie(w, &cookie)
}
t, _ := template.ParseFiles("coreassets/settings/agree.html.go")
t.Execute(w, p)
//else if worksafebox unchecked and cookie alredy agreed, go back to index
} else if worksafebox != "on" && worksafewasoff == true {
if httpsbox == "on" {
cookie := http.Cookie{Name: "ws", Value: "2", Path: "/"}
http.SetCookie(w, &cookie)
} else {
cookie := http.Cookie{Name: "ws", Value: "0", Path: "/"}
http.SetCookie(w, &cookie)
}
p := indexPage{}
t, _ := template.ParseFiles("coreassets/settings/gohome.html")
t.Execute(w, p)
}
default:
//load the settings page if no post value
settingspage := settingsPage{}
settingspage.Worksafe = worksafe
settingspage.FilterHTTPS = filterHTTPS
t, _ := template.ParseFiles("coreassets/settings/settings.html.go")
t.Execute(w, settingspage)
}
}
func surprise(w http.ResponseWriter, r *http.Request) {
surprise := surpriseURL{}
//check if worksafe+HTTPS cookie enabled.
filterHTTPS := false
worksafeHTTPSCookie, err := r.Cookie("ws")
if err != nil {
filterHTTPS = false
} else if worksafeHTTPSCookie.Value == "2" {
filterHTTPS = true
} else if worksafeHTTPSCookie.Value == "3" {
filterHTTPS = true
}
//setup for error report
error := errorReport{}
//init the db and set charset
db, err := sql.Open("mysql", "guest:qwer@/wiby?charset=utf8mb4")
if err != nil {
error.Error = err.Error()
t, _ := template.ParseFiles("coreassets/error.html.go")
t.Execute(w, error)
}
defer db.Close()
// Open doesn't open a connection. Validate DSN data:
err = db.Ping()
if err != nil {
error.Error = err.Error()
t, _ := template.ParseFiles("coreassets/error.html.go")
t.Execute(w, error)
}
//grab a random page
var sqlQuery string
if filterHTTPS == false {
sqlQuery = "select url from windex where worksafe = 1 and surprise = 1 order by rand() limit 1"
} else {
sqlQuery = "select url from windex where worksafe = 1 and surprise = 1 and http = 1 order by rand() limit 1"
}
rows, err := db.Query(sqlQuery)
if err != nil {
error.Error = err.Error()
t, _ := template.ParseFiles("coreassets/error.html.go")
t.Execute(w, error)
}
var url string
for rows.Next() {
err := rows.Scan(&url)
if err != nil {
error.Error = err.Error()
t, _ := template.ParseFiles("coreassets/error.html.go")
t.Execute(w, error)
}
surprise.Url = url
}
defer rows.Close()
rows.Close()
db.Close()
t, _ := template.ParseFiles("coreassets/surprise.html.go")
t.Execute(w, surprise)
}
// mysqlEscapeReplacer performs all MySQL string-literal escapes in one pass.
var mysqlEscapeReplacer = strings.NewReplacer(
	"\\", "\\\\",
	"'", `\'`,
	"\x00", `\0`,
	"\n", `\n`,
	"\r", `\r`,
	`"`, `\"`,
	"\x1a", `\Z`,
)

// MysqlRealEscapeString escapes value for use inside a quoted MySQL string
// literal: backslash, single and double quote, NUL, newline, carriage return
// and Ctrl-Z are each prefixed with (or replaced by) a backslash sequence.
//
// The escapes are applied in a single pass. Applying them one after another
// in map-iteration order (which is random in Go) could escape the backslashes
// added by an earlier rule a second time.
func MysqlRealEscapeString(value string) string {
	return mysqlEscapeReplacer.Replace(value)
}
// jsonEscapeReplacer performs the JSON control-character escapes in one pass.
var jsonEscapeReplacer = strings.NewReplacer(
	"\\", "\\\\",
	"\t", `\t`,
	"\b", `\b`,
	"\n", `\n`,
	"\r", `\r`,
	"\f", `\f`,
)

// JSONRealEscapeString escapes backslash, tab, backspace, newline, carriage
// return and form feed in value so it can be placed in a JSON string. Double
// quotes are deliberately left untouched, as before.
//
// The escapes are applied in a single pass. Applying them one after another
// in map-iteration order (which is random in Go) could escape the backslashes
// added by an earlier rule a second time, e.g. turning a newline into `\\n`.
func JSONRealEscapeString(value string) string {
	return jsonEscapeReplacer.Replace(value)
}
// substr returns the part of s from rune index start (inclusive) to rune
// index end (exclusive). Indexes count runes, not bytes, so multi-byte
// characters are never split.
//
// A negative start is treated as 0 and an end past the last rune is treated
// as the end of s. The result is empty when end <= start or when start is at
// or beyond the number of runes in s.
func substr(s string, start int, end int) string {
	if start < 0 {
		start = 0
	}
	if end <= start {
		return ""
	}
	startByte := -1 // byte offset of rune number start; -1 until reached
	runeIdx := 0
	for byteIdx := range s {
		if runeIdx == start {
			startByte = byteIdx
		}
		if runeIdx == end {
			// end > start, so startByte has been set by now.
			return s[startByte:byteIdx]
		}
		runeIdx++
	}
	if startByte < 0 {
		return "" // start lies at or beyond the end of s
	}
	return s[startByte:]
}
// searchredirect sends the client to another search engine when query carries
// a redirect "bang": a '!' or '&' followed by an engine code, either as the
// first token ("!g cats") or as the last one ("cats !g"). A query that is
// only a bang ("!g") redirects with an empty search text.
//
// Engine codes (case-insensitive): g, gi, gv, gm (Google text, image, video,
// maps) and b, bi, bv, bm (Bing text, image, video, maps). Any other code, or
// a query without a bang, writes nothing to w.
//
// The search text is URL-escaped before being placed in the target URL, so
// characters such as '&', '#', '+' or spaces reach the other engine intact.
func searchredirect(w http.ResponseWriter, r *http.Request, query string) {
	if query == "" {
		return
	}

	// Separate the actual query from the redirect code.
	actualquery := ""
	redirect := ""
	startsWithBang := query[0] == '!' || query[0] == '&'
	if strings.Contains(query, " ") {
		location := strings.Index(query, " !")
		if location == -1 {
			location = strings.Index(query, " &")
		}
		if location > -1 && !strings.Contains(query[location+1:], " ") { // redirect is at end of query
			redirect = query[location+2:]
			actualquery = query[:location]
		} else if startsWithBang { // redirect is at start of query
			firstSpace := strings.Index(query, " ")
			redirect = query[1:firstSpace]
			actualquery = query[firstSpace+1:]
		}
	} else if startsWithBang && len(query) > 1 { // query is just the bang
		redirect = query[1:]
	}
	if redirect == "" {
		return
	}

	// Determine which search engine to redirect to.
	target := ""
	switch strings.ToLower(redirect) {
	case "g": // google text search
		target = "http://google.com/search?q=" + url.QueryEscape(actualquery)
	case "b": // bing text search
		target = "http://bing.com/search?q=" + url.QueryEscape(actualquery)
	case "gi": // google image search
		target = "http://www.google.com/search?tbm=isch&q=" + url.QueryEscape(actualquery)
	case "bi": // bing image search
		target = "http://www.bing.com/images/search?q=" + url.QueryEscape(actualquery)
	case "gv": // google video search
		target = "http://www.google.com/search?tbm=vid&q=" + url.QueryEscape(actualquery)
	case "bv": // bing video search
		target = "http://www.bing.com/videos/search?q=" + url.QueryEscape(actualquery)
	case "gm": // google maps search (text is a path segment here)
		target = "http://www.google.com/maps/search/" + url.PathEscape(actualquery)
	case "bm": // bing maps search
		target = "http://www.bing.com/maps?q=" + url.QueryEscape(actualquery)
	default:
		return
	}
	http.Redirect(w, r, target, http.StatusSeeOther)
}
/*func caseInsenstiveContains(fullstring, substring string) bool {
return strings.Contains(strings.ToLower(fullstring), strings.ToLower(substring))
}*/
/*
A QueryString is, by definition, in the URL. You can access the URL of the request using req.URL (doc). The URL object has a Query() method (doc) that returns a Values type, which is simply a map[string][]string of the QueryString parameters.
If what you're looking for is the POST data as submitted by an HTML form, then this is (usually) a key-value pair in the request body. You're correct in your answer that you can call ParseForm() and then use req.Form field to get the map of key-value pairs, but you can also call FormValue(key) to get the value of a specific key. This calls ParseForm() if required, and gets values regardless of how they were sent (i.e. in query string or in the request body).
req.URL.RawQuery returns everything after the ? on a GET request, if that helps.
*/
/*import (
"net/http"
)
func main() {
http.Handle("/", http.StripPrefix("/", http.FileServer(http.Dir("./"))))
if err := http.ListenAndServe(":8080", nil); err != nil {
panic(err)
}
}*/
/*func handler(w http.ResponseWriter, r *http.Request) {
fmt.Fprintf(w, "%s %s %s \n", r.Method, r.URL, r.Proto)
//Iterate over all header fields
for k, v := range r.Header {
fmt.Fprintf(w, "Header field %q, Value %q\n", k, v)
}
fmt.Fprintf(w, "Host = %q\n", r.Host)
fmt.Fprintf(w, "RemoteAddr= %q\n", r.RemoteAddr)
//Get value for a specified token
fmt.Fprintf(w, "\n\nFinding value of \"Accept\" %q", r.Header["Accept"])
}*/

1172
go/core/core.go Executable file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,20 @@
<!DOCTYPE html>
<html>
<head>
<title>Wiby Error</title>
<meta http-equiv="content-type"
content="text/html; charset=utf-8"/>
</head>
<body>
<p>Wiby kaputnik :( ...</p>
</body>
</html>

45
go/core/coreassets/form.html.go Executable file
View file

@ -0,0 +1,45 @@
<!DOCTYPE html>
<html lang="en">
<head>
<title>Put your title</title>
<meta http-equiv="content-type" content="text/html; charset=utf-8"/>
<meta name="description" content="Put your description"/>
<link rel="stylesheet" href="/styles.css" type="text/css">
<link rel="search" type="application/opensearchdescription+xml" title="Put your title" href="/opensearch.xml">
</head>
<body>
<div align="right"><a class="tiny" href="/submit/">Submit a page here!</a><br></div>
<div align="right"><a class="tiny" href="/settings/">Settings</a></div>
<div align="center"><h1 class="titlep">Name</h1><br></div>
<form method="get">
<div align="center">
<input type="text" name="q" id="q" size="45" role="form" aria-label="Main search form" autofocus/>
<input type="submit" value="Search"/>
</div><br>
<div align="center"><a class="tiny" href="/surprise/">surprise me...</a></div>
</form>
<div align="center">
<div style="width:400px;text-align: left;">
<pre role="img" aria-label="ASCII art of a lighthouse overlooking the sea.">
.n. |
/___\ _.---. \ _ /
[|||] (_._ ) )--;_) =-
[___] '---'.__,' \
}-=-{ |
|-" |
|.-"| p
~^=~^~-|_.-|~^-~^~ ~^~ -^~^~|\ ~^-~^~-
^ .=.| _.|__ ^ ~ /| \
~ /:. \" _|_/\ ~ /_|__\ ^
.-/::. | |""|-._ ^ ~~~~
`===-'-----'""` '-. ~
jgs __.-' ^
</pre>
</div>
<br><a class="pin1" href="/about/pp.html">Privacy</a> | <a class="pin1" href="/about/">About</a>
</div>
</body>
</html>

View file

@ -0,0 +1,19 @@
<!DOCTYPE html>
<html>
<head>
<title>JSON API</title>
<meta http-equiv="content-type" content="text/html; charset=utf-8"/>
<meta name="description" content="Using the JSON API"/>
</head>
<body>
<h1>Using JSON API</h1>
<p>Use https://domain/json/ to get a JSON output of search results.<br><br>
Example: <a href="https://domain/json/?q=test">https://domain/json/?q=test</a> outputs results for the query 'test'.<br><br>
Append the parameter &o=NUM to get the next page of results.<br>
To determine the value of NUM, look for the presence of <b>NextOffset</b> at the end of the JSON data.<br><br>
Example: <a href="https://domain/json/?q=test&o=12">https://domain/json/?q=test&o=12</a>
<br><br><b>Terms of Use:
<br>1. Set terms here.
</b></p>
</body>
</html>

View file

@ -0,0 +1,13 @@
[
{{range $i, $e:=.DBResults}}{{if $i}},
{{end}} {
"URL": "{{.Url}}",
"Title": "{{.Title}}",
"Snippet": "{{.Body}}",
"Description": "{{.Description}}"
}{{end}}{{if .FindMore }},
{
"NextOffset": "{{.Totalcount}}"
}
{{end}}
]

View file

@ -0,0 +1,32 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8"/>
<title>{{.Query}}</title>
<link rel="stylesheet" href="/styles.css" type="text/css">
<link rel="search" type="application/opensearchdescription+xml" title="Your title" href="/opensearch.xml">
</head>
<body>
<form method="get">
<div style="float: left">
<a class="title" href="../">name</a>&nbsp;&nbsp;
<input type="text" size="35" name="q" id="q" value="{{.Query}}" role="form" aria-label="Main search form"/>
<input type="submit" value="Search"/>
</div>
<div style="float: right"><a class="tiny" href="/settings/">Settings</a></div><br><br>
</form>
<p class="pin"><br></p>
{{range .DBResults}}
<blockquote>
<a class="tlink" href="{{.Url}}">{{ printf "%.150s" .Title}}</a><br><p class="url">{{.Url}}</p><p>{{printf "%.180s" .Body}}<br>{{printf "%.180s" .Description}}</p>
</blockquote>
{{end}}
{{if .FindMore }}
<p class="pin"><blockquote></p><br><a class="more" href="/?q={{.Query}}&o={{.Totalcount}}">Find more...</a></blockquote>
{{else}}
<blockquote><p class="pin"> <br>That's everything I could find.<br>Help make me smarter by <a class="pin1" href="/submit">submitting a page</a>.</p></blockquote>
{{end}}
</body>
</html>

View file

@ -0,0 +1,33 @@
<!DOCTYPE html>
<html>
<head>
<title>Adult Content Agreement</title>
<meta http-equiv="content-type" content="text/html; charset=utf-8"/>
<LINK REL=STYLESHEET HREF="/styles.css" TYPE="text/css">
</head>
<body>
<h4>Adult Content Agreement</h4>
<p class="main">
You have indicated that you do not want adult content filtered.<br>
By clicking agree, you accept that you will not freak out over what could end up displayed in the search results.<br>
We try to ensure content that is illegal does not get stored into the index.
If you are 18 years of age or older and agree to the terms, check the box and press Submit.
</p>
<br><br>
<form action="" method="post">
<div>I agree to the terms and conditions (check and submit)<input type="checkbox" id="agree" name="agree"> <input type="submit" value="Submit"/></div>
<br><br>
<a href="/">Return to Wiby search</a>
<input type="hidden" name="agreesubmit" id="agreesubmit" value="on">
</form>
</body>
</html>

View file

@ -0,0 +1,9 @@
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="refresh" content="0; URL='/'"/>
</head>
<body>
</body>
</html>

View file

@ -0,0 +1,75 @@
<!DOCTYPE html>
<html>
<head>
<title>Settings</title>
<meta http-equiv="content-type" content="text/html; charset=utf-8"/>
<LINK REL=STYLESHEET HREF="/styles.css" TYPE="text/css">
<style type="text/css">
textarea {
display: block;
width: 100%;
}
</style>
</head>
<body>
<form action="" method="post">
<div>
<label for="worksafe">Filter adult content</label>
{{ if .Worksafe }}
<input type="checkbox" id="worksafe" name="worksafe" checked="checked" >
{{else}}
<input type="checkbox" id="worksafe" name="worksafe">
{{end}}&nbsp;&nbsp;<br><br>
<label for="filterHTTPS">Filter HTTPS</label>
{{ if .FilterHTTPS }}
<input type="checkbox" id="filterHTTPS" name="filterHTTPS" checked="checked" >
{{else}}
<input type="checkbox" id="filterHTTPS" name="filterHTTPS">
{{end}}
</div><p class="pin">*for old browsers</p> <br>
<div><input type="submit" value="Submit"/></div>
<div><br><br>
<a href="/submit/">Submit a URL</a>
</div>
<div><br>
<a href="/about/">About</a>
</div>
<p class="pin"><br><br><br><b>Search Options:</b><br><br>
cats +tabby (results must contain the word tabby)<br>
cats -tabby (results must not contain the word tabby)<br>
"I love you" (use quotes to find an exact match)<br>
join* (asterisk finds matches such as join, joins, joining)<br>
<br>
!td tornado (find within the frame of one day)<br>
!tw tornado (find within the frame of one week)<br>
!tm tornado (find within the frame of one month)<br>
!ty tornado (find within the frame of one year)<br>
<br>
site:URL Lorem ipsum (limit search within a domain or URL)<br>
<br>
<br>
<p class="pin"><b>Redirect Options:</b><br>
<br>
!g Paris (Google Text Search)<br>
!gi Paris (Google Images)<br>
!gv Paris (Google Videos)<br>
!gm Paris (Google Maps)<br>
<br>
!b Paris (Bing Text Search)<br>
!bi Paris (Bing Images)<br>
!bv Paris (Bing Videos)<br>
!bm Paris (Bing Maps)<br>
<br>
You may also use '&' in place of '!'.
</p>
</form>
</body>
</html>

View file

@ -0,0 +1,10 @@
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="content-type" content="text/html; charset=utf-8"/>
<meta http-equiv="refresh" content="0; URL='{{.Url}}'"/>
</head>
<body>
You asked for it!
</body>
</html>

BIN
html/about/button.gif Executable file

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.1 KiB

533
html/about/guide.html Executable file
View file

@ -0,0 +1,533 @@
<!DOCTYPE html>
<html lang="en">
<head>
<title>Build your own Search Engine</title>
<style>
h1 { color: #062DA1; }
body { background-color: #FAFAFA; }
a:link { COLOR: #062DA1; font-size: 17px; text-decoration: none;}
a:visited { COLOR: #7900A7; }
a.tlink:visited { COLOR: #515151; }
a.title:link { COLOR: #7900A7; font-weight: bold; font-size: 29px; text-decoration: none; font-family: "Georgia"; }
a.tiny { COLOR: #7900A7; font-size: 17px; }
a.pin1 { font-size:14px; COLOR: #7900A7; }
a.tlink { font-size: 21px; COLOR: #062DA1 }
a.more { font-size: 21px; COLOR: #7900A7 }
h1 { margin:0px; line-height:96px; }
p { font-size:17px; margin-bottom:0px; margin-top:0px; }
.titlep { COLOR: #7900A7; font-weight: bold; font-size: 83px; font-family: "Georgia"; }
.url { font-size:15px; color: #3a5a0c; }
.pin { font-size:14px; COLOR: #2e2e2e;}
textarea:focus, input:focus{ outline: none;}
blockquote { width: 700px; }
pre { width:700px; white-space: pre-wrap; word-wrap: break-word; }
</style>
<meta http-equiv="content-type" content="text/html; charset=utf-8"/>
<meta name="description" content="The source code and instructions to create your own version of Wiby."/>
</head>
<body>
<blockquote>
<h1 align="center">Build Your Own Search Engine</h1>
<div align="center">(Wiby Install Guide)</div>
<br>
<p>
<a href="guide.html#overview">Overview</a>
<br>
<a href="guide.html#install">Installation</a>
<br>
<a href="guide.html#control">Controlling</a>
<br>
<a href="guide.html#scale">Scaling</a>
<h2><a name="overview">Overview</a></h2>
July 7, 2022. Wiby is a search engine for the World Wide Web. I am ready to give it away under the GPLv2 license now that it has the features I want.
<br>
<br>
It includes a web interface allowing guardians to control where, how far, and how often it crawls websites and follows hyperlinks. The search index is stored inside of a MySQL full-text index.
<br>
<br>
Fast queries are maintained by concurrently reading different sections of the index across multiple replication servers, returning a list of top results from each replica,
then searching the combined list to ensure correct ordering. Replicas that fail are automatically excluded; new replicas are easy to include.
As new pages are crawled, they are stored randomly across the index, ensuring each replica can obtain relevant results.<br>
<br>
It is not meant to index the entire web and then sort it with a ranking algorithm.
It prefers to seed its index through human submissions made by guests, or by the guardian(s) of the search engine.
<br>
<br>
The software is designed for anyone with some extra computers (even a Pi), to host their own search engine catering to whatever niche matters to them. The search engine includes a simple API
for meta search engines to harness.
<br>
<br>
I hope this will enable anyone with a love of computers to cheaply build and maintain a search engine of their own.
I hope it can cultivate free and independent search engines, ensuring accessibility of ideas and information across the World Wide Web.
<br>
<br>
<pre>
Web Traffic
|
|
+-----------+-----------+
| Reverse Proxy (nginx) |
+-----------+-----------+
|
|
+-----------+-----------+
| Wiby Core Server(s) |+-----------------+----------------------------+
|(PHP or Golang version)| | |
+-----------+-----------+ +----------+----------+ +---------+---------+
| |Replication Databases|+-----+|Replication Tracker|
| +----------+----------+ +-------------------+
+-----------+-----------+ |
| Primary Database |+-----------------+
| (MySQL or MariaDB) |
+----+-------------+----+
| |
| |
+----+-----+ +----+----+
| Web | | Refresh |
|Crawler(s)| |Scheduler|
+----------+ +---------+
</pre>
<br>
<hr>
<h2><a name="install">Installation</a></h2>
I can only provide manual install instructions at this time.
<br>
<br>
Note that while the software is functionally complete, it is still in beta. Anticipate that some bugs will be discovered now that the source is released.
Ensure that you isolate the search engine from your other important services, and if you are running parts of it out of your home, keep the servers
on a separate VLAN. Continue this practice even when the software reaches "1.0".
<br>
<br>
If you have created a "LAMP", or rather a "LEMP" server before, this isn't much more complicated. If you've never done that, I suggest you find a "LEMP" tutorial.
<br><br>
<h3>Build a LEMP server</h3>
Digital Ocean tutorials are usually pretty good so <a href="https://www.digitalocean.com/community/tutorials/how-to-install-linux-nginx-mysql-php-lemp-stack-on-ubuntu-20-04">here's a link to one</a>.
<br>
<br>
For the sake of simplicity, assume all instructions are for Ubuntu 20.04. If you are on a different distro, modify the install steps accordingly to suit your distro.
<br>
If you don't have a physical server, you can rent computing space by looking for a "VPS provider". This virtual computer will be your reverse proxy, and if you want, it can host everything else too.
<br>
<br>
<h3>Install the following additional packages:</h3>
<pre>apt install build-essential php-gd libcurl4-openssl-dev libmysqlclient-dev mysql-server golang git
go get -u github.com/go-sql-driver/mysql</pre>
<br>
<h3>Get Wiby Source Files</h3>
Download the source directly from Wiby <a href="/download/wibysource.zip">here</a>, or from <a href="https://github.com/wibyweb/wiby/">GitHub</a>. The source is released under the GPLv2 license. Copy the source files for Wiby to your server.
<br>
<br>
<h3>Compile the crawler (cr), refresh scheduler (rs), replication tracker (rt):</h3>
<pre>
gcc cr.c -o cr -I/usr/include/mysql -lmysqlclient -lcurl -std=c99 -O3
gcc rs.c -o rs -I/usr/include/mysql -lmysqlclient -std=c99 -O3
gcc rt.c -o rt -I/usr/include/mysql -lmysqlclient -std=c99 -O3
</pre>
If you get any compile errors, it is likely due to the path of the mysql or libcurl header files.
This could happen if you are not using Ubuntu 20. You might have to locate the correct path for curl.h, easy.h, and mysql.h.
<br>
<br>
<h3>Build the core server application:</h3>
<pre>
Inside the go folder:
go build core.go
go build 1core.go
</pre>
If you eventually plan to use replication servers or you are using a computer with a lot of available cores, you can use 'core'.
Alternatively, if you don't intend to have a large index and do not plan on adding replication servers, you can use '1core'.
You can also use index.php in the root of the www directory and not use the Go version at all. Though the PHP version
is used mainly for prototyping.
<br>
<br>
<h3>Build the database:</h3>
Make sure these lines are inside of /etc/mysql/my.cnf, then restart mysql
<pre>
[client]
default-character-set=utf8mb4
[mysql]
default-character-set = utf8mb4
[mysqld]
max_connections = 2000
ft_min_word_len=2
sql_mode = "NO_BACKSLASH_ESCAPES"
character-set-server = utf8mb4
collation-server = utf8mb4_0900_ai_ci
skip-character-set-client-handshake
default-authentication-plugin=mysql_native_password
wait_timeout = 800
#memory use settings, you should adjust this based on your hardware
innodb_buffer_pool_size = 1342177280
innodb_buffer_pool_instances = 2
</pre>
Login to MySQL and type:
<pre>
create database wiby;
create database wibytemp;
</pre>
Import the wiby and wibytemp database files:
<pre>
mysql -u root -p wiby < wiby.sql
mysql -u root -p wibytemp < wibytemp.sql
</pre>
Login to MySQL, create the following accounts and give them the correct access:
<pre>
create user 'guest'@'localhost' identified by 'qwer';
create user 'approver'@'localhost' identified by 'foobar';
create user 'crawler'@'localhost' identified by 'seekout';
use wiby;
grant select on accounts to 'approver'@'localhost';
grant select on reviewqueue to 'approver'@'localhost';
grant insert on indexqueue to 'approver'@'localhost';
grant delete on reviewqueue to 'approver'@'localhost';
grant update on reviewqueue to 'approver'@'localhost';
grant select on indexqueue to 'crawler'@'localhost';
grant insert on windex to 'crawler'@'localhost';
grant insert on indexqueue to 'crawler'@'localhost';
grant update on windex to 'crawler'@'localhost';
grant delete on indexqueue to 'crawler'@'localhost';
grant delete on windex to 'crawler'@'localhost';
grant select on windex to 'crawler'@'localhost';
grant insert on reviewqueue to 'crawler'@'localhost';
grant select on windex to 'guest'@'localhost';
grant insert on reviewqueue to 'guest'@'localhost';
grant insert on feedback to 'guest'@'localhost';
grant select on feedback to 'approver'@'localhost';
grant delete on feedback to 'approver'@'localhost';
grant insert on graveyard to 'approver'@'localhost';
grant update on graveyard to 'approver'@'localhost';
grant delete on graveyard to 'approver'@'localhost';
grant select on graveyard to 'approver'@'localhost';
grant update on accounts to 'approver'@'localhost';
grant insert on accounts to 'approver'@'localhost';
grant delete on accounts to 'approver'@'localhost';
use wibytemp;
grant select on titlecheck to 'crawler'@'localhost';
grant insert on titlecheck to 'crawler'@'localhost';
grant delete on titlecheck to 'crawler'@'localhost';
grant select on rejected to 'approver'@'localhost';
grant insert on rejected to 'approver'@'localhost';
grant delete on rejected to 'approver'@'localhost';
grant select on reserve_id to 'crawler'@'localhost';
grant insert on reserve_id to 'crawler'@'localhost';
grant delete on reserve_id to 'crawler'@'localhost';
FLUSH PRIVILEGES;
</pre>
<h3>Copy the HTML files and PHP scripts to your web server</h3>
<pre>Copy the contents of the html directory into the nginx html directory (/var/www/html)</pre>
<h3>Configure nginx for Wiby</h3>
In /etc/nginx/, create a directory called 'phpcache', and another one called 'cache'.
<br>
Instead of going through every detail, I have provided a template, located inside /etc/nginx/sites-available/ of the source code, for you to try out as your default nginx config.
<br>
<br>
You should learn nginx configuration on your own, this template is just to assist.
If you are using only the php version, comment all "core app" location entries to revert Wiby search to the php only version.
<br>
Make sure ssl_certificate and ssl_certificate_key have the path for your SSL files instead of the example paths. If you don't want to use SSL, just remove the server {} configuration for SSL connections (on port 443).
<br>
<br>
<h3>Start the Refresh Scheduler</h3>
This program (rs) will make sure all pages indexed are refreshed at least once per week (or sooner depending on how you assign updates to an individual website).
You may want to run this on startup; the easiest way to set that up is with a cron job (crontab -e). Run './rs -h' to get more parameters.
<br>
<br>
<h3>Start the Crawler</h3>
It is best to run the crawler in a screen session so that you can monitor its output. You can have more than one crawler running as long as you keep them in separate directories, include a symlink to the same robots folder, and also set the correct parameters on each.
To view the parameters, type './cr -h'. Without any parameters set, you can only run one crawler (which is probably all you need anyway).
<br>
<br>
Note that you may need to change the crawler's user-agent if you have issues indexing some websites. Pages that fail to index are noted inside of abandoned.txt.
<br>
<br>
Make sure the robots folder exists. robots.txt files are stored in the robots folder and are downloaded once and then referenced from that folder on future updates. Clear this folder every few weeks to ensure robots.txt files get refreshed from time to time.
You can turn off checking for robots.txt files by commenting out the line calling the "checkrobots" function inside of cr.c.
<br>
<br>
If crawling through hyperlinks on a page, the following file types are accepted: html, htm, txt, php, asp. Links containing parameters are ignored. These limitations do not apply to pages directly submitted by people.
<br>
<br>
<h3>Start the core server</h3>
'1core' is fine to get started if you have a smaller index. Use 'core' if you intend to scale computer resources as the index grows. You don't necessarily have to run this if you would prefer to use the simple index.php version.
You may want to run the core server on startup with a cron job.
<br>
<br>
<h3>Set Administrator Password for the Web Interface</h3>
There is no default web login, you will have to set this manually the first time:
<pre>
Rename the /html/hash folder to something private.
Edit html/private_folder_name/hashmake.php and change 'secretpassword' to your preferred admin password.
Access /private_folder_name/hashmake.php from your browser and copy down the hash.
After you have copied it down, delete or remove hashmake.php from your web server folder so that the hash cannot be discovered.
</pre>
Login to MySQL and create the account:
<pre>
use wiby;
INSERT INTO accounts (name,hash,level) VALUES('your_username','your_password_hash','admin');
</pre>
You can now access /accounts/ from your browser, login to create and manage all accounts for administrators and guardians of the search engine.
<br>
<br>
<b>admin</b> - Can access all web forms for the search engine including the /accounts/ page to create and delete accounts.
<br>
<br>
<b>guardian</b> - Can access all forms except the /accounts/ form. The main role of a guardian is to gatekeep the index of the search engine.
<br>
<br>
<br>
<hr>
<h2><a name="control">Controlling the Search Engine</a></h2>
<br>
There are several forms to control the search engine. There is no central form linking everything together, just a collection of different folders that you can rename if you want.
<br>
<br>
<h3>/submit/</h3> This public facing form allows users of the search engine to submit websites for indexing, provided they comply with your submission criteria, which you can modify on /submit/form.html.php.
<br>
<br>
<h3>/accounts/</h3>
This is the account management page. Admins have options to create, lock, change account type, delete, and reset passwords. Guardians have the option to change their password.
<br>
<br>
<h3>/review/</h3> This is the most important form, intended for you to verify website submissions meet your criteria. Up to 10 pages are assigned to each guardian or admin that accesses the form. The pages will remain assigned to that account for up to 30 minutes.
From here you can control how much, how deep, and how often the web crawler will access each submission. Here is an example of the available options for a website submission:
<br>
<br>
<a href="url_that_was_submitted">url_that_was_submitted</a>
<br>
[Worksafe<input type="checkbox" id="worksafe" name="worksafe" checked="checked">]
[Surprise<input type="checkbox" id="surprise" name="surprise">]
[Skip<input type="checkbox" id="skip" name="skip" >]
[Bury<input type="checkbox" id="bury" name="bury" >]
[Deny<input type="checkbox" id="deny" name="deny" >]
[Updatable<select id="updatable" name="updatable">
<option value=1>1 WEEK</option>
<option value=2>1 DAY</option>
<option value=3>12 HOUR</option>
<option value=4>6 HOUR</option>
<option value=5>3 HOUR</option>
<option value=6>1 HOUR</option>
</select>]
<br>
[Crawl: Depth <input type="number" id="crawldepth" name="crawldepth" >
Pages <input type="number" id="crawlpages" name="crawlpages" >
Type <select id="crawltype" name="crawltype">
<option value=0>Local</option>
<option value=1>All</option>
<option value=2>External</option>
</select>
Enforce Rules<input type="checkbox" id="forcerules" name="forcerules" >
Repeat<input type="checkbox" id="crawlrepeat" name="crawlrepeat" >]
<br>
<br>
Explanation of the above options:
<br>
<br>
<b>Worksafe</b> - Indicates if the website is safe for work. Set by the user who submitted the website, however you can change it based on your determination.
<br>
<br>
<b>Surprise</b> - Checking this box will put it in the "surprise me" feature, where users get redirected to random websites when they click "surprise me". Note that this feature won't show NSFW websites even if they are set to surprise.
<br>
<br>
<b>Skip</b> - Selecting this option will skip indexing the page and it will reappear on the review form after you submit the rest of the pages for crawling.
<br>
<br>
<b>Bury</b> - Selecting this will move the page to a graveyard (/grave/), a holding place with the same options as /review/ for websites that might have stopped working but that you suspect may come back online. The crawler will detect this automatically and send the page back into review. When you click on the link and see a 404, you can be assured the crawler sent it back to review after failing two update cycles. This also happens if the title of the page changes. The crawler will only do this for pages directly submitted by people. This courtesy is not given to websites that are automatically crawled but then fail to work later on. For those sites, after two failed update cycles, the page will be removed.
<br>
<br>
<b>Deny</b> - Select this to drop the page from being indexed. If the page does not meet your submission criteria, this would be the option to remove it from the queue.
<br>
<br>
<b>Updatable</b> - The update cycle for the web crawler to return to the page. This only applies to pages submitted by people, pages found by link crawling always go on a 1 week update cycle.
<br>
<br>
<b>------------------- Crawl -------------------</b>
<br>
The options listed below control how the crawler indexes hyperlinks on the website. By default, the crawler does not index any hyperlinks, it will only index the page that is submitted.
<br>
<br>
<b>Depth</b> - How many layers of links to crawl through. You must set at least a depth of 1 if you want to crawl any hyperlinks. Setting a negative value = no limit. Be careful about that.
<br>
<br>
<b>Pages</b> - How many pages to crawl on each link layer (depth). They will be randomly selected. You must set at least 1 if you want to crawl any hyperlinks. Setting a negative value = no limit. Be careful about that.
<br>
<br>
<b>Type</b> - Indicates if you want to only crawl links local to the website, or links external to the website, or both.
<br>
<br>
<b>Enforce rules</b> - This is a blunt tool that checks if pages have more than two scripts and/or CSS files. If the limit is exceeded, the page will not be indexed. I don't use it and prefer to manually check based on more forgiving criteria.
<br>
<br>
<b>Repeat</b> - While the crawler will always return to update each page in the index, it won't crawl through hyperlinks again unless you tell it to. Even so, it only crawls hyperlinks on the page at a depth of 1 when repeat is selected.
<br>
<br>
<h3>/ban/</h3>
You can delete or ban individual URLs from the index with this form. It's pretty simple as I don't use it much. You can't delete an entire domain with it; for that you can build your own query in the MySQL console.
<br>
<br>
<h3>/bulksubmit/</h3>
Admins/Guardians can import a list of URLs into the review queue with this form.
<br>
<br>
<h3>/feedback/</h3>
Users can submit feedback for you with this form.
<br>
<br>
<h3>/readf/</h3>
Where admin accounts can read feedback submitted by users.
<br>
<br>
<h3>/grave/</h3>
It has the same features as /review/. Websites that you don't yet want to index but don't want to forget about are stored inside /grave/ by selecting 'bury' from inside /review/. The web crawler will (only for pages submitted directly by people) move 404'd pages, or pages where the title has changed, back to /review/ after two update cycles
where the page does not return to normal. So after a few weeks you may notice dead pages appearing in /review/; you can decide to drop the page, or to bury it, in which case it will be moved to /grave/. The page might go back to normal at some point and you can check /grave/ to see if it resurrects.
<br>
<br>
<h3>/insert/</h3>
This was the first form created back in late 2016 to populate the Wiby index and see if the search engine could even work as a proof of concept. It was meant to manually enter pages into the index as no crawler existed yet.
It is still useful if you want to manually index a page that refuses to permit the crawler to access it. In that case, set updatable to 0.
<br>
<br>
<h3>Additional Notes</h3>
If you want to force a website to appear at the top rank for a specific single word query, (like "weather"), you can force it by adding "weather" to the tags column for the target url in the windex table. Use this sparingly.
There is no form to do this on an existing website, you will have to update the row in mysql manually.
<br>
<br>
<br>
<hr>
<h2><a name="scale">Scaling the Search Engine</a></h2>
<br>
You can help ensure sub-second search queries as your index grows by building MySQL replica servers on a local network close to each other. Run the core application AND replication tracker (rt) on one or more replica servers and point your reverse proxy to use it.
Edit the servers.csv file for rt to indicate all available replica servers. If you have a machine with a huge amount of resources and cores, entering multiple duplicate entries to the same server inside servers.csv (e.g. one for each core) works also.
<br>
<br>
The core application checks the replication tracker (rt) output to determine if any replicas are online; it will then initiate a connection on those replicas and task each one to search a different section of the index,
drastically speeding up searches, especially for multi-word queries. By default, single-word queries will not initiate multiple connections across replicas. To enable that on single-word queries, comment out the IF statement
on line 365 and rebuild the core application.
<br>
<br>
The reverse proxy and replica servers can be connected through a VPN such as wireguard or openvpn, however the IPs for servers.csv should be the local IPs for the LAN
the replicas are all connected on. <a href="https://www.digitalocean.com/community/tutorials/how-to-set-up-replication-in-mysql">Here</a> is a tutorial for setting up MySQL replicas. Full instructions below:
<br>
<br>
On the primary server add these lines to my.cnf under [mysqld] but only once you have a VPN to reach your replicas. Replace my.vpn.ip with your own.
<pre>
#setting up replication below
bind-address = 127.0.0.1,my.vpn.ip
server-id = 1
log_bin = /var/log/mysql/mysql-bin.log
binlog_do_db = wiby
binlog_format = mixed
</pre>
In MySQL on the primary server, create a user for replica access:
<pre>
create user 'slave_user'@'%' identified by 'd0gemuchw0w';
GRANT REPLICATION SLAVE ON *.* TO 'slave_user'@'%';
FLUSH PRIVILEGES;
</pre>
On the replica server, ensure the following my.cnf configuration, set the server-id as a unique id for each replica, then restart mysql:
<pre>
[client]
default-character-set=utf8mb4
[mysql]
default-character-set = utf8mb4
[mysqld]
max_connections = 2000
ft_min_word_len=2
sql_mode = "NO_BACKSLASH_ESCAPES"
#character-set-client-handshake = FALSE
character-set-server = utf8mb4
collation-server = utf8mb4_0900_ai_ci
skip-character-set-client-handshake
default-authentication-plugin=mysql_native_password
wait_timeout = 800
#memory use settings, you should adjust this based on your hardware
innodb_buffer_pool_size = 1342177280
innodb_buffer_pool_instances = 2
#setting up replication below
bind-address = 0.0.0.0
server-id = 2
relay_log_info_repository = TABLE
relay_log_recovery = ON
sync_binlog=1
</pre>
Make sure only VPN and VLAN addresses can reach your replicas. The bind address of 0.0.0.0 can be replaced with '127.0.0.1,replica.vpn.ip' which is safer but also more crash prone if the VPN address is not available on startup.
<br>
<br>
To export the database to the replica server, on the primary server, stop the web crawler and hide any web forms that can accept new data, then open MySQL and do the following.
<pre>
USE wiby;
FLUSH TABLES WITH READ LOCK;
SHOW MASTER STATUS;
+------------------+----------+--------------+------------------+-------------------+
| File | Position | Binlog_Do_DB | Binlog_Ignore_DB | Executed_Gtid_Set |
+------------------+----------+--------------+------------------+-------------------+
| mysql-bin.000055 | 15871269 | wiby | | |
+------------------+----------+--------------+------------------+-------------------+
</pre>
Keep the above session window open (or run it in a screen session).
<br>
Copy down the information from that table. In a separate session window, export the database:
<pre>
mysqldump -u root -p wiby > wiby.sql
</pre>
Once you have exported the database and recorded what you need, you can unlock the tables, and resume as normal. On the session window displaying the master status:
<pre>
UNLOCK TABLES;
</pre>
You can now close that window if you want.
<br>
On the replica server, import the database:
<pre>
mysql -u root -p wiby < wiby.sql
</pre>
Login to MySQL and type the following but replace the IP, MASTER_LOG_FILE, and MASTER_LOG_POS with yours from the table:
<pre>
CHANGE MASTER TO MASTER_HOST='primary.server.ip',MASTER_USER='slave_user', MASTER_PASSWORD='d0gemuchw0w', MASTER_LOG_FILE='mysql-bin.000055', MASTER_LOG_POS=15871269;
START SLAVE;
</pre>
To verify that the replica is synchronized, type the following on the replica in MySQL:
<pre>
SHOW SLAVE STATUS\G
</pre>
Make sure that:
<pre>
Slave_IO_Running: Yes
Slave_SQL_Running: Yes
</pre>
In MySQL on the replica:
<pre>
use wiby;
create user 'remote_guest'@'%' identified by 'd0gemuchw0w';
grant select on windex to 'remote_guest'@'%';
create user 'guest'@'localhost' identified by 'qwer';
grant select on windex to 'guest'@'localhost';
FLUSH PRIVILEGES;
</pre>
<h3>Load Balancing</h3>
You should run the core application on one or more of your replicas and have nginx send traffic to it; this way you can reduce the burden on your VPS. The replication tracker (rt) must run on the same server
and in the same directory as the core application (not required for 1core).
<br>
<br>
Add the replica server's VPN address/port to upstream remote_core {} from the default config for nginx (see the provided example template). You can use the VPS as a backup instead by adding 'backup' to its address (eg: server 127.0.0.1:8080 backup;)
<br>
</p>
</blockquote>
</body>
</html>

24
html/about/index.html Executable file
View file

@ -0,0 +1,24 @@
<!DOCTYPE html>
<html>
<head>
<title>About</title>
<link rel="stylesheet" type="text/css" href="../styles.css"/>
</head>
<body>
<blockquote>
<a href="/"><h3>Heading</h3></a>
<p>
Write your about section.
<br>
<br>
<a href="/feedback/">Send Feedback</a><br><br>
</p>
<p class="pin"><b>Additional Features:</b>
<br><br>Developers can connect their applications using the JSON output available at /json.
<br>
<br>
<a class="pin1" href="pp.html">Privacy Policy</a>
</p>
</blockquote>
</body>
</html>

BIN
html/about/mug.gif Executable file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.4 KiB

12
html/about/pp.html Executable file
View file

@ -0,0 +1,12 @@
<!DOCTYPE html>
<html>
<head>
<title>Privacy Policy</title>
<link rel="stylesheet" type="text/css" href="/styles.css"/>
</head>
<body>
<h4>Privacy Policy</h4>
<p class="main">Include your privacy policy here.
</p>
</body>
</html>

BIN
html/about/wiby.gif Executable file

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.1 KiB

BIN
html/about/wiby.org.gif Executable file

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.3 KiB

BIN
html/about/wibyplex.gif Executable file

Binary file not shown.

After

Width:  |  Height:  |  Size: 25 KiB

25
html/accounts/accounts.html.php Executable file
View file

@ -0,0 +1,25 @@
<!DOCTYPE html>
<html>
<head>
<title>Form Example</title>
<meta http-equiv="content-type"
content="text/html; charset=utf-8"/>
</head>
<body>
<p>
<?php echo $output; ?>
</p>
</body>
</html>

249
html/accounts/accounts.php Executable file
View file

@ -0,0 +1,249 @@
<?php
	// Account management endpoint.
	//
	// Admins get the account list (admin.html.php) and can create, delete,
	// re-password, re-level, lock and unlock accounts. Guardians can only
	// change their own password (guardian.html.php).
	//
	// NOTE: this script re-includes itself after every admin action in order
	// to redraw the account list, so it must not declare named functions
	// (a second include would redeclare them). Helpers are closures instead.

	// The script is reached both directly and via include from index.php /
	// itself, so only start the session when one is not already active.
	if (session_status() !== PHP_SESSION_ACTIVE)
	{
		session_start();
	}

	if (empty($_SESSION["authenticated"]))
	{
		include 'index.php';
		exit();
	}

	$link = mysqli_connect('localhost', 'approver', 'foobar');
	if (!$link)
	{
		$error = 'Cant connect to database.';
		include 'error.html.php';
		exit();
	}
	if (!mysqli_set_charset($link, 'utf8'))
	{
		$error = 'Unable to set database connection encoding.';
		include 'error.html.php';
		exit();
	}
	if (!mysqli_select_db($link, 'wiby'))
	{
		$error = 'Unable to locate the database.';
		include 'error.html.php';
		exit();
	}

	// Returns a POST field as a string, or '' when it is missing or not a string.
	$post_field = function ($key) {
		return (isset($_POST[$key]) && is_string($_POST[$key])) ? $_POST[$key] : '';
	};

	// Hashes a new password. The login scripts pass the submitted password
	// through mysqli_real_escape_string() before password_verify(), so the
	// same transformation has to be applied here or logins would stop matching.
	$hash_password = function ($plain) use ($link) {
		return password_hash(mysqli_real_escape_string($link, $plain), PASSWORD_DEFAULT);
	};

	$session_level = isset($_SESSION["level"]) ? $_SESSION["level"] : '';
	$nothing_posted = !isset($_POST['name']) && !isset($_POST['password']);

	//get info for admin
	if ($session_level == "admin" && ($nothing_posted || !empty($_SESSION["loadadmin"])))
	{
		$_SESSION["loadadmin"] = false;
		$adminresult = mysqli_query($link, "SELECT name, level, attempts, updated FROM accounts");
		if (!$adminresult)
		{
			$error = 'Error fetching index: ' . mysqli_error($link);
			include 'error.html.php';
			exit();
		}

		//put contents of accounts into arrays (initialised so the template can iterate an empty table)
		$db_name = array();
		$db_level = array();
		$db_attempts = array();
		$db_updated = array();
		while ($row = mysqli_fetch_array($adminresult))
		{
			$db_name[] = $row['name'];
			$db_level[] = $row['level'];
			$db_attempts[] = $row['attempts'];
			$db_updated[] = $row['updated'];
		}
		include 'admin.html.php';
		exit();
	}
	//process info submitted by admin
	else if ($session_level == "admin")
	{
		$name = $post_field('name');
		$password = $post_field('password');
		$action = $post_field('action');
		$level_choice = $post_field('level');
		$levels = array('1' => 'guardian', '2' => 'admin'); // values offered by the level <select>

		$notice = null;     // message echoed above the refreshed account list
		$sql = null;        // statement to run, with ? placeholders
		$params = array();  // string values bound to the placeholders
		$done = '';         // success message prefix, followed by the account name

		switch ($action)
		{
			case "0":
				$notice = "No actions taken.";
				break;

			case "1": //create
				if ($password == "" || $name == "")
				{
					$notice = "Both a name and password are required";
				}
				else if (!isset($levels[$level_choice]))
				{
					$notice = "You must select a valid account level.";
				}
				else
				{
					$sql = 'INSERT INTO accounts (name,hash,level) VALUES(?,?,?)';
					$params = array($name, $hash_password($password), $levels[$level_choice]);
					$done = "Create account submitted for ";
				}
				break;

			case "2": //delete
				if ($name == "")
				{
					$notice = "You did not name an account to delete.";
				}
				else
				{
					$sql = 'DELETE FROM accounts WHERE name = ?';
					$params = array($name);
					$done = "Delete account submitted for ";
				}
				break;

			case "3": //update password
				if ($password == "" || $name == "")
				{
					$notice = "You must include both a name and password.";
				}
				else
				{
					$sql = 'UPDATE accounts SET hash = ? WHERE name = ?';
					$params = array($hash_password($password), $name);
					$done = "Update password submitted for ";
				}
				break;

			case "4": //update level
				if ($name == "")
				{
					$notice = "You must include an account name.";
				}
				else if (!isset($levels[$level_choice]))
				{
					// Previously an unrecognised level fell back to the action value ("4").
					$notice = "You must select a valid account level.";
				}
				else
				{
					$sql = 'UPDATE accounts SET level = ? WHERE name = ?';
					$params = array($levels[$level_choice], $name);
					$done = "Update level submitted for ";
				}
				break;

			case "5": //unlock
			case "6": //lock
				if ($name == "")
				{
					$notice = "You must include an account name.";
				}
				else if ($action == "5")
				{
					$sql = 'UPDATE accounts SET attempts = 0 WHERE name = ?';
					$params = array($name);
					$done = "Unlock account submitted for ";
				}
				else
				{
					// The login scripts refuse accounts whose attempts reach 5.
					$sql = 'UPDATE accounts SET attempts = 5 WHERE name = ?';
					$params = array($name);
					$done = "Lock account submitted for ";
				}
				break;
		}

		if ($sql !== null)
		{
			// Bound parameters keep the submitted values out of the SQL text.
			$stmt = mysqli_prepare($link, $sql);
			if (!$stmt
				|| !mysqli_stmt_bind_param($stmt, str_repeat('s', count($params)), ...$params)
				|| !mysqli_stmt_execute($stmt))
			{
				$error = 'Error fetching index: ' . mysqli_error($link);
				include 'error.html.php';
				exit();
			}
			mysqli_stmt_close($stmt);
			$notice = $done . htmlspecialchars($name, ENT_QUOTES, 'UTF-8');
		}

		if ($notice !== null)
		{
			echo $notice;
			$_SESSION["loadadmin"] = true; // makes the re-included script show the list
			include 'accounts.php';
			exit();
		}
	}

	if ($session_level == "guardian")
	{
		//get form for guardian
		if (!isset($_POST['password']))
		{
			$who = $post_field('user');
			if ($who == '' && isset($_SESSION["user"]))
			{
				$who = $_SESSION["user"];
			}
			echo "Welcome ". htmlspecialchars($who, ENT_QUOTES, 'UTF-8');
			include 'guardian.html.php';
		}
		//process info for guardian
		else if ($post_field('password') == "")
		{
			echo "Password field is empty.";
			include 'guardian.html.php';
			exit();
		}
		else
		{
			$session_user = isset($_SESSION["user"]) ? $_SESSION["user"] : '';
			$new_hash = $hash_password($post_field('password'));
			// NOTE(review): the login scripts store the SQL-escaped user name in the
			// session, so a name containing quote characters may not match here.
			$stmt = mysqli_prepare($link, 'UPDATE accounts SET hash = ? WHERE name = ?');
			if (!$stmt
				|| !mysqli_stmt_bind_param($stmt, 'ss', $new_hash, $session_user)
				|| !mysqli_stmt_execute($stmt))
			{
				$error = 'Error fetching index: ' . mysqli_error($link);
				include 'error.html.php';
				exit();
			}
			mysqli_stmt_close($stmt);
			echo "Password has been updated";
			include 'guardian.html.php';
		}
	}
?>

42
html/accounts/admin.html.php Executable file
View file

@ -0,0 +1,42 @@
<!DOCTYPE html>
<html>
<head>
<title>Account Management</title>
<meta http-equiv="content-type" content="text/html; charset=utf-8"/>
<link rel="stylesheet" type="text/css" href="/styles.css">
<style type="text/css">
textarea { display: block; width: 100%; }
</style>
</head>
<body>
<form action="accounts.php" method="post">
<div>
<br>
Username <input type="text" name="name" id="name"/><br>
Password&nbsp; <input type="password" name="password" id="password"/><br>
Level&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <select id="level" name="level">
<option value=1>guardian</option>
<option value=2>admin</option>
</select><br>
Action&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <select id="action" name="action">
<option value=0>none</option>
<option value=1>create</option>
<option value=2>delete</option>
<option value=3>update password</option>
<option value=4>update level</option>
<option value=5>unlock</option>
<option value=6>lock</option>
</select><br><br>
</div>
<div><input type="submit" value="Submit"/></div>
<br>Accounts:<br>
<?php /* One line per account. Values come from the accounts table, so they are HTML-escaped before output. */ ?>
<?php foreach ((isset($db_name) ? $db_name : array()) as $i => $username): ?>
<blockquote><p><?php echo htmlspecialchars($username ." ( level: ". $db_level[$i] ." | attempts: ". $db_attempts[$i] ." | updated: ". $db_updated[$i] ." )", ENT_QUOTES, 'UTF-8'); ?></p></blockquote>
<?php endforeach; ?>
</form>
</body>
</html>

25
html/accounts/error.html.php Executable file
View file

@ -0,0 +1,25 @@
<!DOCTYPE html>
<html>
<head>
<title>PHP Error Output</title>
<meta http-equiv="content-type"
content="text/html; charset=utf-8"/>
</head>
<body>
<p>
<?php echo $error; ?>
</p>
</body>
</html>

25
html/accounts/guardian.html.php Executable file
View file

@ -0,0 +1,25 @@
<!DOCTYPE html>
<html>
<head>
<title>Account Management</title>
<meta http-equiv="content-type" content="text/html; charset=utf-8"/>
<link rel="stylesheet" type="text/css" href="/styles.css">
<style type="text/css">
textarea { display: block; width: 100%; }
</style>
</head>
<body>
<form action="accounts.php" method="post">
<div>
Update Password&nbsp; <input type="password" name="password" id="password"/><br>
</div>
<div><input type="submit" value="Submit"/></div>
</form>
</body>
</html>

96
html/accounts/index.php Executable file
View file

@ -0,0 +1,96 @@
<?php
	// Login handler for the account management area.
	//
	// Shows the login form until a user name and password are posted, checks
	// the captcha for sessions that are not authenticated yet, then verifies
	// the password hash stored in the accounts table. Accounts whose attempts
	// counter has reached 5 are refused. On success the session is marked
	// authenticated and accounts.php takes over.

	// This script is also included from accounts.php, which has already started the session.
	if (session_status() !== PHP_SESSION_ACTIVE)
	{
		session_start();
	}

	$have_credentials = isset($_POST['user'], $_POST['pass'])
		&& is_string($_POST['user']) && is_string($_POST['pass']);

	if (!$have_credentials)
	{
		include 'login.html.php';
	}
	else if ($_POST['user'] == '' || $_POST['pass'] == '')
	{
		echo "It doesn't look like you submitted a valid username or password.";
		include 'login.html.php';
	}
	else
	{
		if (!isset($_SESSION["authenticated"]))
		{
			include_once $_SERVER['DOCUMENT_ROOT'] . '/securimage/securimage.php';
			$securimage = new Securimage();
			$captcha_code = isset($_POST['captcha_code']) ? $_POST['captcha_code'] : '';
			if ($securimage->check($captcha_code) == false)
			{
				echo "The security code entered was incorrect.";
				include 'login.html.php';
				exit();
			}
		}

		$link = mysqli_connect('localhost', 'approver', 'foobar');
		if (!$link)
		{
			$error = 'Cant connect to database.';
			include 'error.html.php';
			exit();
		}
		if (!mysqli_set_charset($link, 'utf8'))
		{
			$error = 'Unable to set database connection encoding.';
			include 'error.html.php';
			exit();
		}
		if (!mysqli_select_db($link, 'wiby'))
		{
			$error = 'Unable to locate the database.';
			include 'error.html.php';
			exit();
		}

		// Escape only after the connection is known to be usable.
		$user = mysqli_real_escape_string($link, $_POST['user']);
		// The stored hashes were produced from the escaped password (see
		// accounts.php), so the same transformation is applied before verifying.
		$pass = mysqli_real_escape_string($link, $_POST['pass']);

		$loginresult = mysqli_query($link, "SELECT hash, attempts, level FROM accounts WHERE name = '$user';");
		if (!$loginresult)
		{
			$error = 'Error fetching index: ' . mysqli_error($link);
			include 'error.html.php';
			exit();
		}

		// First matching row, or null when the user name is unknown.
		$account = mysqli_fetch_assoc($loginresult);

		if ($account && $account['attempts'] < 5 && password_verify($pass, (string)$account['hash']))
		{
			if ($account['attempts'] > 0)
			{
				if (!mysqli_query($link, "UPDATE accounts SET attempts = '0' WHERE name = '$user';"))
				{
					$error = 'Error fetching index: ' . mysqli_error($link);
					include 'error.html.php';
					exit();
				}
			}
			// Issue a fresh session id on login so a pre-login id cannot be reused.
			session_regenerate_id(true);
			$_SESSION["authenticated"] = true;
			$_SESSION["user"] = $user;
			$_SESSION["level"] = $account['level'];
			include 'accounts.php';
			exit();
		}
		else
		{
			// Count the failure against the account; nothing to update for unknown names.
			if ($account)
			{
				$attempt = (int)$account['attempts'] + 1;
				if (!mysqli_query($link, "UPDATE accounts SET attempts = '$attempt' WHERE name = '$user';"))
				{
					$error = 'Error fetching index: ' . mysqli_error($link);
					include 'error.html.php';
					exit();
				}
			}
			echo "It doesn't look like you submitted a valid username or password.";
			include 'login.html.php';
		}
	}
?>

31
html/accounts/login.html.php Executable file
View file

@ -0,0 +1,31 @@
<!DOCTYPE html>
<?php session_start(); ?>
<html>
<head>
<title>wiby.me</title>
<meta http-equiv="content-type" content="text/html; charset=utf-8"/>
<link rel="stylesheet" type="text/css" href="/styles.css">
</head>
<body>
<form method="post">
Username <input type="text" name="user" id="user"/><br>
Password <input type="password" name="pass" id="pass"/><br><br>
<?php if($_SESSION["authenticated"]!=true): ?>
<div>
<img id="captcha" src="/securimage/securimage_show.php" alt="CAPTCHA Image" />
</div>
<div>
<input type="text" name="captcha_code" size="10" maxlength="6" />
<a href="#" onclick="document.getElementById('captcha').src = '/securimage/securimage_show.php?' + Math.random(); return false">Reload Image</a>
</div>
<?php endif; ?>
<br><input type="submit" id="login" value="Login"/>
</form>
</body>
</html>

25
html/ban/ban.html.php Executable file
View file

@ -0,0 +1,25 @@
<!DOCTYPE html>
<html>
<head>
<title>Form Example</title>
<meta http-equiv="content-type"
content="text/html; charset=utf-8"/>
</head>
<body>
<p>
<?php echo $output; ?>
</p>
</body>
</html>

65
html/ban/ban.php Executable file
View file

@ -0,0 +1,65 @@
<?php
	// Ban tool: disables (enable = 0) or deletes the windex row whose url
	// matches the submitted URL exactly. Requires an authenticated session.

	// Also reached via include from index.php, where the session is already active.
	if (session_status() !== PHP_SESSION_ACTIVE)
	{
		session_start();
	}

	if (empty($_SESSION["authenticated"]))
	{
		include 'index.php';
		exit();
	}

	if (!isset($_POST['url']) || !is_string($_POST['url']))
	{
		include 'form.html.php';
	}
	else
	{
		$link = mysqli_connect('localhost', 'crawler', 'seekout');
		if (!$link)
		{
			$error = 'Cant connect to database.';
			include 'error.html.php';
			exit();
		}
		if (!mysqli_set_charset($link, 'utf8'))
		{
			$error = 'Unable to set database connection encoding.';
			include 'error.html.php';
			exit();
		}
		if (!mysqli_select_db($link, 'wiby'))
		{
			$error = 'Unable to locate the database.';
			include 'error.html.php';
			exit();
		}

		$url = $_POST['url'];
		// An unchecked checkbox is simply absent from the POST data.
		$delete = isset($_POST['delete']) && $_POST['delete'] == 'on';

		if ($delete)
		{
			$sql = 'DELETE FROM windex WHERE url = ?';
		}
		else
		{
			$sql = 'UPDATE windex SET enable = 0 WHERE url = ?';
		}

		// The URL is bound as a parameter rather than concatenated into the statement.
		$stmt = mysqli_prepare($link, $sql);
		if (!$stmt
			|| !mysqli_stmt_bind_param($stmt, 's', $url)
			|| !mysqli_stmt_execute($stmt))
		{
			$error = 'Error fetching index: ' . mysqli_error($link);
			include 'error.html.php';
			exit();
		}
		mysqli_stmt_close($stmt);

		// ban.html.php echoes $output verbatim, so escape the URL for HTML here.
		$output = 'No errors '. htmlspecialchars($url, ENT_QUOTES, 'UTF-8');
		include 'ban.html.php';
	}
?>

25
html/ban/error.html.php Executable file
View file

@ -0,0 +1,25 @@
<!DOCTYPE html>
<html>
<head>
<title>PHP Error Output</title>
<meta http-equiv="content-type"
content="text/html; charset=utf-8"/>
</head>
<body>
<p>
<?php echo $error; ?>
</p>
</body>
</html>

22
html/ban/form.html.php Executable file
View file

@ -0,0 +1,22 @@
<!DOCTYPE html>
<html>
<head>
<title>Ban a page</title>
<meta http-equiv="content-type" content="text/html; charset=utf-8"/>
<link rel="stylesheet" type="text/css" href="/styles.css">
</head>
<body>
<form action="ban.php" method="post">
<div>
<label for="url">URL as it appears in search results:</label><br>
<input type="text" id="url" name="url" size="45"></input>
</div>
<div>
<label for="delete">Delete instead:</label>
<input type="checkbox" id="delete" name="delete" checked="checked">
</div>
<div><input type="submit" value="Submit"/></div>
</form>
</body>
</html>

94
html/ban/index.php Executable file
View file

@ -0,0 +1,94 @@
<?php
	// Login handler for the ban tool.
	//
	// Shows the login form until a user name and password are posted, checks
	// the captcha for sessions that are not authenticated yet, then verifies
	// the password hash stored in the accounts table. Accounts whose attempts
	// counter has reached 5 are refused. On success ban.php takes over.

	// This script is also included from ban.php, which has already started the session.
	if (session_status() !== PHP_SESSION_ACTIVE)
	{
		session_start();
	}

	$have_credentials = isset($_POST['user'], $_POST['pass'])
		&& is_string($_POST['user']) && is_string($_POST['pass']);

	if (!$have_credentials)
	{
		include 'login.html.php';
	}
	else if ($_POST['user'] == '' || $_POST['pass'] == '')
	{
		echo "It doesn't look like you submitted a valid username or password.";
		include 'login.html.php';
	}
	else
	{
		if (!isset($_SESSION["authenticated"]))
		{
			include_once $_SERVER['DOCUMENT_ROOT'] . '/securimage/securimage.php';
			$securimage = new Securimage();
			$captcha_code = isset($_POST['captcha_code']) ? $_POST['captcha_code'] : '';
			if ($securimage->check($captcha_code) == false)
			{
				echo "The security code entered was incorrect.";
				include 'login.html.php';
				exit();
			}
		}

		$link = mysqli_connect('localhost', 'approver', 'foobar');
		if (!$link)
		{
			$error = 'Cant connect to database.';
			include 'error.html.php';
			exit();
		}
		if (!mysqli_set_charset($link, 'utf8'))
		{
			$error = 'Unable to set database connection encoding.';
			include 'error.html.php';
			exit();
		}
		if (!mysqli_select_db($link, 'wiby'))
		{
			$error = 'Unable to locate the database.';
			include 'error.html.php';
			exit();
		}

		// Escape only after the connection is known to be usable.
		$user = mysqli_real_escape_string($link, $_POST['user']);
		// NOTE(review): the password is escaped before verification, as before;
		// presumably the stored hashes were created from the escaped form — confirm
		// against the account management script before changing this.
		$pass = mysqli_real_escape_string($link, $_POST['pass']);

		$loginresult = mysqli_query($link, "SELECT hash, attempts FROM accounts WHERE name = '$user';");
		if (!$loginresult)
		{
			$error = 'Error fetching index: ' . mysqli_error($link);
			include 'error.html.php';
			exit();
		}

		// First matching row, or null when the user name is unknown.
		$account = mysqli_fetch_assoc($loginresult);

		if ($account && $account['attempts'] < 5 && password_verify($pass, (string)$account['hash']))
		{
			if ($account['attempts'] > 0)
			{
				if (!mysqli_query($link, "UPDATE accounts SET attempts = '0' WHERE name = '$user';"))
				{
					$error = 'Error fetching index: ' . mysqli_error($link);
					include 'error.html.php';
					exit();
				}
			}
			// Issue a fresh session id on login so a pre-login id cannot be reused.
			session_regenerate_id(true);
			$_SESSION["authenticated"] = true;
			$_SESSION["user"] = $user;
			include 'ban.php';
			exit();
		}
		else
		{
			// Count the failure against the account; nothing to update for unknown names.
			if ($account)
			{
				$attempt = (int)$account['attempts'] + 1;
				if (!mysqli_query($link, "UPDATE accounts SET attempts = '$attempt' WHERE name = '$user';"))
				{
					$error = 'Error fetching index: ' . mysqli_error($link);
					include 'error.html.php';
					exit();
				}
			}
			echo "It doesn't look like you submitted a valid username or password.";
			include 'login.html.php';
		}
	}
?>

31
html/ban/login.html.php Executable file
View file

@ -0,0 +1,31 @@
<!DOCTYPE html>
<?php session_start(); ?>
<html>
<head>
<title>wiby.me</title>
<meta http-equiv="content-type" content="text/html; charset=utf-8"/>
<link rel="stylesheet" type="text/css" href="/styles.css">
</head>
<body>
<form method="post">
Username <input type="text" name="user" id="user"/><br>
Password <input type="password" name="pass" id="pass"/><br><br>
<?php if($_SESSION["authenticated"]!=true): ?>
<div>
<img id="captcha" src="/securimage/securimage_show.php" alt="CAPTCHA Image" />
</div>
<div>
<input type="text" name="captcha_code" size="10" maxlength="6" />
<a href="#" onclick="document.getElementById('captcha').src = '/securimage/securimage_show.php?' + Math.random(); return false">Reload Image</a>
</div>
<?php endif; ?>
<br><input type="submit" id="login" value="Login"/>
</form>
</body>
</html>

118
html/bulksubmit/bulksubmit.php Executable file
View file

@ -0,0 +1,118 @@
<?php
	// Bulk submission: takes a newline-separated list of URLs from an
	// authenticated user and inserts them into reviewqueue in one statement,
	// all with the same worksafe flag.

	// Also reached via include from index.php, where the session is already active.
	if (session_status() !== PHP_SESSION_ACTIVE)
	{
		session_start();
	}

	if (empty($_SESSION["authenticated"]))
	{
		include 'index.php';
		exit();
	}

	// Treat a non-string "urls" field (e.g. urls[]=...) as if nothing was posted;
	// form.html.php echoes the field back and expects a string.
	if (isset($_POST['urls']) && !is_string($_POST['urls']))
	{
		unset($_POST['urls']);
	}

	if (!isset($_POST['urls']))
	{
		include 'form.html.php';
		exit();
	}
	else if ($_POST['urls'] == '')
	{
		echo "It doesn't look like you submitted anything.";
		include 'form.html.php';
		exit();
	}

	$link = mysqli_connect('localhost', 'guest', 'qwer');
	if (!$link)
	{
		$error = 'Cant connect to database.';
		include 'error.html.php';
		exit();
	}
	if (!mysqli_set_charset($link, 'utf8'))
	{
		$error = 'Unable to set database connection encoding.';
		include 'error.html.php';
		exit();
	}
	if (!mysqli_select_db($link, 'wiby'))
	{
		$error = 'Unable to locate the database.';
		include 'error.html.php';
		exit();
	}

	// An unchecked checkbox is simply absent from the POST data.
	$worksafe = (isset($_POST['worksafe']) && $_POST['worksafe'] == 'on') ? 1 : 0;

	// Flat list of bound values: url, worksafe, url, worksafe, ...
	$params = array();
	foreach (explode("\n", str_replace("\r", "", $_POST['urls'])) as $line)
	{
		$url = trim($line);
		if ($url == '')
		{
			continue; // skip blank lines
		}

		$url = substr($url, 0, 400); //don't allow user to post a longer url than 400b (also limited in form)
		$url = str_replace("/index.html", "/", $url);
		$url = str_replace("/index.htm", "/", $url);

		// A usable URL has a '.' somewhere after its first character and no spaces.
		$dot = strpos($url, '.');
		if ($dot === false || $dot === 0 || strpos($url, ' ') !== false)
		{
			echo "It doesn't look like you submitted a valid URL: '". htmlspecialchars($url, ENT_QUOTES, 'UTF-8') ."'";
			include 'form.html.php';
			exit();
		}

		$params[] = $url;
		$params[] = $worksafe;
	}

	// Only blank lines were submitted; there is nothing to insert.
	if (count($params) == 0)
	{
		echo "It doesn't look like you submitted anything.";
		include 'form.html.php';
		exit();
	}

	// One "(?,?)" group per URL; the values are bound rather than concatenated.
	$rowcount = (int)(count($params) / 2);
	$sql = 'INSERT INTO reviewqueue (url,worksafe) VALUES ' . implode(',', array_fill(0, $rowcount, '(?,?)'));

	$stmt = mysqli_prepare($link, $sql);
	if (!$stmt
		|| !mysqli_stmt_bind_param($stmt, str_repeat('si', $rowcount), ...$params)
		|| !mysqli_stmt_execute($stmt))
	{
		$error = 'Error fetching index: ' . mysqli_error($link);
		include 'error.html.php';
		exit();
	}
	mysqli_stmt_close($stmt);

	$output = 'Submission successful.';
	include 'submit.html.php';
?>

25
html/bulksubmit/error.html.php Executable file
View file

@ -0,0 +1,25 @@
<!DOCTYPE html>
<html>
<head>
<title>PHP Error Output</title>
<meta http-equiv="content-type"
content="text/html; charset=utf-8"/>
</head>
<body>
<p>
<?php echo $error; ?>
</p>
</body>
</html>

59
html/bulksubmit/form.html.php Executable file
View file

@ -0,0 +1,59 @@
<!DOCTYPE html>
<html>
<head>
<title>Bulk submit to the Wiby Web</title>
<meta http-equiv="content-type" content="text/html; charset=utf-8"/>
<LINK REL=STYLESHEET HREF="/styles.css" TYPE="text/css">
<style type="text/css">
textarea {
display: block;
width: 100%;
}
</style>
</head>
<body>
<form action="bulksubmit.php" method="post">
<div>
<label for="urls">URL List:</label><br>
<?php if(isset($_POST['urls'])) : ?>
<textarea id="urls" name="urls" maxlength="8000" rows="10" cols="60" ><?php echo htmlspecialchars($_POST['urls'], ENT_QUOTES, 'UTF-8') ?></textarea>
<?php else : ?>
<textarea id="urls" name="urls" maxlength="8000" rows="10" cols="60" ></textarea>
<?php endif; ?>
</div>
<div>
<label for="worksafe">worksafe:</label>
<input type="checkbox" id="worksafe" name="worksafe" checked="checked">
</div><br>
<br>
<div><input type="submit" value="Submit"/></div>
<br><br>
<h3>What kind of pages get indexed?</h3>
<p>
Pages must be simple in design. Simple HTML, <b>non-commercial</b> sites are preferred.<br>
<b>Pages should not rely heavily on scripts/css for cosmetic effect.</b> Some might squeak through.<br>
Don't use ads that are intrusive (such as ads that appear overtop of content).<br>
Don't submit a page which serves primarily as a portal to other bloated websites.<br>
If you submit a blog, submit a few of your articles, not your main feed.<br>
If your page does not contain any text or uses frames, ensure a meta description tag is added.<br>
Only the page you submit will be crawled.<br>
</p>
<p class="pin">
<br><br>Note:<br>
<br>The WibyBot (172.93.49.59) is occasionally rejected by some web servers.
<br>Barring technical issues, if you are puzzled why a site wasn't indexed, reread the above guide.
<br>Angelfire and Tripod pages are no longer grandfathered (ads are too intrusive).
</p>
</form>
</body>
</html>

95
html/bulksubmit/index.php Executable file
View file

@ -0,0 +1,95 @@
<?php
	// Login handler for the bulk submission tool.
	//
	// Shows the login form until a user name and password are posted, checks
	// the captcha for sessions that are not authenticated yet, then verifies
	// the password hash stored in the accounts table. Accounts whose attempts
	// counter has reached 5 are refused. On success bulksubmit.php takes over.

	// This script is also included from bulksubmit.php, which has already started the session.
	if (session_status() !== PHP_SESSION_ACTIVE)
	{
		session_start();
	}

	$have_credentials = isset($_POST['user'], $_POST['pass'])
		&& is_string($_POST['user']) && is_string($_POST['pass']);

	if (!$have_credentials)
	{
		include 'login.html.php';
	}
	else if ($_POST['user'] == '' || $_POST['pass'] == '')
	{
		echo "It doesn't look like you submitted a valid username or password.";
		include 'login.html.php';
	}
	else
	{
		if (!isset($_SESSION["authenticated"]))
		{
			include_once $_SERVER['DOCUMENT_ROOT'] . '/securimage/securimage.php';
			$securimage = new Securimage();
			$captcha_code = isset($_POST['captcha_code']) ? $_POST['captcha_code'] : '';
			if ($securimage->check($captcha_code) == false)
			{
				echo "The security code entered was incorrect.";
				include 'login.html.php';
				exit();
			}
		}

		$link = mysqli_connect('localhost', 'approver', 'foobar');
		if (!$link)
		{
			$error = 'Cant connect to database.';
			include 'error.html.php';
			exit();
		}
		if (!mysqli_set_charset($link, 'utf8'))
		{
			$error = 'Unable to set database connection encoding.';
			include 'error.html.php';
			exit();
		}
		if (!mysqli_select_db($link, 'wiby'))
		{
			$error = 'Unable to locate the database.';
			include 'error.html.php';
			exit();
		}

		// Escape only after the connection is known to be usable.
		$user = mysqli_real_escape_string($link, $_POST['user']);
		// NOTE(review): the password is escaped before verification, as before;
		// presumably the stored hashes were created from the escaped form — confirm
		// against the account management script before changing this.
		$pass = mysqli_real_escape_string($link, $_POST['pass']);

		$loginresult = mysqli_query($link, "SELECT hash, attempts FROM accounts WHERE name = '$user';");
		if (!$loginresult)
		{
			$error = 'Error fetching index: ' . mysqli_error($link);
			include 'error.html.php';
			exit();
		}

		// First matching row, or null when the user name is unknown.
		$account = mysqli_fetch_assoc($loginresult);

		if ($account && $account['attempts'] < 5 && password_verify($pass, (string)$account['hash']))
		{
			if ($account['attempts'] > 0)
			{
				if (!mysqli_query($link, "UPDATE accounts SET attempts = '0' WHERE name = '$user';"))
				{
					$error = 'Error fetching index: ' . mysqli_error($link);
					include 'error.html.php';
					exit();
				}
			}
			// Issue a fresh session id on login so a pre-login id cannot be reused.
			session_regenerate_id(true);
			$_SESSION["authenticated"] = true;
			$_SESSION["user"] = $user;
			$_SESSION["loadreview"] = true;
			include 'bulksubmit.php';
			exit();
		}
		else
		{
			// Count the failure against the account; nothing to update for unknown names.
			if ($account)
			{
				$attempt = (int)$account['attempts'] + 1;
				if (!mysqli_query($link, "UPDATE accounts SET attempts = '$attempt' WHERE name = '$user';"))
				{
					$error = 'Error fetching index: ' . mysqli_error($link);
					include 'error.html.php';
					exit();
				}
			}
			echo "It doesn't look like you submitted a valid username or password.";
			include 'login.html.php';
		}
	}
?>

31
html/bulksubmit/login.html.php Executable file
View file

@ -0,0 +1,31 @@
<!DOCTYPE html>
<?php session_start(); ?>
<html>
<head>
<title>wiby.me</title>
<meta http-equiv="content-type" content="text/html; charset=utf-8"/>
<link rel="stylesheet" type="text/css" href="/styles.css">
</head>
<body>
<form method="post">
Username <input type="text" name="user" id="user"/><br>
Password <input type="password" name="pass" id="pass"/><br><br>
<?php if($_SESSION["authenticated"]!=true): ?>
<div>
<img id="captcha" src="/securimage/securimage_show.php" alt="CAPTCHA Image" />
</div>
<div>
<input type="text" name="captcha_code" size="10" maxlength="6" />
<a href="#" onclick="document.getElementById('captcha').src = '/securimage/securimage_show.php?' + Math.random(); return false">Reload Image</a>
</div>
<?php endif; ?>
<br><input type="submit" id="login" value="Login"/>
</form>
</body>
</html>

28
html/bulksubmit/submit.html.php Executable file
View file

@ -0,0 +1,28 @@
<!DOCTYPE html>
<html>
<head>
<title>Completed Submission</title>
<meta http-equiv="content-type" content="text/html; charset=utf-8"/>
<LINK REL=STYLESHEET HREF="/styles.css" TYPE="text/css">
</head>
<body>
<p>
<?php echo $output; ?>
<br><br>
<a href="/bulksubmit/bulksubmit.php">Return to bulk submission page</a>
<br><br>
<a href="/">Return to wiby</a>
</p>
</body>
</html>

25
html/error.html.php Executable file
View file

@ -0,0 +1,25 @@
<!DOCTYPE html>
<html>
<head>
<title>PHP Error Output</title>
<meta http-equiv="content-type"
content="text/html; charset=utf-8"/>
</head>
<body>
<p>
<?php echo $error; ?>
</p>
</body>
</html>

BIN
html/favicon.ico Executable file

Binary file not shown.

After

Width:  |  Height:  |  Size: 894 B

25
html/feedback/error.html.php Executable file
View file

@ -0,0 +1,25 @@
<!DOCTYPE html>
<html>
<head>
<title>PHP Error Output</title>
<meta http-equiv="content-type"
content="text/html; charset=utf-8"/>
</head>
<body>
<p>
<?php echo $error; ?>
</p>
</body>
</html>

40
html/feedback/form.html.php Executable file
View file

@ -0,0 +1,40 @@
<!DOCTYPE html>
<html>
<head>
<title>Wiby Feedback Form</title>
<meta http-equiv="content-type" content="text/html; charset=utf-8"/>
<LINK REL=STYLESHEET HREF="/styles.css" TYPE="text/css">
</head>
<body>
<form method="post" >
<div>
<label for="feedback">Feedback:</label><br>
<?php if(isset($_POST['feedback'])) : ?>
<textarea id="feedback" name="feedback" maxlength="8000" rows="10" cols="60"><?php echo htmlspecialchars($_POST['feedback'], ENT_QUOTES, 'UTF-8') ?></textarea>
<?php else : ?>
<textarea id="feedback" name="feedback" maxlength="8000" rows="10" cols="60"></textarea>
<?php endif; ?>
</div>
<br>
<div>
<img id="captcha" src="/securimage/securimage_show.php" alt="CAPTCHA Image" />
</div>
<div>
<input type="text" name="captcha_code" size="10" maxlength="6" />
<a href="#" onclick="document.getElementById('captcha').src = '/securimage/securimage_show.php?' + Math.random(); return false">Reload Image</a>
<br><p class="pin">* Cookies must be enabled for the captcha.</p>
</div><br>
<div><input type="submit" value="Submit"/></div>
</form>
</body>
</html>

73
html/feedback/index.php Executable file
View file

@ -0,0 +1,73 @@
<?php
/*
 * Feedback form handler.
 *
 * - No "feedback" field posted: show the form.
 * - Empty (or non-string) "feedback": show a notice and the form again.
 * - Otherwise: verify the captcha, store the message in the feedback table
 *   and show the thank-you page with the (escaped) message.
 */
session_start();
if (!isset($_POST['feedback']))
{
    include 'form.html.php';
}
else if (!is_string($_POST['feedback']) || $_POST['feedback'] == '')
{
    echo "It doesn't look like you submitted anything.";
    include 'form.html.php';
}
else
{
    include_once $_SERVER['DOCUMENT_ROOT'] . '/securimage/securimage.php';
    $securimage = new Securimage();
    // A request that omits the captcha field is treated as a wrong code.
    $captchaCode = isset($_POST['captcha_code']) ? $_POST['captcha_code'] : '';
    if ($securimage->check($captchaCode) == false)
    {
        echo "The security code entered was incorrect.";
        include 'form.html.php';
        exit();
    }

    $link = mysqli_connect('localhost', 'guest', 'qwer');
    if (!$link)
    {
        $error = 'Cant connect to database.';
        include 'error.html.php';
        exit();
    }
    if (!mysqli_set_charset($link, 'utf8'))
    {
        $error = 'Unable to set database connection encoding.';
        include 'error.html.php';
        exit();
    }
    if (!mysqli_select_db($link, 'wiby'))
    {
        $error = 'Unable to locate the database.';
        include 'error.html.php';
        exit();
    }

    // Don't allow a message longer than 8k (also limited in the form).
    $feedback = substr($_POST['feedback'], 0, 8000);

    // The message is bound as a statement parameter instead of being spliced
    // into the SQL text. The previous quote-doubling could be defeated with a
    // backslash, and truncating after doubling could cut a "" pair in half.
    // Binding also stores the text exactly as typed, so it reads back unchanged.
    $stmt = mysqli_prepare($link, 'INSERT INTO feedback (message) VALUES (?)');
    if (!$stmt
        || !mysqli_stmt_bind_param($stmt, 's', $feedback)
        || !mysqli_stmt_execute($stmt))
    {
        $error = 'Error fetching index: ' . mysqli_error($link);
        include 'error.html.php';
        exit();
    }
    mysqli_stmt_close($stmt);

    //Send thank you message which includes feedback
    $output = htmlspecialchars($_POST['feedback'], ENT_QUOTES, 'UTF-8');
    include 'submit.html.php';
}
?>

36
html/feedback/submit.html.php Executable file
View file

@ -0,0 +1,36 @@
<!DOCTYPE html>
<html>
<head>
    <title>Completed Feedback Submission</title>
    <meta http-equiv="content-type" content="text/html; charset=utf-8"/>
    <link rel="stylesheet" href="/styles.css" type="text/css">
    <style>
        /* Keep the user's line breaks but wrap long lines instead of overflowing. */
        pre {
            white-space: pre-wrap;
            white-space: -moz-pre-wrap;
            white-space: -pre-wrap;
            white-space: -o-pre-wrap;
            word-wrap: break-word;
        }
    </style>
</head>
<body>
<p>
    Thank you for submitting feedback:<blockquote><br>
<?php /* $output is set by the including script and printed as-is. */ ?>
<pre><?php echo $output; ?></pre>
    </blockquote><br><br>
    <a href="/about/">Return to About page</a>
    <br><br>
    <a href="/">Return to Wiby</a>
</p>
</body>
</html>

45
html/form.html.php Executable file
View file

@ -0,0 +1,45 @@
<?php
// Front page template: search box, links to submit/settings/surprise, and an
// ASCII-art lighthouse. ARIA roles: the form is the search landmark and the
// <pre> art is exposed to assistive technology as a single labelled image.
?>
<!DOCTYPE html>
<html lang="en">
<head>
<title>Title</title>
<meta http-equiv="content-type" content="text/html; charset=utf-8"/>
<meta name="description" content="Set your description"/>
<link rel="stylesheet" href="/styles.css" type="text/css">
<link rel="search" type="application/opensearchdescription+xml" title="Search Title" href="/opensearch.xml">
</head>
<body>
<div align="right"><a class="tiny" href="/submit/">Submit a page here!</a><br></div>
<div align="right"><a class="tiny" href="/settings/">Settings</a></div>
<div align="center"><h1 class="titlep">name</h1><br></div>
<form method="get" role="search">
<div align="center">
<input type="text" name="q" id="q" size="45" aria-label="Main search form" autofocus/>
<input type="submit" value="Search"/>
</div><br>
<div align="center"><a class="tiny" href="/surprise/">surprise me...</a></div>
</form>
<div align="center">
<div style="width:400px;text-align: left;">
<pre role="img" aria-label="Ascii art of a lighthouse overlooking the sea.">
.n. |
/___\ _.---. \ _ /
[|||] (_._ ) )--;_) =-
[___] '---'.__,' \
}-=-{ |
|-" |
|.-"| p
~^=~^~-|_.-|~^-~^~ ~^~ -^~^~|\ ~^-~^~-
^ .=.| _.|__ ^ ~ /| \
~ /:. \" _|_/\ ~ /_|__\ ^
.-/::. | |""|-._ ^ ~~~~
`===-'-----'""` '-. ~
jgs __.-' ^
</pre>
</div>
<br><a class="pin1" href="/about/pp.html">Privacy</a> | <a class="pin1" href="/about/">About</a>
</div>
</body>
</html>

25
html/grave/error.html.php Executable file
View file

@ -0,0 +1,25 @@
<!DOCTYPE html>
<html>
<head>
    <title>PHP Error Output</title>
    <meta http-equiv="content-type" content="text/html; charset=utf-8"/>
</head>
<body>
<p>
<?php
// Error page template: prints the message the including script stored in $error.
// The message is escaped here because callers append mysqli_error() output, which
// can repeat fragments of the user's input back into the page.
echo htmlspecialchars(isset($error) ? $error : '', ENT_QUOTES, 'UTF-8');
?>
</p>
</body>
</html>

195
html/grave/graveyard.php Executable file
View file

@ -0,0 +1,195 @@
<?php
/*
 * Graveyard review queue (authenticated approvers only).
 *
 * GET / first load: lists up to $lim graveyard rows that are unreserved,
 * reserved by this approver, or whose reservation is older than 30 minutes,
 * marks them as reserved by this approver and renders graveyardqueue.html.php.
 *
 * POST of the review form (startid/endid plus per-row fields): every row in
 * the reviewed id range that is neither skipped nor denied is inserted into
 * indexqueue; rows that are not skipped (or are skipped AND denied) are
 * deleted from graveyard. The script then includes itself to show the next
 * batch.
 *
 * $_SESSION["user"] is interpolated into SQL as-is: the login script stores
 * it already passed through mysqli_real_escape_string().
 */
// This script includes itself after a submission and is also included by the
// login script, so only start the session when none is active yet.
if (session_status() === PHP_SESSION_NONE)
{
    session_start();
}
if (empty($_SESSION["authenticated"]))
{
    include 'index.php';
    exit();
}

// True when this request is the submission of a previously displayed batch.
$reviewing = isset($_POST['startid']) && empty($_SESSION["loadgraveyard"]);
if ($reviewing)
{
    // Ids go into SQL in a numeric context, so force them to integers.
    $startID = (int) $_POST['startid'];
    $endID = isset($_POST['endid']) ? (int) $_POST['endid'] : 0;
}

$link = mysqli_connect('localhost', 'approver', 'foobar');
if (!$link)
{
    $error = 'Cant connect to database.';
    include 'error.html.php';
    exit();
}
if (!mysqli_set_charset($link, 'utf8'))
{
    $error = 'Unable to set database connection encoding.';
    include 'error.html.php';
    exit();
}
if (!mysqli_select_db($link, 'wiby'))
{
    $error = 'Unable to locate the database.';
    include 'error.html.php';
    exit();
}

$lim = 100; //note, setting this too high will cause PHP Warning: Unknown: Input variables exceeded 1000

if ($reviewing) //this is incase any new submissions are made during the review process, they will be ignored
{
    $result = mysqli_query($link, "SELECT * FROM graveyard WHERE id >= $startID AND id <= $endID");
}
else
{
    //check graveyard for rows that are reserved within reservetime. Do not select reserved rows. If reserved rows exceed 30mins, they can be reserved by different approver.
    $result = mysqli_query($link, "SELECT * FROM graveyard WHERE reserved IS NULL OR reserved = '".$_SESSION["user"]."' OR reservetime < NOW() - INTERVAL 30 MINUTE LIMIT $lim");
}
if (!$result)
{
    $error = 'Error fetching index: ' . mysqli_error($link);
    include 'error.html.php';
    exit();
}

//lets put contents of index into arrays (left empty when the queue has no rows)
$id = array();
$url = array();
$worksafe = array();
while ($row = mysqli_fetch_array($result))
{
    $id[] = $row['id'];
    $url[] = $row['url'];
    $worksafe[] = $row['worksafe'];
}

// Unchecked checkboxes are simply absent from the POST data.
$isChecked = function ($field) {
    return isset($_POST[$field]) && $_POST[$field] == 'on';
};
// Numeric form fields; a missing or blank field becomes 0.
$postInt = function ($field) {
    return isset($_POST[$field]) ? (int) $_POST[$field] : 0;
};

if ($_SERVER['REQUEST_METHOD'] === 'POST' && $reviewing)
{   //store approved url list into indexqueue
    $i = 0;
    $num_crawlers = 1; //modify this variable to the number of crawlers you are using in parallel.
    $crawler_id = 1;
    foreach ($id as $pageid)
    {
        $pageid = (int) $pageid;
        $denied = $isChecked("deny$pageid");
        $skipped = $isChecked("skip$pageid");

        if (!$denied && !$skipped)
        {
            $worksafeFlag = $isChecked("worksafe$pageid") ? 1 : 0;
            $surprise = $isChecked("surprise$pageid") ? 1 : 0;
            $forcerules = $isChecked("forcerules$pageid") ? 1 : 0;
            $crawlrepeat = $isChecked("crawlrepeat$pageid") ? 1 : 0;
            $updatable = $postInt("updatable$pageid");
            $crawldepth = $postInt("crawldepth$pageid");
            $crawlpages = $postInt("crawlpages$pageid");
            $crawltype = $postInt("crawltype$pageid");
            // The URL was submitted by the public, so escape it before
            // splicing it into the statement.
            $safeUrl = mysqli_real_escape_string($link, $url[$i]);

            $sql = 'INSERT INTO indexqueue (url,worksafe,approver,surprise,updatable,crawl_depth,crawl_pages,crawl_type,force_rules,crawl_repeat,crawler_id) VALUES ("'.$safeUrl.'","'.$worksafeFlag.'","'.$_SESSION["user"].'","'.$surprise.'","'.$updatable.'","'.$crawldepth.'","'.$crawlpages.'","'.$crawltype.'","'.$forcerules.'","'.$crawlrepeat.'","'.$crawler_id.'")';
            if (!mysqli_query($link, $sql))
            {
                $error = 'Error inserting into indexqueue: ' . mysqli_error($link);
                include 'error.html.php';
                exit();
            }
        }

        // Skipped rows stay in the graveyard unless they were also denied.
        if (!$skipped || $denied)
        {
            $result2 = mysqli_query($link, "DELETE FROM graveyard WHERE id = $pageid");
            if (!$result2)
            {
                $error = 'Error deleting from graveyard: ' . mysqli_error($link);
                include 'error.html.php';
                exit();
            }
        }

        $i++;
        // Rotate through the crawler ids.
        if ($crawler_id == $num_crawlers) {
            $crawler_id = 1;
        } else {
            $crawler_id++;
        }
    }
    $_SESSION["loadgraveyard"] = true;
    unset($id);
    unset($url);
    unset($worksafe);
    unset($startID);
    unset($endID);
    unset($result);
    $link -> close();
    include 'graveyard.php';
    //include 'refresh.html';
    exit();
}
else
{
    $_SESSION["loadgraveyard"] = false;
    //insert approver into reserved, reservetime will autoupdate, so that they cannot be taken by a different approver for 30 mins.
    foreach ($id as $pageid)
    {
        $pageid = (int) $pageid;
        $result = mysqli_query($link, "UPDATE graveyard SET reserved = '".$_SESSION["user"]."' WHERE id = $pageid");
        if (!$result)
        {
            $error = 'Error fetching index: ' . mysqli_error($link);
            include 'error.html.php';
            exit();
        }
    }
    //get total number of rows remaining in queue
    $totalrows = mysqli_query($link, "select count(id) from graveyard");
    if (!$totalrows)
    {
        $error = 'Error fetching index: ' . mysqli_error($link);
        include 'error.html.php';
        exit();
    }
    //get result of total rows remaining in queue
    while ($row = mysqli_fetch_array($totalrows))
    {
        $queuesize = $row['count(id)'];
        echo $queuesize . " pages queued in total.";
    }
    include 'graveyardqueue.html.php';
}
?>

View file

@ -0,0 +1,64 @@
<!DOCTYPE html>
<html>
<head>
    <title>Graveyard</title>
    <meta http-equiv="content-type" content="text/html; charset=utf-8"/>
    <link rel="stylesheet" type="text/css" href="/styles.css">
    <style>
        blockquote { width: 100%; }
    </style>
</head>
<body>
<?php
// Review form for a batch of graveyard pages. Expects the including script to
// provide the parallel arrays $id, $url and $worksafe (one entry per row).
// With no rows there is nothing to submit, so the form and its hidden
// startid/endid fields (taken from the first and last row) are not rendered.
$hasPages = !empty($id) && !empty($url);
?>
<?php if (!$hasPages): ?>
<p>No pages are awaiting review.</p>
<?php else: ?>
<form action="graveyard.php" method="post">
<p>Some pages awaiting review:</p>
<?php foreach ($url as $i => $pageurl): ?>
<?php $pageId = (int) $id[$i]; ?>
<blockquote><p>
<a class="tlink" href="<?php echo htmlspecialchars($pageurl, ENT_QUOTES, 'UTF-8'); ?>" style="font-size: 17px;" target="_blank"><?php echo htmlspecialchars($pageurl, ENT_QUOTES, 'UTF-8'); ?></a><br>
[Worksafe<input type="checkbox" id="worksafe<?php echo $pageId ?>" name="worksafe<?php echo $pageId ?>"<?php echo ($worksafe[$i] == '1') ? ' checked="checked"' : ''; ?>>]
[Surprise<input type="checkbox" id="surprise<?php echo $pageId ?>" name="surprise<?php echo $pageId ?>" >]
[Skip<input type="checkbox" id="skip<?php echo $pageId ?>" name="skip<?php echo $pageId ?>" checked="checked">]
[Deny<input type="checkbox" id="deny<?php echo $pageId ?>" name="deny<?php echo $pageId ?>" >]
[Updatable<select id="updatable<?php echo $pageId ?>" name="updatable<?php echo $pageId ?>">
<option value=1>1 WEEK</option>
<option value=2>1 DAY</option>
<option value=3>12 HOUR</option>
<option value=4>6 HOUR</option>
<option value=5>3 HOUR</option>
<option value=6>1 HOUR</option>
</select>]
[Crawl: Depth <input type="number" id="crawldepth<?php echo $pageId ?>" name="crawldepth<?php echo $pageId ?>" >
Pages <input type="number" id="crawlpages<?php echo $pageId ?>" name="crawlpages<?php echo $pageId ?>" >
Type <select id="crawltype<?php echo $pageId ?>" name="crawltype<?php echo $pageId ?>">
<option value=0>Local</option>
<option value=1>All</option>
<option value=2>External</option>
</select>
Enforce Rules<input type="checkbox" id="forcerules<?php echo $pageId ?>" name="forcerules<?php echo $pageId ?>" >
Repeat<input type="checkbox" id="crawlrepeat<?php echo $pageId ?>" name="crawlrepeat<?php echo $pageId ?>" >]
</p></blockquote>
<?php endforeach; ?>
<br>
<div><input type="submit" id="submit" value="Submit"/></div>
<input type="hidden" name="startid" id="startid" value="<?php echo (int) $id[0]; ?>">
<input type="hidden" name="endid" id="endid" value="<?php echo (int) $id[count($id) - 1]; ?>">
</form>
<?php endif; ?>
</body>
</html>

95
html/grave/index.php Executable file
View file

@ -0,0 +1,95 @@
<?php
/*
 * Graveyard login.
 *
 * Shows the login form until a user name and password are posted. A captcha
 * is required while the session has no "authenticated" entry. On success the
 * failed-attempt counter is reset, the session is marked authenticated and
 * graveyard.php is included. On failure the counter is incremented; an
 * account with 5 or more recorded attempts can no longer log in.
 */
session_start();
if (!isset($_POST['pass']) || !isset($_POST['user']))
{
    include 'login.html.php';
}
else if (!is_string($_POST['user']) || !is_string($_POST['pass']) || $_POST['user'] == '' || $_POST['pass'] == '')
{
    echo "It doesn't look like you submitted a valid username or password.";
    include 'login.html.php';
}
else
{
    if (!isset($_SESSION["authenticated"]))
    {
        include_once $_SERVER['DOCUMENT_ROOT'] . '/securimage/securimage.php';
        $securimage = new Securimage();
        // A request that omits the captcha field is treated as a wrong code.
        $captchaCode = isset($_POST['captcha_code']) ? $_POST['captcha_code'] : '';
        if ($securimage->check($captchaCode) == false)
        {
            echo "The security code entered was incorrect.";
            include 'login.html.php';
            exit();
        }
    }

    $link = mysqli_connect('localhost', 'approver', 'foobar');
    if (!$link)
    {
        $error = 'Cant connect to database.';
        include 'error.html.php';
        exit();
    }
    if (!mysqli_set_charset($link, 'utf8'))
    {
        $error = 'Unable to set database connection encoding.';
        include 'error.html.php';
        exit();
    }
    if (!mysqli_select_db($link, 'wiby'))
    {
        $error = 'Unable to locate the database.';
        include 'error.html.php';
        exit();
    }

    // Escape only once the connection is known to be valid and its character
    // set is fixed; the escaping rules depend on that character set.
    $user = mysqli_real_escape_string($link, $_POST['user']);
    // The password never goes into SQL. It is verified exactly as typed:
    // escaping it first would alter passwords containing quotes or backslashes
    // so they could never match their stored hash.
    $pass = $_POST['pass'];

    $loginresult = mysqli_query($link, "SELECT hash, attempts FROM accounts WHERE name = '$user';");
    if (!$loginresult)
    {
        $error = 'Error fetching index: ' . mysqli_error($link);
        include 'error.html.php';
        exit();
    }
    //lets put contents of accounts into an array
    $hash = array();
    $attempts = array();
    while ($rowaccounts = mysqli_fetch_array($loginresult))
    {
        $hash[] = $rowaccounts['hash'];
        $attempts[] = $rowaccounts['attempts'];
    }
    $accountExists = isset($hash[0]);

    if ($accountExists && $attempts[0] < 5 && password_verify($pass, $hash[0]))
    {
        if ($attempts[0] > 0)
        {
            if (!mysqli_query($link, "UPDATE accounts SET attempts = '0' WHERE name = '$user';"))
            {
                $error = 'Error fetching index: ' . mysqli_error($link);
                include 'error.html.php';
                exit();
            }
        }
        $_SESSION["authenticated"] = true;
        // Stored SQL-escaped: other scripts splice this value into queries.
        $_SESSION["user"] = $user;
        $_SESSION["loadgraveyard"] = true;
        include 'graveyard.php';
        exit();
    }
    else
    {
        // For an unknown user name this UPDATE matches no row.
        $attempt = $accountExists ? $attempts[0] + 1 : 1;
        if (!mysqli_query($link, "UPDATE accounts SET attempts = '$attempt' WHERE name = '$user';"))
        {
            $error = 'Error fetching index: ' . mysqli_error($link);
            include 'error.html.php';
            exit();
        }
        echo "It doesn't look like you submitted a valid username or password.";
        include 'login.html.php';
    }
}
?>

31
html/grave/login.html.php Executable file
View file

@ -0,0 +1,31 @@
<?php
// Login form template. The session must be started before any output is sent
// (the session cookie is an HTTP header), and must not be started twice when
// the including script already did so.
if (session_status() === PHP_SESSION_NONE)
{
    session_start();
}
// The captcha is only shown while the session is not authenticated.
$needsCaptcha = empty($_SESSION["authenticated"]);
?>
<!DOCTYPE html>
<html>
<head>
    <title>wiby.me</title>
    <meta http-equiv="content-type" content="text/html; charset=utf-8"/>
    <link rel="stylesheet" type="text/css" href="/styles.css">
</head>
<body>
<form method="post">
    Username <input type="text" name="user" id="user"/><br>
    Password <input type="password" name="pass" id="pass"/><br><br>
<?php if ($needsCaptcha): ?>
    <div>
        <img id="captcha" src="/securimage/securimage_show.php" alt="CAPTCHA Image" />
    </div>
    <div>
        <input type="text" name="captcha_code" size="10" maxlength="6" />
        <a href="#" onclick="document.getElementById('captcha').src = '/securimage/securimage_show.php?' + Math.random(); return false">Reload Image</a>
    </div>
<?php endif; ?>
    <br><input type="submit" id="login" value="Login"/>
</form>
</body>
</html>

4
html/hash/hashmake.php Executable file
View file

@ -0,0 +1,4 @@
<?php
// Prints a password_hash() digest of the hard-coded password below, followed
// by a newline.
$hashed = password_hash("secretpassword", PASSWORD_DEFAULT);
echo $hashed . "\n";
?>

450
html/index.php Executable file
View file

@ -0,0 +1,450 @@
<?php
/*
 * Main search page.
 *
 * Without a "q" parameter the search form is shown. Otherwise the query is
 * checked for an optional "!xx " / "&xx " prefix (redirect to another search
 * engine, or restrict results to a time window), an optional "site:" filter
 * and +/- flags, then run as a MySQL boolean-mode full-text search against
 * the windex table. If that finds nothing on the first page, the search is
 * retried with '*' appended. A body snippet is picked for every hit and the
 * page is rendered by results.html.php.
 *
 * Request input: $_GET['q'] (query), $_GET['o'] (result offset) and the
 * cookies "ws" ("0" turns the worksafe filter off) and "hs" ("1" turns the
 * http-only filter on).
 */
session_start();

// Worksafe filtering is on unless the cookie explicitly disables it.
$worksafe = !(isset($_COOKIE['ws']) && is_string($_COOKIE['ws']) && $_COOKIE['ws'] == "0");
// HTTPS filtering is off unless the cookie explicitly enables it.
$filterHTTPS = isset($_COOKIE['hs']) && is_string($_COOKIE['hs']) && $_COOKIE['hs'] == "1";

if (!isset($_REQUEST['q']))
{
    include 'form.html.php';
}
else
{
    // Query exactly as typed; handed back to the results template at the end.
    $typedQuery = (isset($_GET['q']) && is_string($_GET['q'])) ? $_GET['q'] : '';
    $rawquery = $typedQuery;
    $lim = 12;
    $starappend = 0;
    $startID = 0;
    $additions = "";

    //Check if no query found
    if ($rawquery === "")
    {
        include 'form.html.php';
        exit();
    }

    //fix phone users putting space at end
    if (strlen($rawquery) > 1 && $rawquery[strlen($rawquery) - 1] == " ") {
        $rawquery = substr($rawquery, 0, strlen($rawquery) - 1);
    }

    //check if user wants to search a different search engine (!) or time window
    // This runs on the unescaped text so redirect targets can be URL-encoded
    // from what the user actually typed.
    if (($rawquery[0] == "!" || $rawquery[0] == "&") && strlen($rawquery) > 3)
    {
        //separate actual query from search redirect
        $actualquery = "";
        $redirect = "";
        if ($rawquery[2] == " ") {
            $redirect = substr($rawquery, 1, 1);
            $actualquery = (string) substr($rawquery, 3);
        } else if ($rawquery[3] == " ") {
            $redirect = substr($rawquery, 1, 2);
            $actualquery = (string) substr($rawquery, 4);
        }

        // External engines that take the query as a URL parameter.
        $engines = array(
            'g'  => 'http://google.com/search?q=',
            'b'  => 'http://bing.com/search?q=',
            'gi' => 'http://www.google.com/search?tbm=isch&q=',
            'bi' => 'http://www.bing.com/images/search?q=',
            'gv' => 'http://www.google.com/search?tbm=vid&q=',
            'bv' => 'http://www.bing.com/videos/search?q=',
            'bm' => 'http://www.bing.com/maps?q=',
        );
        // Time windows, in days.
        $windows = array('td' => 1, 'tw' => 7, 'tm' => 30, 'ty' => 365);

        //determine which search engine to redirect or which time window to use
        if (isset($engines[$redirect])) {
            header('Location: ' . $engines[$redirect] . urlencode($actualquery));
            exit();
        } else if ($redirect == "gm") {//google maps takes the query as a path segment
            header('Location: ' . 'http://www.google.com/maps/search/' . rawurlencode($actualquery));
            exit();
        } else if (isset($windows[$redirect])) {
            $additions = $additions . "AND date > NOW() - INTERVAL " . $windows[$redirect] . " DAY ";
            $rawquery = $actualquery;
        } else {
            header('Location: ' . '/?q=' . urlencode($actualquery));
            exit();
        }
    }

    $link = mysqli_connect('localhost', 'guest', 'qwer');
    if (!$link)
    {
        $error = 'Cant connect to database.';
        include 'error.html.php';
        exit();
    }
    if (!mysqli_set_charset($link, 'utf8mb4'))
    {
        $error = 'Unable to set database connection encoding.';
        include 'error.html.php';
        exit();
    }
    if (!mysqli_select_db($link, 'wiby'))
    {
        $error = 'Unable to locate the database.';
        include 'error.html.php';
        exit();
    }

    // Escape only once the connection is valid and its character set is fixed.
    $query = mysqli_real_escape_string($link, $rawquery);

    //check if user wants to limit search to a specific website
    if (strlen($query) > 5 && strcasecmp(substr($query, 0, 5), "site:") == 0) {
        //remove 'site:'
        $query = substr($query, 5, strlen($query) - 5);
        //get site:
        $site = strstr($query, ' ', true);
        //now just get the search query
        $query = strstr($query, ' ', false);
        $query = substr($query, 1, strlen($query) - 1);
        //add to additions
        $additions = $additions . "AND url LIKE '%" . $site . "%' ";
    }

    // The offset is used in a numeric SQL context (OFFSET n), where string
    // escaping gives no protection, so force it to a non-negative integer.
    $offset = (isset($_GET['o']) && is_scalar($_GET['o'])) ? max(0, (int) $_GET['o']) : 0;

    //Check if query is a url (contains http:// or https:// and no spaces). If so, put quotations around to to get an exact match
    $urlDetected = 0;
    //if(strpos($query, ' ') == false && strpos($query,'.') == true && strpos($query,'"') == false && preg_match('/http/',$query) == true)
    if (strpos($query, ' ') == false && strpos($query, '.') == true && strpos($query, '"') == false)//note this will flag on file extensions also
    {
        $queryOriginal = $query;
        $query = '"' . $query . '"';
        $urlDetected = 1;
    }

    //did user manually set -https instead of settings cookie?
    if (substr($query, -7) == " -https") {
        $filterHTTPS = true;
        $query = substr($query, 0, -7);
    }

    //if query is just 1 or 2 letters, help make it work. Also CIA :D
    if (strlen($query) < 3 || $query == "cia" || $query == "CIA") {
        $query = " " . $query . " *";
    }

    //Are there quotes in the query? If so this is an exact match; strip them for the no-quotes copy
    $queryNoQuotes = $query;
    $exactMatch = (strpos($queryNoQuotes, '"') !== false);
    if ($exactMatch == true)
    {
        $queryNoQuotes = str_replace('"', "", $queryNoQuotes);
    }

    //first remove any flags inside queryNoQuotes, also grab any required words (+ prefix)
    $queryNoQuotesOrFlags = '';
    $requiredword = '';
    if (strpos($queryNoQuotes, '+') !== false || strpos($queryNoQuotes, '-') !== false) {
        $words = explode(' ', $queryNoQuotes);
        $i = 0;
        foreach ($words as $word) {
            // Consecutive spaces produce empty words, which have no first character.
            $first = ($word === '') ? '' : $word[0];
            if ($i != 0 && $first != '-' && $first != '+') {
                $queryNoQuotesOrFlags .= ' ';
            }
            if ($first != '-' && $first != '+') {
                $queryNoQuotesOrFlags .= $word;
            }
            if ($first == '+' && strlen($word) > 1) {
                $requiredword = substr($word, 1);
            }
            $i++;
        }
    }

    //remove the '*' if contained anywhere in queryNoQuotes
    if (strpos($queryNoQuotes, '*') !== false && $exactMatch == false) {
        $queryNoQuotes = str_replace('*', "", $queryNoQuotes);
    }

    // Escaped again on purpose: removing quotes above can leave a bare
    // backslash behind from the first escaping pass.
    $queryNoQuotes_SQLsafe = mysqli_real_escape_string($link, $queryNoQuotes);

    if ($exactMatch == false)
    {
        //find longest word in query
        $words = explode(' ', $queryNoQuotes);
        $longestWordLength = 0;
        $longestWord = '';
        $wordcount = 0;
        $longestwordelementnum = 0;
        foreach ($words as $word) {
            if (strlen($word) > $longestWordLength) {
                $longestWordLength = strlen($word);
                $longestWord = $word;
                $longestwordelementnum = $wordcount;
            }
            $wordcount++;
        }
    }

    //Check if query contains a hyphenated word. MySQL doesn't handle them smartly. We will wrap quotes around hyphenated words that aren't part of a string which is already wraped in quotes.
    if ((strpos($queryNoQuotes, '-') !== false || strpos($queryNoQuotes, '+') !== false) && $urlDetected == false) {
        if ($query == "c++" || $query == "C++") {//shitty but works
            $query = "c++ programming";
        }
        $hyphenwords = explode(' ', $query);
        $query = '';
        $quotes = 0;
        $i = 0;
        foreach ($hyphenwords as $word) {
            $first = ($word === '') ? '' : $word[0];
            if (strpos($queryNoQuotes, '"') !== false) {
                $quotes++;
            }
            if (((strpos($queryNoQuotes, '-') !== false && $first != '-') || (strpos($queryNoQuotes, '+') !== false && $first != '+')) && $quotes % 2 == 0) {//if hyphen exists, not a flag, not wrapped in quotes already
                $word = '"' . $word . '"';
            }
            if ($i > 0) {
                $query .= ' ';
            }
            $query .= $word;
            $i++;
        }
    }

    if ($filterHTTPS == true) {
        $additions = $additions . "AND http = '1' ";
    }
    if ($worksafe == true) {
        $additions = $additions . "AND worksafe = '1' ";
    }

    //perform full text search FOR InnoDB or MyISAM STORAGE ENGINE
    // Both attempts (plain and '*'-appended) use the same statement; only the
    // boolean-mode expression differs.
    $runSearch = function ($against) use ($link, $additions, $queryNoQuotes_SQLsafe, $lim, $offset) {
        $sql = "SELECT id, url, title, description, body FROM windex"
            . " WHERE Match(tags, body, description, title, url) Against('$against' IN BOOLEAN MODE) AND enable = '1' $additions"
            . " ORDER BY CASE"
            . " WHEN LOCATE('$queryNoQuotes_SQLsafe', tags)>0 THEN 30"
            . " WHEN LOCATE('$queryNoQuotes_SQLsafe', title)>0 AND Match(title) AGAINST('$against' IN BOOLEAN MODE) THEN 20"
            . " WHEN LOCATE('$queryNoQuotes_SQLsafe', title)>0 THEN 15"
            . " WHEN Match(title) AGAINST('$against' IN BOOLEAN MODE) THEN Match(title) AGAINST('$against' IN BOOLEAN MODE)"
            . " WHEN LOCATE('$queryNoQuotes_SQLsafe', body)>0 THEN 14"
            . " END DESC LIMIT $lim OFFSET $offset";
        return mysqli_query($link, $sql);
    };

    //dont error out on failure yet, will give another try below
    $outputFTS = $runSearch($query);

    if ($urlDetected == 1)
    {
        $query = $queryOriginal;
    }

    //perform full text search with * appended
    if ((!$outputFTS || mysqli_num_rows($outputFTS) == 0) && $offset == 0 && $urlDetected == 0 && $exactMatch == false)
    {
        $starappend = 1;
        //innodb will get fussy over some things if put in like '''' or ****
        $querystar = str_replace(array('*', '"', '\''), "", $query);
        $querystar = $querystar . '*';
        $outputFTS = $runSearch($querystar);
        if (!$outputFTS)
        {
            $error = 'Error ' . mysqli_error($link);
            include 'error.html.php';
            exit();
        }
    }

    $count = 0;
    $query = $typedQuery;

    //this will get set if position of longest word of query is found within body
    $pos = -1;

    //lets put contents of the full text search into the array
    // A failed first query that was not retried yields an empty result page.
    while ($outputFTS && ($row = mysqli_fetch_array($outputFTS)))
    {
        //put the contents of the URL column within the DB into an array
        $id[] = $row[0];
        $url[] = $row[1];
        $title[] = substr($row[2], 0, 150);
        $description[] = substr($row[3], 0, 180);
        $body = $row[4];
        $count++;
        $lastID = $row[0];

        if ($exactMatch == false)
        {
            //remove the '*' at the end of the longest word if present
            if (strpos($longestWord, '*') == true)
            {
                $longestWord = str_replace('*', "", $longestWord);
            }
            //first find an exact
            if (strlen($requiredword) > 0) {
                $pos = stripos($body, $requiredword);
            } else {
                $pos = stripos($body, $queryNoQuotes);
            }
            //search within body for position of longest query word. If not found, try another word
            if ($pos == false) {
                $pos = stripos($body, $longestWord);
                if ($pos == false && $wordcount > 1)
                {
                    if ($longestwordelementnum > 0)
                    {
                        if (strpos($words[0], '*') == true) {//remove the '*' at the end of the query if present
                            $words[0] = str_replace('*', "", $words[0]);
                        }
                        $pos = stripos($body, $words[0]);
                    }
                    else if ($longestwordelementnum == 0)
                    {
                        if (strpos($words[1], '*') == true) {//remove the '*' at the end of the query if present
                            $words[1] = str_replace('*', "", $words[1]);
                        }
                        $pos = stripos($body, $words[1]);
                    }
                }
            }
        }
        else
        {
            $pos = stripos($body, $queryNoQuotes);
        }

        //still not found?, set position to 0
        if ($pos == false) {
            $pos = 0;
        }

        //figure out how much preceding text to use
        $starttext = ($pos < 32) ? 0 : $pos - 25;

        //total length of the ballpark
        $textlength = 180;

        //populate the ballpark ($pos is never negative at this point)
        $ballparktext = substr($body, $starttext, $textlength);

        //find position of nearest Period
        $posPeriod = stripos($ballparktext, '. ') + $starttext + 1;
        //find position of nearest Space
        $posSpace = stripos($ballparktext, ' ') + $starttext;

        //if longest word in query is after a period+space within ballpark, reset $starttext to that point
        if ($pos - $starttext > $posPeriod)
        {
            $starttext = $posPeriod;
        }
        //else if longest word in query is after a space within ballpark, reset $starttext to that point
        else if ($pos > $posSpace)
        {
            $starttext = $posSpace;
        }
        //otherwise $starttext is unchanged and the snippet equals the ballpark text

        //populate the bodymatch
        if ($pos - $starttext >= 0)
        {
            $bodymatch[] = substr($body, $starttext, $textlength);
        }
        else
        {
            $bodymatch[] = '';
        }
    }

    $row = null;
    $totalcount = $count + $offset;
    include 'results.html.php';
}
?>

25
html/insert/error.html.php Executable file
View file

@ -0,0 +1,25 @@
<!DOCTYPE html>
<html>
<head>
    <title>PHP Error Output</title>
    <meta http-equiv="content-type" content="text/html; charset=utf-8"/>
</head>
<body>
<p>
<?php
// Error page template: prints the message the including script stored in $error.
// The message is escaped here because callers append mysqli_error() output, which
// can repeat fragments of the user's input back into the page.
echo htmlspecialchars(isset($error) ? $error : '', ENT_QUOTES, 'UTF-8');
?>
</p>
</body>
</html>

91
html/insert/form.html.php Executable file
View file

@ -0,0 +1,91 @@
<!DOCTYPE html>
<html>
<head>
    <title>Add to Index</title>
    <meta http-equiv="content-type" content="text/html; charset=utf-8"/>
    <link rel="stylesheet" type="text/css" href="/styles.css">
    <style type="text/css">
        textarea {
            display: block;
            width: 100%;
        }
    </style>
</head>
<body>
<?php
// Manual "add to index" form. Each entry is a form field name and the label
// text shown above its textarea, in display order; the form posts to insert.php.
$insertFields = array(
    'url'         => 'url:',
    'title'       => 'title:',
    'tags'        => 'tags:',
    'description' => 'description :',
    'body'        => 'body:',
    'http'        => 'http (1 or 0):',
    'surprise'    => 'surprise (1 or 0):',
    'worksafe'    => 'worksafe (1 or 0): ',
    'enable'      => 'enable (1 or 0):',
    'updatable'   => 'updatable (0 for no, or integer value 1-6, 1 is default):',
);
?>
<form action="insert.php" method="post">
<?php foreach ($insertFields as $fieldName => $fieldLabel): ?>
    <div>
        <label for="<?php echo $fieldName; ?>"><?php echo $fieldLabel; ?></label>
        <textarea id="<?php echo $fieldName; ?>" name="<?php echo $fieldName; ?>"></textarea>
    </div>
<?php endforeach; ?>
    <div><input type="submit" value="Submit"/></div>
</form>
</body>
</html>

94
html/insert/index.php Executable file
View file

@ -0,0 +1,94 @@
<?php
/*
 * Login for the manual insert tool.
 *
 * Shows the login form until a user name and password are posted. A captcha
 * is required while the session has no "authenticated" entry. On success the
 * failed-attempt counter is reset, the session is marked authenticated and
 * insert.php is included. On failure the counter is incremented; an account
 * with 5 or more recorded attempts can no longer log in.
 */
session_start();
if (!isset($_POST['pass']) || !isset($_POST['user']))
{
    include 'login.html.php';
}
else if (!is_string($_POST['user']) || !is_string($_POST['pass']) || $_POST['user'] == '' || $_POST['pass'] == '')
{
    echo "It doesn't look like you submitted a valid username or password.";
    include 'login.html.php';
}
else
{
    if (!isset($_SESSION["authenticated"]))
    {
        include_once $_SERVER['DOCUMENT_ROOT'] . '/securimage/securimage.php';
        $securimage = new Securimage();
        // A request that omits the captcha field is treated as a wrong code.
        $captchaCode = isset($_POST['captcha_code']) ? $_POST['captcha_code'] : '';
        if ($securimage->check($captchaCode) == false)
        {
            echo "The security code entered was incorrect.";
            include 'login.html.php';
            exit();
        }
    }

    $link = mysqli_connect('localhost', 'approver', 'foobar');
    if (!$link)
    {
        $error = 'Cant connect to database.';
        include 'error.html.php';
        exit();
    }
    if (!mysqli_set_charset($link, 'utf8'))
    {
        $error = 'Unable to set database connection encoding.';
        include 'error.html.php';
        exit();
    }
    if (!mysqli_select_db($link, 'wiby'))
    {
        $error = 'Unable to locate the database.';
        include 'error.html.php';
        exit();
    }

    // Escape only once the connection is known to be valid and its character
    // set is fixed; the escaping rules depend on that character set.
    $user = mysqli_real_escape_string($link, $_POST['user']);
    // The password never goes into SQL. It is verified exactly as typed:
    // escaping it first would alter passwords containing quotes or backslashes
    // so they could never match their stored hash.
    $pass = $_POST['pass'];

    $loginresult = mysqli_query($link, "SELECT hash, attempts FROM accounts WHERE name = '$user';");
    if (!$loginresult)
    {
        $error = 'Error fetching index: ' . mysqli_error($link);
        include 'error.html.php';
        exit();
    }
    //lets put contents of accounts into an array
    $hash = array();
    $attempts = array();
    while ($rowaccounts = mysqli_fetch_array($loginresult))
    {
        $hash[] = $rowaccounts['hash'];
        $attempts[] = $rowaccounts['attempts'];
    }
    $accountExists = isset($hash[0]);

    if ($accountExists && $attempts[0] < 5 && password_verify($pass, $hash[0]))
    {
        if ($attempts[0] > 0)
        {
            if (!mysqli_query($link, "UPDATE accounts SET attempts = '0' WHERE name = '$user';"))
            {
                $error = 'Error fetching index: ' . mysqli_error($link);
                include 'error.html.php';
                exit();
            }
        }
        $_SESSION["authenticated"] = true;
        // Stored SQL-escaped: other scripts splice this value into queries.
        $_SESSION["user"] = $user;
        include 'insert.php';
        exit();
    }
    else
    {
        // For an unknown user name this UPDATE matches no row.
        $attempt = $accountExists ? $attempts[0] + 1 : 1;
        if (!mysqli_query($link, "UPDATE accounts SET attempts = '$attempt' WHERE name = '$user';"))
        {
            $error = 'Error fetching index: ' . mysqli_error($link);
            include 'error.html.php';
            exit();
        }
        echo "It doesn't look like you submitted a valid username or password.";
        include 'login.html.php';
    }
}
?>

25
html/insert/insert.html.php Executable file
View file

@ -0,0 +1,25 @@
<!DOCTYPE html>
<html>
<head>
    <title>Form Example</title>
    <meta http-equiv="content-type" content="text/html; charset=utf-8"/>
</head>
<body>
<p>
<?php
// Result page for a manual insert: prints the summary text that the including
// script stored in $output, exactly as provided.
echo $output;
?>
</p>
</body>
</html>

102
html/insert/insert.php Executable file
View file

@ -0,0 +1,102 @@
<?php
/*
 * Manual insert into the search index (authenticated approvers only).
 *
 * Without a posted "url" the entry form is shown. Otherwise one row is
 * inserted into windex from the posted fields, with the logged-in user as
 * approver, and a summary of the submitted values is displayed.
 *
 * $_SESSION["user"] is interpolated into SQL as-is: the login script stores
 * it already passed through mysqli_real_escape_string().
 */
// This script is also included by the login script after session_start().
if (session_status() === PHP_SESSION_NONE)
{
    session_start();
}
if (empty($_SESSION["authenticated"]))
{
    include 'index.php';
    exit();
}
if (!isset($_POST['url']))
{
    include 'form.html.php';
}
else
{
    $link = mysqli_connect('localhost', 'crawler', 'seekout');
    if (!$link)
    {
        $error = 'Cant connect to database.';
        include 'error.html.php';
        exit();
    }
    if (!mysqli_set_charset($link, 'utf8'))
    {
        $error = 'Unable to set database connection encoding.';
        include 'error.html.php';
        exit();
    }
    if (!mysqli_select_db($link, 'wiby'))
    {
        $error = 'Unable to locate the database.';
        include 'error.html.php';
        exit();
    }

    // Posted fields, in the same order as the column list of the INSERT below.
    $fields = array('url', 'title', 'tags', 'description', 'body', 'http', 'surprise', 'worksafe', 'enable', 'updatable');
    $raw = array();   // values as submitted (used for the on-screen summary)
    $safe = array();  // values escaped for use inside an SQL string literal
    foreach ($fields as $field)
    {
        $raw[$field] = (isset($_POST[$field]) && is_string($_POST[$field])) ? $_POST[$field] : '';
        // Quote-doubling alone is not enough: a backslash in the input could
        // still end the string literal early. Escape with the connection's
        // own rules instead; the stored value is the text as typed.
        $safe[$field] = mysqli_real_escape_string($link, $raw[$field]);
    }

    $sql = 'INSERT INTO windex (url,title,tags,description,body,http,surprise,worksafe,enable,updatable,approver)
VALUES ("' . implode('","', $safe) . '","' . $_SESSION["user"] . '")';
    if (!mysqli_query($link, $sql))
    {
        $error = 'Error fetching index: ' . mysqli_error($link);
        include 'error.html.php';
        exit();
    }

    // The summary echoes submitted text, so it is HTML-escaped before display.
    $output = htmlspecialchars('No errors ' . implode(' ', $raw), ENT_QUOTES, 'UTF-8');
    include 'insert.html.php';
}
?>

31
html/insert/login.html.php Executable file
View file

@ -0,0 +1,31 @@
<?php
//Login form. The CAPTCHA is shown only while the session is not authenticated.
//The session is started only when none is active, which avoids a repeated
//session_start() when the including script has already started it, and it is
//done before any markup so session headers can still be sent.
if (session_status() === PHP_SESSION_NONE)
{
    session_start();
}
?>
<!DOCTYPE html>
<html>
<head>
<title>wiby.me</title>
<meta http-equiv="content-type" content="text/html; charset=utf-8"/>
<link rel="stylesheet" type="text/css" href="../styles.css">
</head>
<body>
<form method="post">
Username <input type="text" name="user" id="user"/><br>
Password <input type="password" name="pass" id="pass"/><br><br>
<?php if(empty($_SESSION["authenticated"])): ?>
<div>
<img id="captcha" src="/securimage/securimage_show.php" alt="CAPTCHA Image" />
</div>
<div>
<input type="text" name="captcha_code" size="10" maxlength="6" />
<a href="#" onclick="document.getElementById('captcha').src = '/securimage/securimage_show.php?' + Math.random(); return false">Reload Image</a>
</div>
<?php endif; ?>
<br><input type="submit" id="login" value="Login"/>
</form>
</body>
</html>

25
html/json/error.html.php Executable file
View file

@ -0,0 +1,25 @@
<!DOCTYPE html>
<html>
<head>
<title>PHP Error Output</title>
<meta http-equiv="content-type"
content="text/html; charset=utf-8"/>
</head>
<body>
<p>
<?php
// $error can carry database error text, which may quote parts of the request;
// escape it so it is displayed as text rather than interpreted as markup.
echo htmlspecialchars($error, ENT_QUOTES, 'UTF-8');
?>
</p>
</body>
</html>

19
html/json/form.html.php Executable file
View file

@ -0,0 +1,19 @@
<!DOCTYPE html>
<html>
<head>
<title>JSON API</title>
<meta http-equiv="content-type" content="text/html; charset=utf-8"/>
<meta name="description" content="Using the JSON API"/>
</head>
<body>
<h1>Using JSON API</h1>
<p>Use https://domain/json/ to get a JSON output of search results.<br><br>
Example: <a href="https://domain/json/?q=test">https://domain/json/?q=test</a> outputs results for the query 'test'.<br><br>
Append the parameter &amp;o=NUM to get the next page of results.<br>
To determine the value of NUM, look for the presence of <b>NextOffset</b> at the end of the JSON data.<br>
Example: <a href="https://domain/json/?q=test&amp;o=12">https://domain/json/?q=test&amp;o=12</a>
<br><br><b>Terms of Use:
<br>1. Set terms here.
</b></p>
</body>
</html>

460
html/json/index.php Executable file
View file

@ -0,0 +1,460 @@
<?php
session_start();
//Worksafe filtering stays on unless the 'ws' cookie compares equal to "0".
//The cookie is only read when it is present and a string, so a missing cookie
//no longer triggers an undefined-index notice.
$worksafe = !(isset($_COOKIE['ws']) && is_string($_COOKIE['ws']) && $_COOKIE['ws'] == "0");
//The http-only filter is switched on when the 'hs' cookie compares equal to "1".
$filterHTTPS = (isset($_COOKIE['hs']) && is_string($_COOKIE['hs']) && $_COOKIE['hs'] == "1");
if (!isset($_REQUEST['q']))
{
include 'form.html.php';
}
else
{
//Connect first and verify the connection before anything else uses $link.
$link = mysqli_connect('localhost', 'guest', 'qwer');
if (!$link)
{
    $error = 'Cant connect to database.';
    include 'error.html.php';
    exit();
}
if (!mysqli_set_charset($link, 'utf8mb4'))
{
    $error = 'Unable to set database connection encoding.';
    include 'error.html.php';
    exit();
}
if(!mysqli_select_db($link, 'wiby'))
{
    $error = 'Unable to locate the database.';
    include 'error.html.php';
    exit();
}
//Escape the query only after the connection charset is set, since the
//escaping rules depend on it.
$query = mysqli_real_escape_string($link, (isset($_GET['q']) && is_string($_GET['q'])) ? $_GET['q'] : '');
$lim = 12;
$starappend = 0;
$startID = 0;
//The offset is placed unquoted after OFFSET in the search statements, so it
//must be a plain non-negative integer; string escaping alone does not
//constrain an unquoted value.
$offset = 0;
if (isset($_REQUEST['o']) && isset($_GET['o']) && is_scalar($_GET['o']))
{
    $offset = max(0, (int)$_GET['o']);
}
//perform exact search
/* if($_SESSION["worksafe"]==true)
{
$output = mysqli_query($link,"SELECT id, url, title, description, body, FROM windex WHERE body LIKE '%$query%' AND enable = '1' AND worksafe = '1' AND id > $lastID OR description LIKE '%$query%' AND enable = '1' AND worksafe = '1' AND id > $lastID OR title LIKE '%$query%' AND enable = '1' AND worksafe = '1' AND id > $lastID OR url LIKE '%$query%' AND enable = '1' AND worksafe = '1' AND id > $lastID DESC LIMIT $lim");
}
else
{
$output = mysqli_query($link,"SELECT id, url, title, description, body, FROM windex WHERE body LIKE '%$query%' AND enable = '1' AND id > $lastID OR description LIKE '%$query%' AND enable = '1' AND id > $lastID OR title LIKE '%$query%' AND enable = '1' AND id > $lastID OR url LIKE '%$query%' AND enable = '1' AND id > $lastID DESC LIMIT $lim");
}
if(!$output)
{
$error = 'Error ' . mysqli_error($link);
include 'error.html.php';
exit();
}*/
//Query parsing. Derives from the raw request: $urlDetected, $exactMatch, $queryNoQuotes (and its SQL-escaped copy),
//$requiredword, the longest-word bookkeeping, and a $query in which words are quoted for boolean-mode searching.
//Check if the query looks like a url (contains "http", a '.', no '"', and no space). If so, wrap it in quotes to get an exact match.
//The strpos() results are compared loosely, so a match at position 0 counts the same as "not found".
$urlDetected = 0;
if(strpos($query, ' ') == false && strpos($query,'.') == true && strpos($query,'"') == false && preg_match('/http/',$query) == true)
{
$queryOriginal = $query;
$query = '"' . $query . '"';
$urlDetected = 1;
}
//it was made safe for sql, now put it back to the way it was and use htmlspecialchars on results page
//NOTE(review): this assignment also replaces the quoted form built just above when $urlDetected is set - confirm that is intended.
$query = $_GET['q'];
//did user manually set -https instead of settings cookie?
if(substr($query,-7) == " -https"){
$filterHTTPS = true;
$query = substr($query, 0,-7);
}
$queryNoQuotes = $query;
//Are there quotes in the query?
$exactMatch = false;
if(strpos($queryNoQuotes,'"') !== false)
{
$exactMatch = true;
$queryNoQuotes = $query;
}
//alright then lets remove the quotes
if($exactMatch == true)
{
while(strpos($queryNoQuotes,'"') !== false)
{
$queryNoQuotes = str_replace('"', "",$queryNoQuotes);
}
}
//first remove any flags inside queryNoQuotes, also grab any required words (+ prefix)
//$requiredword ends up holding the last word that starts with '+' (without the '+').
//NOTE(review): $word[0] is read without checking for an empty word, which occurs when the query contains consecutive spaces.
$queryNoQuotesOrFlags = '';
$requiredword = '';
if(strpos($queryNoQuotes,'+') !== false || strpos($queryNoQuotes,'-') !== false){
$words = explode(' ', $queryNoQuotes);
$i = 0;
foreach ($words as $word) {
if($i != 0 && $word[0] != '-' && $word[0] != '+'){
$queryNoQuotesOrFlags .= ' ';
}
if ($word[0] != '-' && $word[0] != '+'){
$queryNoQuotesOrFlags .= $word;
}
if ($word[0] == '+' && strlen($word) > 1){
$requiredword = substr($word,1);
}
$i++;
}
}
//remove the '*' if contained anywhere in queryNoQuotes
if(strpos($queryNoQuotes,'*') !== false && $exactMatch == false){
$queryNoQuotes = str_replace('*', "",$queryNoQuotes);
}
//Escaped copy used inside the LOCATE() ranking terms of the search statements.
$queryNoQuotes_SQLsafe = mysqli_real_escape_string($link, $queryNoQuotes);
if($exactMatch == false)
{
//find longest word in query
//$longestwordelementnum is that word's index in $words; $wordcount ends as the number of words.
$words = explode(' ', $queryNoQuotes);
$longestWordLength = 0;
$longestWord = '';
$wordcount = 0;
$longestwordelementnum = 0;
foreach ($words as $word) {
if (strlen($word) > $longestWordLength) {
$longestWordLength = strlen($word);
$longestWord = $word;
$longestwordelementnum = $wordcount;
}
$wordcount++;
}
}
//Check if query contains a hyphenated word. MySQL doesn't handle them smartly. We will wrap quotes around hyphenated words that aren't part of a string which is already wrapped in quotes.
if((strpos($queryNoQuotes,'-') !== false || strpos($queryNoQuotes,'+') !== false) && $urlDetected == false){
if($query == "c++" || $query == "C++"){//special case: a bare "c++" query is expanded before the words are quoted
$query = "c++ programming";
}
$hyphenwords = explode(' ',$query);
$query = '';
$quotes = 0;
$i = 0;
foreach ($hyphenwords as $word) {
//NOTE(review): the two tests below inspect $queryNoQuotes (the whole query, already stripped of quotes) rather than $word,
//so $quotes never increments here and every word not starting with the flag character gets quoted - confirm intent.
if(strpos($queryNoQuotes,'"') !== false){
$quotes++;
}
if(((strpos($queryNoQuotes,'-') !== false && $word[0] != '-') || (strpos($queryNoQuotes,'+') !== false && $word[0] != '+')) && $quotes%2 == 0){//if hyphen exists, not a flag, not wrapped in quotes already
$word = '"' . $word . '"';
}
if($i > 0){
$query .= ' ';
}
$query .= $word;
$i++;
}
}
//make query safe for sql again
$query = mysqli_real_escape_string($link, $query);
//Row filters shared by both search attempts: enabled pages only, plus the
//optional worksafe and http-only restrictions.
$rowFilters = "enable = '1'";
if($worksafe == true)
{
    $rowFilters .= " AND worksafe = '1'";
}
if($filterHTTPS != false)
{
    $rowFilters .= " AND http = '1'";
}
//Builds the ranked full text search statement for an already SQL-escaped
//boolean-mode search expression. FOR InnoDB STORAGE ENGINE ONLY! DO NOT USE FOR MyISAM
$buildSearchSQL = function($against) use ($rowFilters, $queryNoQuotes_SQLsafe, $lim, $offset)
{
    return "SELECT id, url, title, description, body FROM windex WHERE Match(tags, body, description, title, url) Against('$against' IN BOOLEAN MODE) AND $rowFilters ORDER BY CASE WHEN LOCATE('$queryNoQuotes_SQLsafe', tags)>0 THEN 30 WHEN LOCATE('$queryNoQuotes_SQLsafe', title)>0 AND Match(title) AGAINST('$against' IN BOOLEAN MODE) THEN 20 WHEN LOCATE('$queryNoQuotes_SQLsafe', title)>0 THEN 15 WHEN Match(title) AGAINST('$against' IN BOOLEAN MODE) THEN Match(title) AGAINST('$against' IN BOOLEAN MODE) WHEN LOCATE('$queryNoQuotes_SQLsafe', body)>0 THEN 14 END DESC LIMIT $lim OFFSET $offset";
};
//First attempt. A failure is not reported yet because the wildcard retry
//below may still succeed.
$outputFTS = mysqli_query($link, $buildSearchSQL($query));
if($urlDetected == 1)
{
    $query = $queryOriginal;
}
//perform full text search with * appended when the first attempt failed or
//found nothing on the first page of a non-url, non-exact query
if((!$outputFTS || mysqli_num_rows($outputFTS) == 0) && $offset == 0 && $urlDetected == 0 && $exactMatch == false)
{
    $starappend = 1;
    $querystar = $query;
    $querystar = str_replace('*', "",$querystar);//innodb will get fussy over some things if put in like '''' or ****
    $querystar = str_replace('"', "",$querystar);
    $querystar = str_replace('"', "",$querystar);
    $querystar = str_replace('\'', "",$querystar);
    $querystar = $querystar . '*';
    $outputFTS = mysqli_query($link, $buildSearchSQL($querystar));
}
//Report a failure whether or not the retry ran, so a false result never
//reaches the row-fetching code that follows.
if(!$outputFTS)
{
    $error = 'Error ' . mysqli_error($link);
    include 'error.html.php';
    exit();
}
//Re-derive the display-side query state from the raw request before building snippets.
$count = 0;
//it was made safe for sql, now put it back to the way it was and use htmlspecialchars on results page
$query = $_GET['q'];
//Are there quotes in the query?
//When there are none, $queryNoQuotes keeps the value computed earlier in this script.
$exactMatch = false;
if(preg_match('/"/',$query) == true)
{
$exactMatch = true;
$queryNoQuotes = $query;
}
//alright then lets remove the quotes
if($exactMatch == true)
{
while(preg_match('/"/',$queryNoQuotes) == true)
{
$queryNoQuotes = str_replace('"', "",$queryNoQuotes);
}
}
if($exactMatch == false)
{
//find longest word in query (this time over the raw query, so flags and '*' are still present in $words)
$words = explode(' ', $query);
$longestWordLength = 0;
$longestWord = '';
$wordcount = 0;
$longestwordelementnum = 0;
foreach ($words as $word) {
if (strlen($word) > $longestWordLength) {
$longestWordLength = strlen($word);
$longestWord = $word;
$longestwordelementnum = $wordcount;
}
$wordcount++;
}
}
//Walk the result rows, collecting $id/$url/$title/$description and building one $bodymatch snippet per row,
//then hand over to results.json.php.
//this will get set if position of longest word of query is found within body
$pos = -1;
//lets put contents of the full text search into the array
while($row = mysqli_fetch_array($outputFTS))
{
//put the contents of the URL column within the DB into an array
$id[] = $row[0];
$url[] = $row[1];
$title[] = JSONRealEscapeString(substr($row[2],0,150));
$description[] = JSONRealEscapeString(substr($row[3],0,180));
$body = JSONRealEscapeString($row[4]);
$count++;
$lastID = $row[0];
if($exactMatch == false)
{
//remove the '*' at the end of the longest word if present
if(strpos($longestWord,'*') == true)
{
$longestWord = str_replace('*', "",$longestWord);
}
//first find an exact
if(strlen($requiredword) > 0){
$pos = stripos($body, $requiredword);
}else{
$pos = stripos($body, $queryNoQuotes);
}
//search within body for position of longest query word. If not found, try another word
//($pos == false is also true for a match at position 0, so such a match is treated like "not found" here)
if($pos == false){
$pos = stripos($body, $longestWord);
if($pos == false && $wordcount > 1)
{
if($longestwordelementnum > 0)
{
if(strpos($words[0],'*') == true)//remove the '*' at the end of the query if present
$words[0] = str_replace('*', "",$words[0]);
$pos = stripos($body, $words[0]);
}
else if($longestwordelementnum == 0)
{
if(strpos($words[1],'*') == true)//remove the '*' at the end of the query if present
$words[1] = str_replace('*', "",$words[1]);
$pos = stripos($body, $words[1]);
}
}
}
}
else
{
$pos = stripos($body, $queryNoQuotes);
}
//still not found?, set position to 0
if($pos == false){
$pos = 0;
}
//get all positions of all keywords in body
/* $lastPos = 0;
$positions = array();
foreach($words as $word)
{
while (($lastPos = mb_strpos($body, $word, $lastPos))!== false) {
$positions[$word][] = $lastPos;
$lastPos = $lastPos + strlen($word);
}
}*/
//figure out how much preceding text to use
//NOTE(review): the first branch covers every $pos below 32 and the second every $pos from 32 up, so the third branch looks unreachable.
if($pos < 32)
$starttext = 0;
else if($pos > 25)
$starttext = $pos - 25;
else if($pos > 20)
$starttext = $pos - 15;
//else $starttext = 0;
//total length of the ballpark
$textlength = 180;
//populate the ballpark
//NOTE(review): $pos was reset to 0 above when nothing matched, so the else branch looks unreachable; it also assigns $ballpark, not $ballparktext.
if($pos >= 0)
{
$ballparktext = substr($body,$starttext,$textlength);
}
else $ballpark = '0';
//find position of nearest Period
//stripos() yields false when nothing is found, which counts as 0 in the additions below.
//$foundPeriod and $foundSpace are assigned here but not read again in this script.
$foundPeriod = true;
$posPeriod = stripos($ballparktext, '. ') + $starttext +1;
//find position of nearest Space
$foundSpace = true;
$posSpace = stripos($ballparktext, ' ') + $starttext;
//if longest word in query is after a period+space within ballpark, reset $starttext to that point
//NOTE(review): $pos-$starttext is an offset within the ballpark while $posPeriod already includes $starttext - confirm the comparison is as intended.
if($pos-$starttext > $posPeriod)
{
$starttext = $posPeriod;
//populate the bodymatch
if($pos-$starttext >= 0)
{
$bodymatch[] = substr($body,$starttext,$textlength);
}
else $bodymatch[] = '';
}
//else if($pos-starttext > $posSpace)//else if longest word in query is after a space within ballpark, reset $starttext to that point
else if($pos > $posSpace)//else if longest word in query is after a space within ballpark, reset $starttext to that point
{
$starttext = $posSpace;
//populate the bodymatch
if($pos-$starttext >= 0)
{
$bodymatch[] = substr($body,$starttext,$textlength);
}
else $bodymatch[] = '';
}
else //else just set the bodymatch to the ballparktext
{
//populate the bodymatch
if($pos-$starttext >= 0)
{
$bodymatch[] = $ballparktext;
}
else $bodymatch[] = '';
}
}
$row = null;
//Offset of the next page; results.json.php emits it as NextOffset.
$totalcount = $count + $offset;
//make safe for json
// replace := map[string]string{"\\":"\\\\", "\t":"\\t", "\b":"\\b", "\n":"\\n", "\r":"\\r", "\f":"\\f"/*, `"`:`\"`*/}
include 'results.json.php';
}
/**
 * Escapes backslashes and the tab, backspace, newline, carriage-return and
 * form-feed control characters so they can sit inside a JSON string literal.
 * Double quotes are not handled here.
 *
 * @param string $var Text to escape.
 * @return string Escaped text.
 */
function JSONRealEscapeString($var){
    // Backslashes go first so the escapes added below are not doubled.
    $var = str_replace("\\","\\\\",$var);
    $var = str_replace("\t","\\t",$var);
    // "\b" is not an escape sequence in PHP strings (it is a literal backslash
    // followed by 'b'), so the backspace byte is written as "\x08".
    $var = str_replace("\x08","\\b",$var);
    $var = str_replace("\n","\\n",$var);
    $var = str_replace("\r","\\r",$var);
    $var = str_replace("\f","\\f",$var);
    return $var;
}
?>

22
html/json/results.json.php Executable file
View file

@ -0,0 +1,22 @@
<?php
// Emits the search results as a JSON array of objects, followed by a
// NextOffset object when a further page may exist.
header("Content-Type: application/json;charset=utf-8"); ?>
<?php
// With no matching rows the including script never creates $url; fall back to
// an empty list so the output is a bare [] instead of a warning in the body.
if (!isset($url)) { $url = array(); }
$i=0; ?>[
<?php foreach ($url as $storedresult): ?>
<?php $title[$i] = htmlentities($title[$i], ENT_QUOTES|ENT_SUBSTITUTE, 'UTF-8'); ?>
<?php $bodymatch[$i] = htmlentities($bodymatch[$i], ENT_QUOTES|ENT_SUBSTITUTE, 'UTF-8'); ?>
<?php $description[$i] = htmlentities($description[$i], ENT_QUOTES|ENT_SUBSTITUTE, 'UTF-8'); ?>
<?php $title[$i] = str_replace("<","&lt;",$title[$i]); $title[$i] = str_replace(">","&gt;",$title[$i]); ?>
<?php $bodymatch[$i] = str_replace("<","&lt;",$bodymatch[$i]); $bodymatch[$i] = str_replace(">","&gt;",$bodymatch[$i]); ?>
<?php $description[$i] = str_replace("<","&lt;",$description[$i]); $description[$i] = str_replace(">","&gt;",$description[$i]); ?>
{
"URL": "<?php echo htmlspecialchars($storedresult, ENT_QUOTES, 'UTF-8'); ?>",
"Title": "<?php echo $title[$i]; ?>",
"Snippet": "<?php echo $bodymatch[$i]; ?>",
"Description": "<?php echo $description[$i]; $i++; ?>"
}<?php if ($i<sizeof($url)): ?>,
<?php endif ?><?php endforeach; ?><?php if($i >= $lim && $starappend == 0): ?>,
{
"NextOffset": "<?php echo $totalcount;?>"
}
<?php endif; ?>
]

14
html/opensearch.xml Executable file
View file

@ -0,0 +1,14 @@
<?xml version="1.0" encoding="UTF-8"?>
<OpenSearchDescription xmlns="http://a9.com/-/spec/opensearch/1.1/" xmlns:moz="http://www.mozilla.org/2006/browser/search/">
<ShortName>Title</ShortName>
<Description>Enter your description</Description>
<Tags>web internet cyber interest subject</Tags>
<Contact>Put a contact if you want</Contact>
<Url type="text/html" template="https://wiby.me/?q={searchTerms}"/>
<LongName>Long title name</LongName>
<Image height="16" width="16" type="image/x-icon">URL_TO_FAVICON.ICO_FILE</Image>
<Query role="request" searchTerms="cats" />
<Language>en-us</Language>
<InputEncoding>UTF-8</InputEncoding>
<moz:SearchForm>URL_TO_YOUR_SEARCH_ENGINE</moz:SearchForm>
</OpenSearchDescription>

25
html/readf/error.html.php Executable file
View file

@ -0,0 +1,25 @@
<!DOCTYPE html>
<html>
<head>
<title>PHP Error Output</title>
<meta http-equiv="content-type"
content="text/html; charset=utf-8"/>
</head>
<body>
<p>
<?php
// $error can carry database error text, which may quote parts of the request;
// escape it so it is displayed as text rather than interpreted as markup.
echo htmlspecialchars($error, ENT_QUOTES, 'UTF-8');
?>
</p>
</body>
</html>

110
html/readf/feedback.php Executable file
View file

@ -0,0 +1,110 @@
<?php
//Admin page that lists stored feedback and deletes the entries ticked on the
//review form. After a deletion pass the script includes itself again to show
//the remaining feedback.
if (session_status() === PHP_SESSION_NONE)
{
    //Only start a session when none is active; this script re-includes itself.
    session_start();
}
if(!isset($_SESSION["level"]) || $_SESSION["level"]!="admin")
{
    echo "Access denied.";
    exit();
}
if(empty($_SESSION["authenticated"]))
{
    include 'index.php';
    exit();
}
//True when this request is the submitted review form rather than a page load.
$reviewSubmitted = isset($_POST['startid']) && empty($_SESSION["loadfeedback"]);
if ($reviewSubmitted)
{
    //Both ids are placed unquoted into SQL below, so force them to integers.
    $startID = (int)$_POST['startid'];
    $endID = isset($_POST['endid']) ? (int)$_POST['endid'] : 0;
}
$link = mysqli_connect('localhost', 'approver', 'foobar');
if (!$link)
{
    $error = 'Cant connect to database.';
    include 'error.html.php';
    exit();
}
if (!mysqli_set_charset($link, 'utf8'))
{
    $error = 'Unable to set database connection encoding.';
    include 'error.html.php';
    exit();
}
if(!mysqli_select_db($link, 'wiby'))
{
    $error = 'Unable to locate the database.';
    include 'error.html.php';
    exit();
}
$lim = 10000000000;
if ($reviewSubmitted) //this is incase any new submissions are made during the review process, they will be ignored
{
    $result = mysqli_query($link,"SELECT * FROM feedback WHERE id >= $startID AND id <= $endID");
}
else
{
    $result = mysqli_query($link,"SELECT * FROM feedback LIMIT $lim");
}
if(!$result)
{
    $error = 'Error fetching index: ' . mysqli_error($link);
    include 'error.html.php';
    exit();
}
//lets put contents of index into arrays (left empty when there is no feedback)
$id = array();
$message = array();
$time = array();
while($row = mysqli_fetch_array($result))
{
    $id[] = $row['id'];
    $message[] = $row['message'];
    $time[] = $row['time'];
}
if ($_SERVER['REQUEST_METHOD'] === 'POST' && $reviewSubmitted)
{ //remove selected feedback
    foreach($id as $pageid)
    {
        //An unticked checkbox is simply absent from the POST data.
        if(isset($_POST["drop$pageid"]) && $_POST["drop$pageid"] == 'on')
        {
            $pageid = (int)$pageid;
            $result2 = mysqli_query($link,"DELETE FROM feedback WHERE id = $pageid");
            if(!$result2)
            {
                $error = 'Error deleting from feedback: ' . mysqli_error($link);
                include 'error.html.php';
                exit();
            }
        }
    }
    //Mark the next pass as a plain reload, clear this pass's state, and run again.
    $_SESSION["loadfeedback"]=true;
    unset($id);
    unset($message);
    unset($time);
    unset($startID);
    unset($endID);
    unset($result);
    unset($result2);
    $link -> close();
    include 'feedback.php';
    exit();
}
else
{
    $_SESSION["loadfeedback"]=false;
    include 'form.html.php';
}
?>

47
html/readf/form.html.php Executable file
View file

@ -0,0 +1,47 @@
<!DOCTYPE html>
<html>
<head>
<title>Feedback</title>
<meta http-equiv="content-type" content="text/html; charset=utf-8"/>
<link rel="stylesheet" type="text/css" href="/styles.css">
<style>
pre {
white-space: pre-wrap;
white-space: -moz-pre-wrap;
white-space: -pre-wrap;
white-space: -o-pre-wrap;
word-wrap: break-word;
}
</style>
</head>
<body>
<form action="feedback.php" method="post">
<p>Some feedback awaiting review:</p><br><hr>
<?php
// Review form: one block per feedback message with a "Drop" checkbox named
// drop<id>. The lists may be unset when no feedback is stored, so default
// them to empty arrays.
$id = isset($id) ? $id : array();
$message = isset($message) ? $message : array();
$time = isset($time) ? $time : array();
$i=0;
?>
<?php foreach ($message as $submission): ?>
<blockquote><p>
<pre><?php echo htmlspecialchars($submission, ENT_QUOTES, 'UTF-8'); ?></pre>
Time: <?php echo htmlspecialchars($time[$i], ENT_QUOTES, 'UTF-8'); ?><br>
[Drop<input type="checkbox" id="drop<?php echo $id[$i] ?>" name="drop<?php echo $id[$i] ?>" >]
</p></blockquote><hr>
<?php $i++; ?>
<?php endforeach; ?>
<br>
<div><input type="submit" id="submit" value="Submit"/></div>
<?php if ($i > 0): /* the id range is only posted when something was listed */ ?>
<input type="hidden" name="startid" id="startid" value="<?php echo $id[0]; ?>">
<input type="hidden" name="endid" id="endid" value="<?php echo $id[$i-1]; ?>">
<?php endif; ?>
</form>
</body>
</html>

96
html/readf/index.php Executable file
View file

@ -0,0 +1,96 @@
<?php
//Login gate for the feedback review page. Shows the login form until a user
//and password are posted, checks the CAPTCHA for sessions that are not yet
//authenticated, and admits only accounts with level "admin" and fewer than
//5 recorded failed attempts.
session_start();
if ( !isset($_POST['pass']) || !isset($_POST['user']))
{
    include 'login.html.php';
}
else if( $_POST['user'] == '' || $_POST['pass'] == '')
{
    echo "It doesn't look like you submitted a valid username or password.";
    include 'login.html.php';
}
else
{
    if(!isset($_SESSION["authenticated"]))
    {
        include_once $_SERVER['DOCUMENT_ROOT'] . '/securimage/securimage.php';
        $securimage = new Securimage();
        //A request without the field is treated as an empty (wrong) code.
        $captchaCode = isset($_POST['captcha_code']) ? $_POST['captcha_code'] : '';
        if ($securimage->check($captchaCode) == false)
        {
            echo "The security code entered was incorrect.";
            include 'login.html.php';
            exit();
        }
    }
    //Verify the connection and set its charset before escaping anything with it.
    $link = mysqli_connect('localhost', 'approver', 'foobar');
    if (!$link)
    {
        $error = 'Cant connect to database.';
        include 'error.html.php';
        exit();
    }
    if (!mysqli_set_charset($link, 'utf8'))
    {
        $error = 'Unable to set database connection encoding.';
        include 'error.html.php';
        exit();
    }
    if(!mysqli_select_db($link, 'wiby'))
    {
        $error = 'Unable to locate the database.';
        include 'error.html.php';
        exit();
    }
    $user = mysqli_real_escape_string($link, $_POST['user']);
    //NOTE(review): the password is escaped before password_verify(), so it is
    //compared in escaped form. Kept as-is because stored hashes may have been
    //created from the same escaped form - confirm against account creation.
    $pass = mysqli_real_escape_string($link, $_POST['pass']);
    $loginresult = mysqli_query($link,"SELECT hash, attempts, level FROM accounts WHERE name = '$user';");
    if(!$loginresult)
    {
        $error = 'Error fetching index: ' . mysqli_error($link);
        include 'error.html.php';
        exit();
    }
    //lets put contents of accounts into arrays (left empty for an unknown user)
    $hash = array();
    $attempts = array();
    $level = array();
    while($rowaccounts = mysqli_fetch_array($loginresult))
    {
        $hash[] = $rowaccounts['hash'];
        $attempts[] = $rowaccounts['attempts'];
        $level[] = $rowaccounts['level'];
    }
    $accountFound = isset($hash[0]);
    if($accountFound && password_verify($pass,$hash[0]) && $attempts[0] < 5 && $level[0] == "admin")
    {
        if($attempts[0]>0)
        {
            if (!mysqli_query($link, "UPDATE accounts SET attempts = '0' WHERE name = '$user';"))
            {
                $error = 'Error fetching index: ' . mysqli_error($link);
                include 'error.html.php';
                exit();
            }
        }
        $_SESSION["authenticated"] = true;
        $_SESSION["user"] = $user;
        $_SESSION["level"] = $level[0];
        include 'feedback.php';
        exit();
    }
    else{
        //Record the failed attempt; for an unknown user the update matches no row.
        $attempt = ($accountFound ? $attempts[0] : 0) + 1;
        if (!mysqli_query($link, "UPDATE accounts SET attempts = '$attempt' WHERE name = '$user';"))
        {
            $error = 'Error fetching index: ' . mysqli_error($link);
            include 'error.html.php';
            exit();
        }
        echo "It doesn't look like you submitted a valid username or password.";
        include 'login.html.php';
    }
}
?>

31
html/readf/login.html.php Executable file
View file

@ -0,0 +1,31 @@
<?php
//Login form. The CAPTCHA is shown only while the session is not authenticated.
//The session is started only when none is active, which avoids a repeated
//session_start() when the including script has already started it, and it is
//done before any markup so session headers can still be sent.
if (session_status() === PHP_SESSION_NONE)
{
    session_start();
}
?>
<!DOCTYPE html>
<html>
<head>
<title>wiby.me</title>
<meta http-equiv="content-type" content="text/html; charset=utf-8"/>
<link rel="stylesheet" type="text/css" href="/styles.css">
</head>
<body>
<form method="post">
Username <input type="text" name="user" id="user"/><br>
Password <input type="password" name="pass" id="pass"/><br><br>
<?php if(empty($_SESSION["authenticated"])): ?>
<div>
<img id="captcha" src="/securimage/securimage_show.php" alt="CAPTCHA Image" />
</div>
<div>
<input type="text" name="captcha_code" size="10" maxlength="6" />
<a href="#" onclick="document.getElementById('captcha').src = '/securimage/securimage_show.php?' + Math.random(); return false">Reload Image</a>
</div>
<?php endif; ?>
<br><input type="submit" id="login" value="Login"/>
</form>
</body>
</html>

41
html/results.html.php Executable file
View file

@ -0,0 +1,41 @@
<!DOCTYPE html>
<html>
<head>
<title>TITLE</title>
<meta http-equiv="content-type" content="text/html; charset=utf-8"/>
<link rel=stylesheet href="/styles.css" type="text/css">
<link rel="search" type="application/opensearchdescription+xml" title="YOUR_TITLE" href="/opensearch.xml">
</head>
<body>
<form method="get">
<div style="float: left">
<a class="title" href="../">name</a>&nbsp;&nbsp;
<input type="text" size="35" name="q" id="q" value="<?php echo htmlspecialchars($query, ENT_QUOTES, 'UTF-8');?>" role="form" aria-label="Main search form"/>
<input type="submit" value="Search"/>
</div>
<div style="float: right"><a class="tiny" href="/settings/">Settings</a></div><br><br>
</form>
<?php /* Results list: one block per stored url, with title, url, body snippet and description. */ $i=0; ?>
<p class="pin"><br></p>
<?php foreach ($url as $storedresult): ?>
<?php $title[$i] = html_entity_decode($title[$i], ENT_QUOTES|ENT_SUBSTITUTE, 'UTF-8'); ?>
<?php $bodymatch[$i] = html_entity_decode($bodymatch[$i], ENT_QUOTES|ENT_SUBSTITUTE, 'UTF-8'); ?>
<?php $description[$i] = html_entity_decode($description[$i], ENT_QUOTES|ENT_SUBSTITUTE, 'UTF-8'); ?>
<?php $title[$i] = str_replace("<","&lt;",$title[$i]); $title[$i] = str_replace(">","&gt;",$title[$i]); ?>
<?php $bodymatch[$i] = str_replace("<","&lt;",$bodymatch[$i]); $bodymatch[$i] = str_replace(">","&gt;",$bodymatch[$i]); ?>
<?php $description[$i] = str_replace("<","&lt;",$description[$i]); $description[$i] = str_replace(">","&gt;",$description[$i]); ?>
<blockquote><p>
<a class="tlink" href="<?php echo htmlspecialchars($storedresult, ENT_QUOTES, 'UTF-8'); ?>"><?php echo $title[$i]; ?></a> <br><p class="url"><?php echo htmlspecialchars($storedresult, ENT_QUOTES, 'UTF-8'); ?></p>
<?php echo $bodymatch[$i]; ?>
<br>
<?php echo $description[$i]; $i++; ?>
</blockquote>
<?php endforeach; ?>
<?php if($i >= $lim && $starappend == 0): ?>
<?php /* The query is URL-encoded for the link so that characters such as '+', '&' and '#' reach the next page unchanged. */ ?>
<p class="pin"><blockquote></p><br><a class="tlink" href="/?q=<?php echo htmlspecialchars(urlencode($query), ENT_QUOTES, 'UTF-8');?>&amp;o=<?php echo $totalcount;?>">Find more...</a></blockquote>
<?php else: ?>
<blockquote><p class="pin"> <br>That's everything I could find.<br>Help make me smarter by <a class="pin1" href="/submit">submitting a page</a>.</p></blockquote>
<?php endif; ?>
</body>
</html>

25
html/review/error.html.php Executable file
View file

@ -0,0 +1,25 @@
<!DOCTYPE html>
<html>
<head>
<title>PHP Error Output</title>
<meta http-equiv="content-type"
content="text/html; charset=utf-8"/>
</head>
<body>
<p>
<?php
// $error can carry database error text, which may quote parts of the request;
// escape it so it is displayed as text rather than interpreted as markup.
echo htmlspecialchars($error, ENT_QUOTES, 'UTF-8');
?>
</p>
</body>
</html>

95
html/review/index.php Executable file
View file

@ -0,0 +1,95 @@
<?php
//Login gate for the review page. Shows the login form until a user and
//password are posted, checks the CAPTCHA for sessions that are not yet
//authenticated, and admits accounts with fewer than 5 recorded failed attempts.
session_start();
if ( !isset($_POST['pass']) || !isset($_POST['user']))
{
    include 'login.html.php';
}
else if( $_POST['user'] == '' || $_POST['pass'] == '')
{
    echo "It doesn't look like you submitted a valid username or password.";
    include 'login.html.php';
}
else
{
    if(!isset($_SESSION["authenticated"]))
    {
        include_once $_SERVER['DOCUMENT_ROOT'] . '/securimage/securimage.php';
        $securimage = new Securimage();
        //A request without the field is treated as an empty (wrong) code.
        $captchaCode = isset($_POST['captcha_code']) ? $_POST['captcha_code'] : '';
        if ($securimage->check($captchaCode) == false)
        {
            echo "The security code entered was incorrect.";
            include 'login.html.php';
            exit();
        }
    }
    //Verify the connection and set its charset before escaping anything with it.
    $link = mysqli_connect('localhost', 'approver', 'foobar');
    if (!$link)
    {
        $error = 'Cant connect to database.';
        include 'error.html.php';
        exit();
    }
    if (!mysqli_set_charset($link, 'utf8'))
    {
        $error = 'Unable to set database connection encoding.';
        include 'error.html.php';
        exit();
    }
    if(!mysqli_select_db($link, 'wiby'))
    {
        $error = 'Unable to locate the database.';
        include 'error.html.php';
        exit();
    }
    $user = mysqli_real_escape_string($link, $_POST['user']);
    //NOTE(review): the password is escaped before password_verify(), so it is
    //compared in escaped form. Kept as-is because stored hashes may have been
    //created from the same escaped form - confirm against account creation.
    $pass = mysqli_real_escape_string($link, $_POST['pass']);
    $loginresult = mysqli_query($link,"SELECT hash, attempts FROM accounts WHERE name = '$user';");
    if(!$loginresult)
    {
        $error = 'Error fetching index: ' . mysqli_error($link);
        include 'error.html.php';
        exit();
    }
    //lets put contents of accounts into arrays (left empty for an unknown user)
    $hash = array();
    $attempts = array();
    while($rowaccounts = mysqli_fetch_array($loginresult))
    {
        $hash[] = $rowaccounts['hash'];
        $attempts[] = $rowaccounts['attempts'];
    }
    $accountFound = isset($hash[0]);
    if($accountFound && password_verify($pass,$hash[0]) && $attempts[0] < 5)
    {
        if($attempts[0]>0)
        {
            if (!mysqli_query($link, "UPDATE accounts SET attempts = '0' WHERE name = '$user';"))
            {
                $error = 'Error fetching index: ' . mysqli_error($link);
                include 'error.html.php';
                exit();
            }
        }
        $_SESSION["authenticated"] = true;
        $_SESSION["user"] = $user;
        $_SESSION["loadreview"]=true;
        include 'review.php';
        exit();
    }
    else{
        //Record the failed attempt; for an unknown user the update matches no row.
        $attempt = ($accountFound ? $attempts[0] : 0) + 1;
        if (!mysqli_query($link, "UPDATE accounts SET attempts = '$attempt' WHERE name = '$user';"))
        {
            $error = 'Error fetching index: ' . mysqli_error($link);
            include 'error.html.php';
            exit();
        }
        echo "It doesn't look like you submitted a valid username or password.";
        include 'login.html.php';
    }
}
?>

31
html/review/login.html.php Executable file
View file

@ -0,0 +1,31 @@
<?php
// Login form template.
// The session must be started before any output is sent, and only when the
// including script has not already started one.
if (session_status() !== PHP_SESSION_ACTIVE)
{
	session_start();
}
?>
<!DOCTYPE html>
<html>
<head>
<title>wiby.me</title>
<meta http-equiv="content-type" content="text/html; charset=utf-8"/>
<link rel="stylesheet" type="text/css" href="/styles.css">
</head>
<body>
<form method="post">
Username <input type="text" name="user" id="user"/><br>
Password <input type="password" name="pass" id="pass"/><br><br>
<?php /* The CAPTCHA is only shown while the session is not authenticated. */ ?>
<?php if(empty($_SESSION["authenticated"])): ?>
<div>
<img id="captcha" src="/securimage/securimage_show.php" alt="CAPTCHA Image" />
</div>
<div>
<input type="text" name="captcha_code" size="10" maxlength="6" />
<a href="#" onclick="document.getElementById('captcha').src = '/securimage/securimage_show.php?' + Math.random(); return false">Reload Image</a>
</div>
<?php endif; ?>
<br><input type="submit" id="login" value="Login"/>
</form>
</body>
</html>

239
html/review/review.php Executable file
View file

@ -0,0 +1,239 @@
<?php
/*
 * Review queue controller.
 *
 * Two modes, selected by the request and the session flag "loadreview":
 *   - Submission: a POST carrying "startid" while loadreview is false. The rows
 *     of reviewqueue with id in [startid, endid] are re-read and each one is
 *     approved (inserted into indexqueue), buried (graveyard), denied
 *     (wibytemp.rejected) or skipped according to the posted checkboxes, then
 *     removed from reviewqueue unless skipped. Afterwards this script includes
 *     itself again to show the next batch.
 *   - Listing: up to $lim rows that are unreserved, reserved by this approver,
 *     or whose reservation is older than 30 minutes are reserved for this
 *     approver and rendered through reviewqueue.html.php.
 *
 * This file includes itself, so it must not declare named functions
 * (they would be redeclared); closures are used for helpers instead.
 */

// index.php includes this file with a session already active.
if (session_status() !== PHP_SESSION_ACTIVE)
{
	session_start();
}
if(empty($_SESSION["authenticated"]))
{
	include 'index.php';
	exit();
}

// True when the request carries a reviewed batch rather than being a fresh page load.
$isSubmission = isset($_POST['startid']) && empty($_SESSION["loadreview"]);
if ($isSubmission)
{
	// Cast to int: both values are interpolated into SQL below.
	$startID = (int)$_POST['startid'];
	$endID = isset($_POST['endid']) ? (int)$_POST['endid'] : $startID;
}

$link = mysqli_connect('localhost', 'approver', 'foobar');
if (!$link)
{
	$error = 'Cant connect to database.';
	include 'error.html.php';
	exit();
}
if (!mysqli_set_charset($link, 'utf8'))
{
	$error = 'Unable to set database connection encoding.';
	include 'error.html.php';
	exit();
}
if(!mysqli_select_db($link, 'wiby'))
{
	$error = 'Unable to locate the database.';
	include 'error.html.php';
	exit();
}

// NOTE(review): $_SESSION["user"] is interpolated into SQL as-is throughout
// this file. This relies on the login script having stored an already-escaped
// name in the session - confirm if any other entry point sets it.

$lim = 10;
if ($isSubmission)
{
	// Re-read exactly the id range that was shown, so submissions that arrived
	// during the review are ignored.
	// NOTE(review): rows inside this range that were not displayed (e.g. reserved
	// by another approver) carry no checkboxes and are therefore treated as
	// approved below - confirm whether that is intended.
	$result = mysqli_query($link,"SELECT * FROM reviewqueue WHERE id >= $startID AND id <= $endID");
	if(!$result)
	{
		$error = 'Error fetching index: ' . mysqli_error($link);
		include 'error.html.php';
		exit();
	}
}
else
{
	// Select rows that are not reserved, are reserved by this approver, or whose
	// reservation is older than 30 minutes (those may be taken over).
	$result = mysqli_query($link,"SELECT * FROM reviewqueue WHERE reserved IS NULL OR reserved = '".$_SESSION["user"]."' OR reservetime < NOW() - INTERVAL 30 MINUTE LIMIT $lim");
	if(!$result)
	{
		$error = 'Error fetching index: ' . mysqli_error($link);
		include 'error.html.php';
		exit();
	}
}

// Copy the selected rows into parallel arrays. They are initialised first so
// an empty queue yields empty arrays instead of undefined variables.
$id = array();
$url = array();
$worksafe = array();
while($row = mysqli_fetch_array($result))
{
	$id[] = $row['id'];
	$url[] = $row['url'];
	$worksafe[] = $row['worksafe'];
}

if ($_SERVER['REQUEST_METHOD'] === 'POST' && $isSubmission)
{
	// Unchecked checkboxes are absent from $_POST; treat them as "off".
	$isChecked = function($field)
	{
		return isset($_POST[$field]) && $_POST[$field] == 'on';
	};
	// Numeric form fields are forced to integers before reaching SQL;
	// missing or empty fields become 0.
	$postedInt = function($field)
	{
		return isset($_POST[$field]) ? (int)$_POST[$field] : 0;
	};

	$i=0;
	$num_crawlers=1;//modify this variable to the number of crawlers you are using in parallel.
	$crawler_id=1;
	foreach($id as $pageid)
	{
		$pageid = (int)$pageid;
		// The URL was submitted by the public and merely stored in reviewqueue;
		// it must be escaped again before being placed in a new statement.
		$safeUrl = mysqli_real_escape_string($link, $url[$i]);

		$deny = $isChecked("deny$pageid");
		$skip = $isChecked("skip$pageid");
		$bury = $isChecked("bury$pageid");
		$worksafeFlag = $isChecked("worksafe$pageid") ? 1 : 0;

		// Approved: store the page in indexqueue for the crawler.
		if(!$deny && !$skip && !$bury)
		{
			$surprise = $isChecked("surprise$pageid") ? 1 : 0;
			$forcerules = $isChecked("forcerules$pageid") ? 1 : 0;
			$crawlrepeat = $isChecked("crawlrepeat$pageid") ? 1 : 0;
			$updatable = $postedInt("updatable$pageid");
			$crawldepth = $postedInt("crawldepth$pageid");
			$crawlpages = $postedInt("crawlpages$pageid");
			$crawltype = $postedInt("crawltype$pageid");
			$sql = 'INSERT INTO indexqueue (url,worksafe,approver,surprise,updatable,crawl_depth,crawl_pages,crawl_type,force_rules,crawl_repeat,crawler_id) VALUES ("'.$safeUrl.'","'.$worksafeFlag.'","'.$_SESSION["user"].'","'.$surprise.'","'.$updatable.'","'.$crawldepth.'","'.$crawlpages.'","'.$crawltype.'","'.$forcerules.'","'.$crawlrepeat.'","'.$crawler_id.'")';
			if (!mysqli_query($link, $sql))
			{
				$error = 'Error inserting into indexqueue: ' . mysqli_error($link);
				include 'error.html.php';
				exit();
			}
		}

		// Buried: store the page in graveyard.
		if($bury && !$skip && !$deny)
		{
			$sql = 'INSERT INTO graveyard (url,worksafe) VALUES ("'.$safeUrl.'","'.$worksafeFlag.'")';
			if (!mysqli_query($link, $sql))
			{
				$error = 'Error inserting into graveyard: ' . mysqli_error($link);
				include 'error.html.php';
				exit();
			}
		}

		// Denied: record the page in the rejected table of the wibytemp database,
		// then switch back to the wiby database.
		if($deny && !$bury && !$skip)
		{
			if(!mysqli_select_db($link, 'wibytemp'))
			{
				$error = 'Unable to locate the database.'. mysqli_error($link);
				include 'error.html.php';
				exit();
			}
			$sql = 'INSERT INTO rejected (url,user,date) VALUES ("'.$safeUrl.'","'.$_SESSION["user"].'",now())';
			if (!mysqli_query($link, $sql))
			{
				$error = 'Error inserting into rejected: ' . mysqli_error($link);
				include 'error.html.php';
				exit();
			}
			if(!mysqli_select_db($link, 'wiby'))
			{
				$error = 'Unable to locate the database...';
				include 'error.html.php';
				exit();
			}
		}

		// Everything except skipped pages leaves the review queue.
		if(!$skip)
		{
			$result2 = mysqli_query($link,"DELETE FROM reviewqueue WHERE id = $pageid");
			if(!$result2)
			{
				$error = 'Error deleting from reviewqueue: ' . mysqli_error($link);
				include 'error.html.php';
				exit();
			}
		}

		$i++;
		// Rotate through crawler ids 1..$num_crawlers, one step per reviewed row.
		if($crawler_id == $num_crawlers){
			$crawler_id = 1;
		}else{
			$crawler_id++;
		}
	}

	// Switch back to listing mode and render the next batch by re-running this script.
	$_SESSION["loadreview"]=true;
	unset($id);
	unset($url);
	unset($worksafe);
	unset($startID);
	unset($endID);
	unset($result);
	$link -> close();
	include 'review.php';
	//include 'refresh.html';
	exit();
}
else
{
	$_SESSION["loadreview"]=false;
	// Reserve the listed rows for this approver. Per the original note,
	// reservetime updates automatically, so another approver cannot take
	// them for 30 minutes.
	foreach($id as $pageid)
	{
		$pageid = (int)$pageid;
		$result = mysqli_query($link,"UPDATE reviewqueue SET reserved = '".$_SESSION["user"]."' WHERE id = $pageid");
		if(!$result)
		{
			$error = 'Error fetching index: ' . mysqli_error($link);
			include 'error.html.php';
			exit();
		}
	}
	//get total number of rows remaining in queue
	$totalrows = mysqli_query($link,"select count(id) from reviewqueue");
	if(!$totalrows)
	{
		$error = 'Error fetching index: ' . mysqli_error($link);
		include 'error.html.php';
		exit();
	}
	//get result of total rows remaining in queue
	while($row = mysqli_fetch_array($totalrows))
	{
		$queuesize = $row['count(id)'];
		echo $queuesize . " pages queued in total.";
	}
	include 'reviewqueue.html.php';
}
?>

View file

@ -0,0 +1,66 @@
<!DOCTYPE html>
<html>
<head>
<title>Awaiting Approval</title>
<meta http-equiv="content-type" content="text/html; charset=utf-8"/>
<link rel="stylesheet" type="text/css" href="/styles.css">
<style>
blockquote { width: 100%; }
input[type='number'] { width: 80px; }
</style>
</head>
<body>
<form action="review.php" method="post">
<p>Some pages awaiting review:</p>
<?php
// $url, $id and $worksafe are expected to be parallel arrays set by the
// including script (one entry per queued page). Fall back to an empty list
// so an empty queue renders without errors.
$pageurls = (isset($url) && is_array($url)) ? $url : array();
$i=0;
?>
<?php foreach ($pageurls as $pageurl): ?>
<?php
// Every form field of a row is suffixed with the row id so the receiving
// script can match the posted values back to the queued page.
$rowid = (int)$id[$i];
$worksafeChecked = (isset($worksafe[$i]) && $worksafe[$i] == '1') ? ' checked="checked"' : '';
?>
<blockquote><p>
<a class="tlink" href="<?php echo htmlspecialchars($pageurl, ENT_QUOTES, 'UTF-8'); ?>" style="font-size: 17px;" target="_blank" rel="noopener noreferrer"><?php echo htmlspecialchars($pageurl, ENT_QUOTES, 'UTF-8'); ?></a><br>
[Worksafe<input type="checkbox" id="worksafe<?php echo $rowid ?>" name="worksafe<?php echo $rowid ?>"<?php echo $worksafeChecked ?>>]
[Surprise<input type="checkbox" id="surprise<?php echo $rowid ?>" name="surprise<?php echo $rowid ?>">]
[Skip<input type="checkbox" id="skip<?php echo $rowid ?>" name="skip<?php echo $rowid ?>" >]
[Bury<input type="checkbox" id="bury<?php echo $rowid ?>" name="bury<?php echo $rowid ?>" >]
[Deny<input type="checkbox" id="deny<?php echo $rowid ?>" name="deny<?php echo $rowid ?>" >]
[Updatable<select id="updatable<?php echo $rowid ?>" name="updatable<?php echo $rowid ?>">
<option value=1>1 WEEK</option>
<option value=2>1 DAY</option>
<option value=3>12 HOUR</option>
<option value=4>6 HOUR</option>
<option value=5>3 HOUR</option>
<option value=6>1 HOUR</option>
</select>]
[Crawl: Depth <input type="number" id="crawldepth<?php echo $rowid ?>" name="crawldepth<?php echo $rowid ?>" >
Pages <input type="number" id="crawlpages<?php echo $rowid ?>" name="crawlpages<?php echo $rowid ?>" >
Type <select id="crawltype<?php echo $rowid ?>" name="crawltype<?php echo $rowid ?>">
<option value=0>Local</option>
<option value=1>All</option>
<option value=2>External</option>
</select>
Enforce Rules<input type="checkbox" id="forcerules<?php echo $rowid ?>" name="forcerules<?php echo $rowid ?>" >
Repeat<input type="checkbox" id="crawlrepeat<?php echo $rowid ?>" name="crawlrepeat<?php echo $rowid ?>" >]
</p></blockquote>
<?php $i++; ?>
<?php endforeach; ?>
<br>
<div><input type="submit" id="submit" value="Submit"/></div>
<?php /* The id range of the displayed rows is only posted when at least one row
         was listed; with an empty queue, submitting simply reloads the queue. */ ?>
<?php if ($i > 0): ?>
<input type="hidden" name="startid" id="startid" value="<?php echo (int)$id[0]; ?>">
<input type="hidden" name="endid" id="endid" value="<?php echo (int)$id[$i-1]; ?>">
<?php endif; ?>
</form>
</body>
</html>

BIN
html/securimage/AHGBold.ttf Executable file

Binary file not shown.

25
html/securimage/LICENSE.txt Executable file
View file

@ -0,0 +1,25 @@
COPYRIGHT:
Copyright (c) 2011 Drew Phillips
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
- Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

12
html/securimage/README.FONT.txt Executable file
View file

@ -0,0 +1,12 @@
AHGBold.ttf is used by Securimage under the following license:
Alte Haas Grotesk is a typeface that look like an helvetica printed in an old Muller-Brockmann Book.
These fonts are freeware and can be distributed as long as they are
together with this text file.
I would appreciate very much to see what you have done with it anyway.
yann le coroller
www.yannlecoroller.com
yann@lecoroller.com

244
html/securimage/README.md Executable file
View file

@ -0,0 +1,244 @@
## Name:
**Securimage** - A PHP class for creating captcha images and audio with many options.
## Version:
**3.6.7**
## Author:
Drew Phillips <drew@drew-phillips.com>
## Download:
The latest version can always be found at [phpcaptcha.org](https://www.phpcaptcha.org)
## Documentation:
Online documentation of the class, methods, and variables can be found
at http://www.phpcaptcha.org/Securimage_Docs/
## Requirements:
* PHP 5.4 or greater
* GD 2.0
* FreeType (Required, for TTF fonts)
* PDO (if using Sqlite, MySQL, or PostgreSQL)
## Synopsis:
**Within your HTML form**
<form method="post" action="">
.. form elements
<div>
<?php
require_once 'securimage.php';
echo Securimage::getCaptchaHtml();
?>
</div>
</form>
**Within your PHP form processor**
require_once 'securimage.php';
// Code Validation
$image = new Securimage();
if ($image->check($_POST['captcha_code']) == true) {
echo "Correct!";
} else {
echo "Sorry, wrong code.";
}
## Description:
What is **Securimage**?
Securimage is a PHP class that is used to generate and validate CAPTCHA images.
The class uses an existing PHP session or creates its own if none is found to
store the CAPTCHA code. In addition, a database can be used instead of
session storage.
Variables within the class are used to control the style and display of the
image. The class uses TTF fonts and effects for strengthening the security of
the image.
It also creates audible codes which are played for visually impaired users.
## UPGRADE NOTICE:
**3.6.3 and below:**
Securimage 3.6.4 fixed a XSS vulnerability in example_form.ajax.php. It is
recommended to upgrade to the latest version or delete example_form.ajax.php
from the securimage directory on your website.
**3.6.2 and above:**
If you are upgrading to 3.6.2 or greater *AND* are using database storage,
the table structure has changed in 3.6.2 adding an audio_data column for
storing audio files in the database in order to support HTTP range
requests. Delete your tables and have Securimage recreate them or see
the function createDatabaseTables() in securimage.php for the new structure
depending on which database backend you are using and alter the tables as
needed. If using SQLite, just overwrite your existing securimage.sq3 file
with the one from this release.
*If you are not using database tables for storage, ignore this notice.*
## Copyright:
Script
Copyright (c) 2018 Drew Phillips
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
- Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
## Licenses:
**WavFile.php**
The WavFile.php class used in Securimage by Drew Phillips and Paul Voegler
is used under the BSD License. See WavFile.php for details.
Many thanks to Paul Voegler (http://www.voegler.eu/) for contributing to
Securimage.
Script
---------------------------------------------------------------------------
**Flash code for Securimage**
Flash code created by Age Bosma & Mario Romero (animario@hotmail.com)
Many thanks for releasing this to the project!
---------------------------------------------------------------------------
**HKCaptcha**
Portions of Securimage contain code from Han-Kwang Nienhuys' PHP captcha
Han-Kwang Nienhuys' PHP captcha
Copyright June 2007
This copyright message and attribution must be preserved upon
modification. Redistribution under other licenses is expressly allowed.
Other licenses include GPL 2 or higher, BSD, and non-free licenses.
The original, unrestricted version can be obtained from
http://www.lagom.nl/linux/hkcaptcha/
---------------------------------------------------------------------------
**AHGBold.ttf**
AHGBold.ttf (AlteHaasGroteskBold.ttf) font was created by Yann Le Coroller
and is distributed as freeware.
Alte Haas Grotesk is a typeface that look like an helvetica printed in an
old Muller-Brockmann Book.
These fonts are freeware and can be distributed as long as they are
together with this text file.
I would appreciate very much to see what you have done with it anyway.
yann le coroller
www.yannlecoroller.com
yann@lecoroller.com
---------------------------------------------------------------------------
**PopForge Flash Library**
Portions of securimage_play.swf use the PopForge flash library for playing audio
/**
* Copyright(C) 2007 Andre Michelle and Joa Ebert
*
* PopForge is an ActionScript3 code sandbox developed by Andre Michelle
* and Joa Ebert
* http://sandbox.popforge.de
*
* PopforgeAS3Audio is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* PopforgeAS3Audio is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>
*/
--------------------------------------------------------------------------
**Graphics**
Some graphics used are from the Humility Icon Pack by WorLord
License: GNU/GPL (http://findicons.com/pack/1723/humility)
http://findicons.com/icon/192558/gnome_volume_control
http://findicons.com/icon/192562/gtk_refresh
--------------------------------------------------------------------------
**Background noise sound files are from SoundJay.com**
http://www.soundjay.com/tos.html
All sound effects on this website are created by us and protected under
the copyright laws, international treaty provisions and other applicable
laws. By downloading sounds, music or any material from this site implies
that you have read and accepted these terms and conditions:
Sound Effects
You are allowed to use the sounds free of charge and royalty free in your
projects (such as films, videos, games, presentations, animations, stage
plays, radio plays, audio books, apps) be it for commercial or
non-commercial purposes.
But you are NOT allowed to
- post the sounds (as sound effects or ringtones) on any website for
others to download, copy or use
- use them as a raw material to create sound effects or ringtones that
you will sell, distribute or offer for downloading
- sell, re-sell, license or re-license the sounds (as individual sound
effects or as a sound effects library) to anyone else
- claim the sounds as yours
- link directly to individual sound files
- distribute the sounds in apps or computer programs that are clearly
sound related in nature (such as sound machine, sound effect
generator, ringtone maker, funny sounds app, sound therapy app, etc.)
or in apps or computer programs that use the sounds as the program's
sound resource library for other people's use (such as animation
creator, digital book creator, song maker software, etc.). If you are
developing such computer programs, contact us for licensing options.
If you use the sound effects, please consider giving us a credit and
linking back to us but it's not required.

222
html/securimage/README.txt Executable file
View file

@ -0,0 +1,222 @@
NAME:
Securimage - A PHP class for creating captcha images and audio with many options.
VERSION:
3.6.7
AUTHOR:
Drew Phillips <drew@drew-phillips.com>
DOWNLOAD:
The latest version can always be
found at http://www.phpcaptcha.org
DOCUMENTATION:
Online documentation of the class, methods, and variables can
be found at http://www.phpcaptcha.org/Securimage_Docs/
REQUIREMENTS:
PHP 5.4 or greater
GD 2.0
FreeType (Required, for TTF fonts)
PDO (if using Sqlite, MySQL, or PostgreSQL)
SYNOPSIS:
require_once 'securimage.php';
**Within your HTML form**
<form method="post" action="">
.. form elements
<div>
<?php echo Securimage::getCaptchaHtml() ?>
</div>
</form>
**Within your PHP form processor**
// Code Validation
$image = new Securimage();
if ($image->check($_POST['captcha_code']) == true) {
echo "Correct!";
} else {
echo "Sorry, wrong code.";
}
DESCRIPTION:
What is Securimage?
Securimage is a PHP class that is used to generate and validate CAPTCHA
images.
The class uses an existing PHP session or creates its own if
none is found to store the CAPTCHA code. In addition, a database can be
used instead of session storage.
Variables within the class are used to control the style and display of
the image. The class uses TTF fonts and effects for strengthening the
security of the image.
It also creates audible codes which are played for visually impaired users.
UPGRADE NOTICE:
3.6.3 and below:
Securimage 3.6.4 fixed a XSS vulnerability in example_form.ajax.php. It is
recommended to upgrade to the latest version or delete example_form.ajax.php
from the securimage directory on your website.
3.6.2 and above:
If you are upgrading to 3.6.2 or greater AND are using database storage,
the table structure has changed in 3.6.2 adding an audio_data column for
storing audio files in the database in order to support HTTP range
requests. Delete your tables and have Securimage recreate them or see
the function createDatabaseTables() in securimage.php for the new structure
depending on which database backend you are using and alter the tables as
needed. If using SQLite, just overwrite your existing securimage.sq3 file
with the one from this release.
If you are not using database tables for storage, ignore this notice.
COPYRIGHT:
Copyright (c) 2018 Drew Phillips
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
- Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
LICENSES:
The WavFile.php class used in Securimage by Drew Phillips and Paul Voegler
is used under the BSD License. See WavFile.php for details.
Many thanks to Paul Voegler (http://www.voegler.eu/) for contributing to
Securimage.
---------------------------------------------------------------------------
Flash code created by Age Bosma & Mario Romero (animario@hotmail.com)
Many thanks for releasing this to the project!
---------------------------------------------------------------------------
Portions of Securimage contain code from Han-Kwang Nienhuys' PHP captcha
Han-Kwang Nienhuys' PHP captcha
Copyright June 2007
This copyright message and attribution must be preserved upon
modification. Redistribution under other licenses is expressly allowed.
Other licenses include GPL 2 or higher, BSD, and non-free licenses.
The original, unrestricted version can be obtained from
http://www.lagom.nl/linux/hkcaptcha/
---------------------------------------------------------------------------
AHGBold.ttf (AlteHaasGroteskBold.ttf) font was created by Yann Le Coroller
and is distributed as freeware.
Alte Haas Grotesk is a typeface that look like an helvetica printed in an
old Muller-Brockmann Book.
These fonts are freeware and can be distributed as long as they are
together with this text file.
I would appreciate very much to see what you have done with it anyway.
yann le coroller
www.yannlecoroller.com
yann@lecoroller.com
---------------------------------------------------------------------------
Portions of securimage_play.swf use the PopForge flash library for
playing audio
/**
* Copyright(C) 2007 Andre Michelle and Joa Ebert
*
* PopForge is an ActionScript3 code sandbox developed by Andre Michelle
* and Joa Ebert
* http://sandbox.popforge.de
*
* PopforgeAS3Audio is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* PopforgeAS3Audio is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>
*/
--------------------------------------------------------------------------
Some graphics used are from the Humility Icon Pack by WorLord
License: GNU/GPL (http://findicons.com/pack/1723/humility)
http://findicons.com/icon/192558/gnome_volume_control
http://findicons.com/icon/192562/gtk_refresh
--------------------------------------------------------------------------
Background noise sound files are from SoundJay.com
http://www.soundjay.com/tos.html
All sound effects on this website are created by us and protected under
the copyright laws, international treaty provisions and other applicable
laws. By downloading sounds, music or any material from this site implies
that you have read and accepted these terms and conditions:
Sound Effects
You are allowed to use the sounds free of charge and royalty free in your
projects (such as films, videos, games, presentations, animations, stage
plays, radio plays, audio books, apps) be it for commercial or
non-commercial purposes.
But you are NOT allowed to
- post the sounds (as sound effects or ringtones) on any website for
others to download, copy or use
- use them as a raw material to create sound effects or ringtones that
you will sell, distribute or offer for downloading
- sell, re-sell, license or re-license the sounds (as individual sound
effects or as a sound effects library) to anyone else
- claim the sounds as yours
- link directly to individual sound files
- distribute the sounds in apps or computer programs that are clearly
sound related in nature (such as sound machine, sound effect
generator, ringtone maker, funny sounds app, sound therapy app, etc.)
or in apps or computer programs that use the sounds as the program's
sound resource library for other people's use (such as animation
creator, digital book creator, song maker software, etc.). If you are
developing such computer programs, contact us for licensing options.
If you use the sound effects, please consider giving us a credit and
linking back to us but it's not required.

1913
html/securimage/WavFile.php Executable file

File diff suppressed because it is too large Load diff

11
html/securimage/audio/.htaccess Executable file
View file

@ -0,0 +1,11 @@
# Deny access to this folder
# Exactly one of the two blocks below takes effect, depending on whether
# mod_authz_core is loaded.
# Apache 2.4
# (applies when mod_authz_core is loaded)
<IfModule mod_authz_core.c>
Require all denied
</IfModule>
# Apache 2.2
# (applies when mod_authz_core is not loaded)
<IfModule !mod_authz_core.c>
Deny from all
</IfModule>

BIN
html/securimage/audio/en/0.wav Executable file

Binary file not shown.

BIN
html/securimage/audio/en/1.wav Executable file

Binary file not shown.

BIN
html/securimage/audio/en/10.wav Executable file

Binary file not shown.

BIN
html/securimage/audio/en/11.wav Executable file

Binary file not shown.

BIN
html/securimage/audio/en/12.wav Executable file

Binary file not shown.

BIN
html/securimage/audio/en/13.wav Executable file

Binary file not shown.

BIN
html/securimage/audio/en/14.wav Executable file

Binary file not shown.

Some files were not shown because too many files have changed in this diff Show more