Doogle Master

This commit is contained in:
Zepher Ashe 2022-04-27 09:30:51 +01:00 committed by GitHub
parent 327e2320cb
commit 65cf89e250
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
31 changed files with 1568 additions and 0 deletions

49
SHA256SUMS Normal file
View file

@ -0,0 +1,49 @@
-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA512
59492c770b524a1c583598969f410864b42cbe50dba1567401d42925a21fdc3b ./ajax/setBroken.php
42b0a956fbb7e7c4d258b76f77a40bcc3f424e92b8df4e48029b6ac36814ce13 ./ajax/updateImageCount.php
93c09e73205ae4cb9298787e456e7ba40a3a59913d4c7f7fa8231f3c0d57fb7e ./ajax/updateLinkCount.php
f6f3d53dd2240261f157695adf386a5c08014298c19f62ccf63cd162996892d0 ./assets/css/fancybox/3.3.5/jquery.fancybox.min.css
d5acffdfb41dc1e41a2132bff2e67b0e1632f10cba48321abc5c33e09fa0d076 ./assets/css/style.css
f72f0630f8cbe6b5a24094960e099eaf1f7be18af93c2f74f516f13b1e6212a0 ./assets/images/doogleLogo.png
11e84e77b4b2ca65fdc45407601edd77acef60994d8dfb670fe6db6a6672dbb9 ./assets/images/favicon/android-chrome-192x192.png
81506b473fe8709ea34e9974ede53b7bed210bbbd5b8f3541e9cc9fac2cd1fd8 ./assets/images/favicon/android-chrome-512x512.png
30b13061a191ddd4eb62107fa20bc714db91c5b39d39e64e843ce5a118a13bf1 ./assets/images/favicon/apple-touch-icon.png
a734688685e1d3eeb7c5b15267f31e7961aff394f3f68fc389a256b45e42970e ./assets/images/favicon/favicon-16x16.png
018136439b52bc1db7c84311f20435cfce95b5e191f5d8f36b27ce7eb5bc6064 ./assets/images/favicon/favicon-32x32.png
09925a497bbd72a7850434f205f31d9ab8cfa0e4f727718731595314ac89d482 ./assets/images/favicon/favicon.ico
bcb764f2e87fefd1f9c39cf0d3517ad4cbea2008cd925380aee23d9832e1fc2c ./assets/images/icons/search.png
88761e31eae97360d4dbdeedff92c4d151ec33492e9f1cdb34eb802762a9c125 ./assets/images/page.png
4ea49f1436476f370da62494ae780cfc99d4cbdd5cfab48082be4ce2274ecc07 ./assets/images/pageEnd.png
e771b2c0a69e5695ad7ff1a8bd7071fc6d46e6f9e3024acad6f743634d9c2e6d ./assets/images/pageSelected.png
61a1b5647ecafd1ae2fdd513262a394e16222b590bb82cc2a442f128cc6d4e52 ./assets/images/pageStart.png
4dbe2075e08dfc008a9a1290dc149f6ee360215610cc1944bdb625c0aee3b83c ./assets/js/fancybox/3.3.5/jquery.fancybox.min.js
160a426ff2894252cd7cebbdd6d6b7da8fcd319c65b70468f10b6690c45d02ef ./assets/js/jquery-3.3.1.min.js
367d6afdfc741fb48d2d9310e47c3924b693459a74c882c0fc545ec5ed7d55d2 ./assets/js/masonry/4.2.2/masonry.pkgd.min.js
19ada944019a8ef415a633317cc3d0924d5a0f2d91fe47ba1546e8844d7b308f ./assets/js/script.js
c798d95082d993b0de54f32e728515255f91e5c130476ff0b77089138aea1b5f ./classes/DomDocumentParser.php
1bd5e96382d6a3eddeec946080c96629e9aa56c2774a017fad24606f0c9f4244 ./classes/ImageResultsProvider.php
748e777d13df22396e186ddadb825cf472c92d9bda7aa04aa19c2b1cf96de3ec ./classes/SiteResultsProvider.php
b0ca5b7eb0af35124f5caab1c334356fd3cfe5c2cea625f313336622e475df76 ./config.php
371c7775b3cddddd12ff95ece2b8782e984ba869d4bce45cdca8e6c14ffa07a5 ./crawl-formSubmit.php
62ac95f4e51efd41db713a049218ad9255b35350a2e070a6e1facc62d842550b ./crawl-manual.php
bec40c943cf20745f47210a73226f69c19c63a6c90a2cc6c23d223aa777f9ba9 ./doogle-tables-no-data.sql
54346c28a4b984e342192c34c95cd849d641437f22a7b03303e729e286a2afa4 ./index.php
2ec5c79665b679dbc290b3d753a3710fc732ac64e3e7e525324988e6507103f6 ./search.php
-----BEGIN PGP SIGNATURE-----
iQIzBAEBCgAdFiEEHj7kwug4oP7u+vqSNt6aWc2GnuIFAmJo/qQACgkQNt6aWc2G
nuJKbA/+LDRw7bwjj9mz/60E7BMMrMSJKOhnVTxNIfuiK+HVwFh8NAUoVvR3S3qZ
VJfS7igvmj/Ne6YDTpd45lVXhAPr1POx/RzwS0VGr270lacl1cyMq60dHrZu0wLc
/rKyEbejFCqH16l6f+qfy7e4rhJFi/IM+tS+gJp7T0EQMSuNzrH9KgVc7H0LlfWZ
tQy9Sll64y9TI/W80kzq2169ULgRoH3AQpWLBznaIPZo/EmKH/r57DV+WYHm10Z8
Bu9tdGPDn3eh6IvvHfeFm+dSgskbnh8FTsa2VUaY0GZ1hnLAvENjAAV9CDuUBj5z
KWZPhWIz+iM8RsMax13eA3TgNT+p7JLhHaeLtteyhXwWobgvTsgh/UichwmFqcPV
nswJgTngJhRMkf8O0A3fQO3zZrKAU2rR5bJrMSMylhIvtpg38+/yp0a5xIkJhKwn
scf9GV8brqT+q9y9wrwKfrgcWMTbUH07Iv9R7KwcNz3sz0lA2TgIAximg0ZbmeEp
FjCL/sl6mhtK/LlR2blclMxQnEXOg/Y17LLDqxRuh/SzGDgoEBce7I36j8ZyQSfT
z7wwufDMnsqoC5LoDONyfjhrnydmYRRJ9mVnSipz48ON/pPAo6jGyk78mmO3A0/P
oVepSwM8n1HUHwnQmsXtoHyz1lL7n/H+X/pJcwO7lMVYEuCaxDQ=
=M5Y1
-----END PGP SIGNATURE-----

13
ajax/setBroken.php Normal file
View file

@ -0,0 +1,13 @@
<?php
// AJAX endpoint: flags the image whose URL is posted as "src" as broken
// so the image search stops returning it.
include("../config.php");

$brokenSrc = isset($_POST["src"]) ? $_POST["src"] : null;
if($brokenSrc === null)
{
    echo "No src passed to page"; //DEBUGGING
}
else
{
    $statement = $con->prepare("UPDATE images SET broken = 1 WHERE imageUrl=:src");
    $statement->bindValue(":src", $brokenSrc);
    $statement->execute();
}
?>

13
ajax/updateImageCount.php Normal file
View file

@ -0,0 +1,13 @@
<?php
// AJAX endpoint: adds one click to the image whose URL is posted as "imageUrl".
include("../config.php");

$clickedImageUrl = isset($_POST["imageUrl"]) ? $_POST["imageUrl"] : null;
if($clickedImageUrl === null)
{
    echo "No image URL passed to page"; //DEBUGGING
}
else
{
    $statement = $con->prepare("UPDATE images SET clicks = clicks + 1 WHERE imageUrl=:imageUrl");
    $statement->bindValue(":imageUrl", $clickedImageUrl);
    $statement->execute();
}
?>

13
ajax/updateLinkCount.php Normal file
View file

@ -0,0 +1,13 @@
<?php
// AJAX endpoint: adds one click to the site row whose id is posted as "linkId".
include("../config.php");

$clickedLinkId = isset($_POST["linkId"]) ? $_POST["linkId"] : null;
if($clickedLinkId === null)
{
    echo "No link passed to page"; //DEBUGGING
}
else
{
    $statement = $con->prepare("UPDATE sites SET clicks = clicks + 1 WHERE id=:id");
    $statement->bindValue(":id", $clickedLinkId);
    $statement->execute();
}
?>

File diff suppressed because one or more lines are too long

400
assets/css/style.css Normal file
View file

@ -0,0 +1,400 @@
:root
{
/* Variables */
--searchbar-border-radius: 20px;
/* Custom Scrollbar - WebKit */
--scrollbarWidthSlim: 6px;
--scrollbarBgColourGradient: linear-gradient(180deg, #d0368a 0%, #708ad4 99%);
}
*
{
font-family: Arial, sans-serif;
color: #545454;
}
html,
body
{
margin: 0;
height: 100%;
}
.wrapper
{
display: flex;
flex-direction: column;
min-height: 100%;
}
.wrapper.indexPage
{
justify-content: center;
}
.mainSection
{
display: flex;
flex-direction: column;
align-items: center;
}
.mainSection .searchContainer
{
margin-top: 20px;
width: 100%;
}
.mainSection .searchContainer form
{
display: flex;
flex-direction: column;
align-items: center;
}
/* Search button shared by the index page and the results-page header. */
.searchContainer .searchButton
{
    color: #757575;
    background-color: #f5f5f5;
    border: none;
    height: 36px;
    width: 125px;
    /* Pill-shaped corners. The earlier 2px radius declared in this rule was
       always overridden by this value, so only the effective one is kept. */
    border-radius: 25px;
    font-size: 13px;
    font-weight: bold;
    margin-top: 20px;
    cursor: pointer;
    outline: none;
}
.mainSection .searchContainer .searchBox
{
border: none;
box-shadow: 0 2px 2px 0 rgba(0,0,0,0.16), 0 0 0 1px rgba(0,0,0,0.08);
height: 44px;
border-radius: var(--searchbar-border-radius);
outline: none;
padding: 10px;
box-sizing: border-box;
font-size: 16px;
width: 70%;
max-width: 630px;
color: #000;
padding-left: 25px;
}
/* .mainSection .searchContainer .searchBox:hover
{
border: aqua;
} */
.mainSection .logoContainer
{
width: 220px;
text-align: center;
}
.logoContainer img
{
width: 100%;
}
/******************
Search Page Styling
******************/
.header
{
background-color: #FAFAFA;
border-bottom: 1px solid #ebebeb;
}
.wrapper .headerContent
{
display: flex;
align-items: center;
}
.headerContent .logoContainer
{
width: 150px;
padding: 5px 20px;
box-sizing: border-box;
}
/* Search container */
.headerContent .searchContainer
{
flex: 1;
}
.headerContent .searchContainer form
{
margin: 15px 0 28px 0;
}
.headerContent .searchBarContainer
{
height: 44px;
background-color: #fff;
box-shadow: 0 2px 2px 0 rgba(0,0,0,0.16), 0 0 0 1px rgba(0,0,0,0.08);
width: 70%;
max-width: 630px;
box-sizing: border-box;
display: flex;
border-radius: var(--searchbar-border-radius);
}
.headerContent .searchBarContainer .searchBox
{
flex: 1;
border: none;
background-color: transparent;
padding: 12px;
font-size: 16px;
color: #000;
outline: none;
}
.headerContent .searchBarContainer .searchButton
{
background-color: #fff;
height: 44px;
margin-top: 0;
width: 44px;
padding-right: 20px;
display: flex;
justify-content: center;
}
.headerContent .searchBarContainer .searchButton img
{
width: 22px;
padding: 10px;
}
/* Tabs container */
.tabsContainer
{
margin-left: 150px;
}
.tabsContainer .tabList
{
padding: 0;
margin: 0;
}
.tabsContainer .tabList li
{
display: inline-block;
padding: 0 16px 12px 16px;
color: #777;
font-size: 13px;
}
.tabsContainer .tabList li a
{
text-decoration: none;
}
.tabsContainer .tabList li.active
{
border-bottom: 3px solid #1A73E8;
}
.tabsContainer .tabList li.active a
{
font-weight: bold;
color: #1A73E8;
}
/****************
Results styling
****************/
.mainResultsSection
{
flex: 1;
}
.mainResultsSection .resultsCount
{
font-size: 13px;
color: #808080;
margin-left: 150px;
}
.mainResultsSection .siteResults
{
margin-left: 150px;
}
.resultContainer
{
display: flex;
flex-direction: column;
margin-bottom: 26px;
}
.resultContainer .title
{
margin: 0;
}
.resultContainer .title a
{
color: #1a0dab;
text-decoration: none;
font-weight: normal;
font-size: 18px;
}
.resultContainer .title a:hover
{
text-decoration: underline;
}
.resultContainer .url
{
color: #006621;
font-size: 14px;
}
.resultContainer .description
{
font-size: 12px;
}
/********************
Pagination styling
********************/
.paginationContainer
{
display: flex;
justify-content: center;
margin-bottom: 25px;
}
.pageButtons
{
display: flex;
}
.pageNumberContainer img
{
height: 37px;
}
#pageEndContainer img
{
height: 43px;
}
.pageNumberContainer,
.pageNumberContainer a
{
display: flex;
flex-direction: column;
align-items: center;
text-decoration: none;
}
.pageNumber
{
color: #000;
font-size: 13px;
}
a .pageNumber
{
color: #4285f4;
}
/**************
Image styling
**************/
.mainResultsSection .imageResults
{
margin: 20px;
}
.gridItem
{
position: relative;
}
.gridItem img
{
max-width: 200px;
min-width: 50px;
visibility: hidden;
}
.gridItem .details
{
visibility: hidden;
position: absolute;
bottom: 0px;
left: 0px;
width: 100%;
overflow: hidden;
background-color: rgba(0,0,0,0.8);
color: #fff;
font-size: 11px;
padding: 3px;
box-sizing: border-box;
white-space: nowrap;
}
.gridItem:hover .details
{
visibility: visible;
}
/*********
Crawl form
**********/
#crawl-wrapper
{
text-align: center;
padding-top: 100px;
}
#crawl-input
{
width:400px;
}
/*
Mobile responsive design -- Overrides
*/
/* Narrow screens: larger result URLs, centred tabs, and a 15px left margin
   instead of the 150px desktop indent. */
@media only screen and (max-width: 700px)
{
    .resultContainer .url
    {
        color: #006621;
        font-size: 19px;
    }
    .tabsContainer
    {
        text-align: center;
    }
    /* The results count and the site results share the same left margin;
       the count rule was previously declared twice with identical content. */
    .mainResultsSection .resultsCount,
    .mainResultsSection .siteResults
    {
        margin-left: 15px;
    }
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 47 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 10 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 50 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 9.1 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 515 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 1,008 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.3 KiB

BIN
assets/images/page.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.3 KiB

BIN
assets/images/pageEnd.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 47 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.4 KiB

BIN
assets/images/pageStart.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.4 KiB

File diff suppressed because one or more lines are too long

2
assets/js/jquery-3.3.1.min.js vendored Normal file

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

111
assets/js/script.js Normal file
View file

@ -0,0 +1,111 @@
// Debounce handle shared with loadImage(): delays the masonry re-layout
// until images have stopped arriving.
var timer;

/**
 * Page bootstrap: wires click tracking on site results, initialises the
 * masonry image grid and configures the fancybox lightbox.
 */
$(document).ready(function() {

    // Escapes a value for use as HTML text or inside a quoted attribute.
    // Captions and URLs come from crawled pages, so they are untrusted.
    function escapeHtml(value) {
        return String(value)
            .replace(/&/g, "&amp;")
            .replace(/</g, "&lt;")
            .replace(/>/g, "&gt;")
            .replace(/"/g, "&quot;")
            .replace(/'/g, "&#39;");
    }

    $(".result").on("click", function() {
        var id = $(this).attr("data-linkId");
        var url = $(this).attr("href");

        if(!id) {
            // Without an id there is nothing to track; let the browser
            // follow the link normally instead of leaving it dead.
            console.warn("data-linkId attribute not found"); //DEBUGGING
            return true;
        }

        increaseLinkClicks(id, url);
        return false; // navigation is performed by increaseLinkClicks()
    });

    var grid = $(".imageResults");

    // Images start hidden (see style.css) and are revealed once laid out.
    grid.on("layoutComplete", function() {
        $(".gridItem img").css("visibility", "visible");
    });

    grid.masonry({
        itemSelector: ".gridItem",
        columnWidth: 200,
        gutter: 5,
        isInitLayout: false
    });

    $("[data-fancybox]").fancybox({
        // The returned string is used as HTML (it carries the links below),
        // so every dynamic piece is escaped first. String() inside
        // escapeHtml also covers jQuery's .data() converting numeric-looking
        // captions to numbers.
        caption : function( instance, item ) {
            var caption = escapeHtml($(this).data('caption') || '');
            var siteUrl = $(this).data('siteurl') || '';

            if ( item.type === 'image' ) {
                caption = (caption.length ? caption + '<br />' : '')
                    + '<a href="' + escapeHtml(item.src) + '">View image</a><br>'
                    + '<a href="' + escapeHtml(siteUrl) + '">Visit page</a>';
            }

            return caption;
        },
        afterShow : function( instance, item ) {
            increaseImageClicks(item.src);
        }
    });
});
/**
 * Loads one result image off-DOM and attaches it to its grid item on success.
 * On failure the grid item is removed and the URL is reported to the server.
 *
 * @param {string} src        Image URL to load.
 * @param {string} className  Class of the grid item that will host the image.
 */
function loadImage(src, className) {
    var itemSelector = "." + className;
    var image = $("<img>");

    var relayoutGrid = function() {
        $(".imageResults").masonry();
    };

    var onLoaded = function() {
        $(itemSelector + " a").append(image);

        // Restart the shared debounce timer so the grid is laid out once
        // after a burst of loads rather than once per image.
        clearTimeout(timer);
        timer = setTimeout(relayoutGrid, 200);
    };

    var onFailed = function() {
        $(itemSelector).remove();
        $.post("ajax/setBroken.php", {src: src});
    };

    image.on("load", onLoaded);
    image.on("error", onFailed);

    // Assign src last so both handlers are registered before loading starts.
    image.attr("src", src);
}
/**
 * Records a click for a site result, then navigates to the result.
 *
 * Navigation happens whether or not the tracking request succeeds: a failed
 * or noisy tracking call must not leave the user stuck on the results page.
 *
 * @param {string} linkId  Id of the row in the sites table.
 * @param {string} url     Destination to open afterwards.
 */
function increaseLinkClicks(linkId, url) {
    $.post("ajax/updateLinkCount.php", {linkId: linkId})
        .done(function(result) {
            // The endpoint prints nothing on success; any text is a
            // server-side diagnostic.
            if(result != "") {
                alert(result); //DEBUGGING
            }
        })
        .always(function() {
            window.location.href = url;
        });
}
/**
 * Records a click (lightbox view) for an image result.
 *
 * @param {string} imageUrl  URL of the image that was opened.
 */
function increaseImageClicks(imageUrl) {
    var request = $.post("ajax/updateImageCount.php", {imageUrl: imageUrl});

    request.done(function(response) {
        // An empty body means success; anything else is shown as-is.
        if(response == "") {
            return;
        }
        alert(response);
    });
}

View file

@ -0,0 +1,39 @@
<?php
/**
 * Fetches a document and exposes the elements the crawler cares about
 * (links, title, meta tags and images) as DOMNodeLists.
 *
 * If the document cannot be fetched or is empty, the parser wraps an empty
 * DOM, so every getter returns an empty node list instead of failing.
 */
class DomDocumentParser
{
    private $doc;

    /**
     * @param string $url Location of the document to download and parse.
     */
    public function __construct($url)
    {
        $options = array(
            'http' => array('method' => "GET", 'header' => "User-Agent: doogleBot/0.1\r\n")
        );
        $context = stream_context_create($options);
        $this->doc = new DomDocument();

        // Dead or unreachable links are routine for a crawler, so the fetch
        // warning is silenced and the failure handled explicitly below.
        $html = @file_get_contents($url, false, $context);
        if($html === false || trim($html) === "")
            return;

        // Real-world markup triggers many libxml parse warnings; collect
        // them internally rather than hiding every error with "@".
        $previousSetting = libxml_use_internal_errors(true);
        $this->doc->loadHTML($html);
        libxml_clear_errors();
        libxml_use_internal_errors($previousSetting);
    }

    /** @return DOMNodeList All <a> elements. */
    public function getLinks()
    {
        return $this->doc->getElementsByTagName("a");
    }

    /** @return DOMNodeList All <title> elements. */
    public function getTitleTags()
    {
        return $this->doc->getElementsByTagName("title");
    }

    /** @return DOMNodeList All <meta> elements. */
    public function getMetaTags()
    {
        return $this->doc->getElementsByTagName("meta");
    }

    /** @return DOMNodeList All <img> elements. */
    public function getImages()
    {
        return $this->doc->getElementsByTagName("img");
    }
}
?>

View file

@ -0,0 +1,85 @@
<?php
/**
 * Looks up non-broken images whose title or alt text contains a search term
 * and renders them as the HTML grid used by the image results tab.
 */
class ImageResultsProvider
{
    private $con;

    /**
     * @param PDO $con Open database connection.
     */
    public function __construct($con)
    {
        $this->con = $con;
    }

    /**
     * Counts the images matching the term.
     *
     * @param string $term Raw search term.
     * @return int Number of matching, non-broken images.
     */
    public function getNumResults($term)
    {
        // Each placeholder has its own name: a named marker may only be
        // reused when PDO emulates prepared statements.
        $query = $this->con->prepare("SELECT COUNT(*) as total
                                        FROM images
                                        WHERE (title LIKE :titleTerm
                                        OR alt LIKE :altTerm)
                                        AND broken=0");
        $searchTerm = "%". $term . "%";
        $query->bindValue(":titleTerm", $searchTerm);
        $query->bindValue(":altTerm", $searchTerm);
        $query->execute();
        $row = $query->fetch(PDO::FETCH_ASSOC);
        return (int) $row["total"];
    }

    /**
     * Renders one page of image results, most-clicked first.
     *
     * @param int    $page     1-based page number; values below 1 are treated as 1.
     * @param int    $pageSize Number of images per page.
     * @param string $term     Raw search term.
     * @return string HTML for the image grid.
     */
    public function getResultsHtml($page, $pageSize, $term)
    {
        $pageSize = (int) $pageSize;
        // A zero or negative page would produce a negative LIMIT offset.
        $fromLimit = (max(1, (int) $page) - 1) * $pageSize;

        $query = $this->con->prepare("SELECT *
                                        FROM images
                                        WHERE (title LIKE :titleTerm
                                        OR alt LIKE :altTerm)
                                        AND broken=0
                                        ORDER BY clicks DESC
                                        LIMIT :fromLimit, :pageSize");
        $searchTerm = "%". $term . "%";
        $query->bindValue(":titleTerm", $searchTerm);
        $query->bindValue(":altTerm", $searchTerm);
        $query->bindValue(":fromLimit", $fromLimit, PDO::PARAM_INT);
        $query->bindValue(":pageSize", $pageSize, PDO::PARAM_INT);
        $query->execute();

        $resultsHtml = "<div class='imageResults'>";
        $count = 0;
        while($row = $query->fetch(PDO::FETCH_ASSOC))
        {
            $count++;
            $imageUrl = $row["imageUrl"];
            $title = $row["title"];
            $alt = $row["alt"];

            // Prefer the title, then the alt text, then the URL itself.
            if($title)
                $displayText = $title;
            else if($alt)
                $displayText = $alt;
            else
                $displayText = $imageUrl;

            // All of these values were scraped from third-party pages, so
            // each is escaped for the context it is written into.
            $safeImageUrl = $this->escapeHtml($imageUrl);
            $safeSiteUrl = $this->escapeHtml($row["siteUrl"]);
            $safeText = $this->escapeHtml($displayText);
            // JS string literal that cannot close the <script> element.
            $jsImageUrl = json_encode(
                (string) $imageUrl,
                JSON_HEX_TAG | JSON_HEX_AMP | JSON_HEX_APOS | JSON_HEX_QUOT | JSON_INVALID_UTF8_SUBSTITUTE
            );

            $resultsHtml .= "<div class='gridItem image$count'>
                                <a href='$safeImageUrl' data-fancybox data-caption='$safeText'
                                    data-siteurl='$safeSiteUrl'>
                                    <script>
                                    $(document).ready(function() {
                                        loadImage($jsImageUrl, \"image$count\");
                                    });
                                    </script>
                                    <span class='details'>$safeText</span>
                                </a>
                            </div>";
        }
        $resultsHtml .= "</div>";
        return $resultsHtml;
    }

    /**
     * Escapes a value for HTML text and single- or double-quoted attributes.
     */
    private function escapeHtml($value)
    {
        return htmlspecialchars((string) $value, ENT_QUOTES | ENT_SUBSTITUTE, "UTF-8");
    }
}
?>

View file

@ -0,0 +1,86 @@
<?php
/**
 * Looks up sites whose title, URL, keywords or description contain a search
 * term and renders them as the HTML list used by the site results tab.
 */
class SiteResultsProvider
{
    private $con;

    /**
     * @param PDO $con Open database connection.
     */
    public function __construct($con)
    {
        $this->con = $con;
    }

    /**
     * Counts the sites matching the term.
     *
     * @param string $term Raw search term.
     * @return int Number of matching sites.
     */
    public function getNumResults($term)
    {
        $query = $this->con->prepare("SELECT COUNT(*) as total
                                        FROM sites WHERE title LIKE :title
                                        OR url LIKE :url
                                        OR keywords LIKE :keywords
                                        OR description LIKE :description");
        $this->bindSearchTerm($query, $term);
        $query->execute();
        $row = $query->fetch(PDO::FETCH_ASSOC);
        return (int) $row["total"];
    }

    /**
     * Renders one page of site results, most-clicked first.
     *
     * @param int    $page     1-based page number; values below 1 are treated as 1.
     * @param int    $pageSize Number of results per page.
     * @param string $term     Raw search term.
     * @return string HTML for the result list.
     */
    public function getResultsHtml($page, $pageSize, $term)
    {
        $pageSize = (int) $pageSize;
        // Offset of the first row: page 1 -> 0, page 2 -> $pageSize, ...
        // A zero or negative page would produce a negative LIMIT offset.
        $fromLimit = (max(1, (int) $page) - 1) * $pageSize;

        $query = $this->con->prepare("SELECT *
                                        FROM sites WHERE title LIKE :title
                                        OR url LIKE :url
                                        OR keywords LIKE :keywords
                                        OR description LIKE :description
                                        ORDER BY clicks DESC
                                        LIMIT :fromLimit, :pageSize");
        $this->bindSearchTerm($query, $term);
        $query->bindValue(":fromLimit", $fromLimit, PDO::PARAM_INT);
        $query->bindValue(":pageSize", $pageSize, PDO::PARAM_INT);
        $query->execute();

        $resultsHtml = "<div class='siteResults'>";
        while($row = $query->fetch(PDO::FETCH_ASSOC))
        {
            // Trim first, then escape, so an entity is never cut in half.
            // All values were scraped from third-party pages.
            $id = $this->escapeHtml($row["id"]);
            $url = $this->escapeHtml($row["url"]);
            $title = $this->escapeHtml($this->trimField($row["title"], 55));
            $description = $this->escapeHtml($this->trimField($row["description"], 230));

            $resultsHtml .= "<div class='resultContainer'>
                                <h3 class='title'>
                                    <a class='result' href='$url' data-linkId='$id'>
                                        $title
                                    </a>
                                </h3>
                                <span class='url'>$url</span>
                                <span class='description'>$description</span>
                            </div>";
        }
        $resultsHtml .= "</div>";
        return $resultsHtml;
    }

    /**
     * Binds the wildcard-wrapped term to each search placeholder. The
     * placeholders are distinct because a named marker may only be reused
     * when PDO emulates prepared statements.
     */
    private function bindSearchTerm($query, $term)
    {
        $searchTerm = "%". $term . "%";
        foreach(array(":title", ":url", ":keywords", ":description") as $placeholder)
            $query->bindValue($placeholder, $searchTerm);
    }

    /**
     * Shortens a string to $characterLimit characters and appends "..."
     * when something was cut. Uses the multibyte functions when available
     * so a multibyte character is not split in half.
     */
    private function trimField($string, $characterLimit)
    {
        $string = (string) $string;
        if(function_exists("mb_substr"))
        {
            $dots = mb_strlen($string, "UTF-8") > $characterLimit ? "..." : "";
            return mb_substr($string, 0, $characterLimit, "UTF-8") . $dots;
        }
        $dots = strlen($string) > $characterLimit ? "..." : "";
        return substr($string, 0, $characterLimit) . $dots;
    }

    /**
     * Escapes a value for HTML text and single- or double-quoted attributes.
     */
    private function escapeHtml($value)
    {
        return htmlspecialchars((string) $value, ENT_QUOTES | ENT_SUBSTITUTE, "UTF-8");
    }
}
?>

18
config.php Normal file
View file

@ -0,0 +1,18 @@
<?php
// Shared bootstrap: starts output buffering and opens the PDO connection
// ($con) used by every page and AJAX endpoint.
ob_start();

// NOTE(review): connection settings are hard-coded, and the account is root
// with an empty password — confirm this is only ever used on a private
// development network and move the values out of version control.
$dbname = "doogle";
$dbhost = "192.168.5.240";
$dbuser = "root";
$dbpass = "";

try
{
    $con = new PDO("mysql:dbname=$dbname;host=$dbhost", $dbuser, $dbpass);
    // Query errors are reported as PHP warnings rather than exceptions.
    $con->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_WARNING);
}
catch(PDOException $e)
{
    // The class name was previously misspelled, so this handler never ran
    // and a failed connection surfaced as an uncaught exception.
    echo "Connection failed: " . $e->getMessage();
    // Nothing that includes this file can work without $con.
    exit;
}
?>

232
crawl-formSubmit.php Normal file
View file

@ -0,0 +1,232 @@
<?php
include("config.php");
include("classes/DomDocumentParser.php");
// URLs already queued and indexed during this run (read and written by followLinks).
$alreadyCrawled = array();
// URLs whose links still have to be followed (read and written by followLinks).
$crawling = array();
// Absolute image URLs already handled during this run (read and written by getDetails).
$alreadyFoundImages = array();
/**
 * Tells whether a URL is already stored in the sites table.
 *
 * @param string $url Absolute page URL.
 * @return bool True when a row with this URL exists.
 */
function linkExists($url)
{
    global $con;
    // Fetch a single marker row: rowCount() is only reliable for
    // INSERT/UPDATE/DELETE and is not guaranteed for SELECT on every driver.
    $query = $con->prepare("SELECT 1 FROM sites WHERE url = :url LIMIT 1");
    $query->bindParam(":url", $url);
    $query->execute();
    return $query->fetchColumn() !== false;
}
/**
 * Tells whether an image URL is already stored in the images table.
 *
 * @param string $src Absolute image URL.
 * @return bool True when a row with this image URL exists.
 */
function imageExists($src)
{
    global $con;
    // Fetch a single marker row: rowCount() is only reliable for
    // INSERT/UPDATE/DELETE and is not guaranteed for SELECT on every driver.
    $query = $con->prepare("SELECT 1 FROM images WHERE imageUrl = :src LIMIT 1");
    $query->bindParam(":src", $src);
    $query->execute();
    return $query->fetchColumn() !== false;
}
/**
 * Stores one crawled page in the sites table.
 *
 * @param string $url         Absolute page URL.
 * @param string $title       Page title.
 * @param string $description Content of the description meta tag.
 * @param string $keywords    Content of the keywords meta tag.
 * @return bool Result of executing the INSERT.
 */
function insertLink($url, $title, $description, $keywords)
{
    global $con;

    $statement = $con->prepare("INSERT INTO sites(url, title, description, keywords)
VALUES(:url, :title, :description, :keywords)");

    $values = array(
        ":url" => $url,
        ":title" => $title,
        ":description" => $description,
        ":keywords" => $keywords,
    );
    foreach($values as $placeholder => $value)
        $statement->bindValue($placeholder, $value);

    return $statement->execute();
}
/**
 * Stores one crawled image in the images table.
 *
 * @param string $url   URL of the page the image was found on.
 * @param string $src   Absolute image URL.
 * @param string $alt   Value of the image's alt attribute.
 * @param string $title Value of the image's title attribute.
 * @return bool Result of executing the INSERT.
 */
function insertImage($url, $src, $alt, $title)
{
    global $con;

    $statement = $con->prepare("INSERT INTO images(siteUrl, imageUrl, alt, title)
VALUES(:siteUrl, :imageUrl, :alt, :title)");

    $values = array(
        ":siteUrl" => $url,
        ":imageUrl" => $src,
        ":alt" => $alt,
        ":title" => $title,
    );
    foreach($values as $placeholder => $value)
        $statement->bindValue($placeholder, $value);

    return $statement->execute();
}
/**
 * Converts a link found on a page into an absolute URL.
 *
 * Handles absolute http(s) URLs (returned unchanged), protocol-relative
 * ("//host/x"), root-relative ("/x"), query-only ("?a=1"), fragment-only
 * ("#top") and path-relative ("x", "./x", "../x") references. Path-relative
 * references are resolved against the directory of $url, and "." / ".."
 * segments are collapsed. A port in $url is preserved.
 *
 * NOTE(review): anything that is not http(s) (e.g. "mailto:") is still
 * treated as a relative path, as before; callers should filter such links.
 *
 * @param string $src Link as written in the page (href/src attribute).
 * @param string $url Absolute URL of the page the link was found on.
 * @return string Absolute URL.
 */
function createLink($src, $url)
{
    $base = parse_url($url);
    if(!is_array($base))
        $base = array();

    $scheme = isset($base["scheme"]) ? $base["scheme"] : "http";
    $authority = isset($base["host"]) ? $base["host"] : "";
    if(isset($base["port"]))
        $authority .= ":" . $base["port"];
    $root = $scheme . "://" . $authority;

    // A base URL without a path ("https://example.com") behaves like "/".
    $basePath = isset($base["path"]) ? $base["path"] : "";
    if($basePath === "" || $basePath[0] !== "/")
        $basePath = "/" . $basePath;

    $src = trim($src);

    if(preg_match('#^https?://#i', $src))
        return $src;
    if($src === "")
        return $url; // an empty reference points at the page itself
    if(substr($src, 0, 2) == "//")
        return $scheme . ":" . $src;
    if($src[0] == "/")
        return $root . $src;
    if($src[0] == "?")
        return $root . $basePath . $src;
    if($src[0] == "#")
        return $root . $basePath . (isset($base["query"]) ? "?" . $base["query"] : "") . $src;

    // Path-relative: keep query/fragment aside while normalising the path.
    $suffix = "";
    $cut = strcspn($src, "?#");
    if($cut < strlen($src))
    {
        $suffix = substr($src, $cut);
        $src = substr($src, 0, $cut);
    }

    // Directory of the base document, including the trailing slash.
    $directory = substr($basePath, 0, strrpos($basePath, "/") + 1);

    $parts = explode("/", $directory . $src);
    $segments = array();
    foreach($parts as $part)
    {
        if($part === ".")
            continue;
        if($part === "..")
        {
            // Never pop the leading empty segment: ".." cannot climb
            // above the site root.
            if(count($segments) > 1)
                array_pop($segments);
            continue;
        }
        $segments[] = $part;
    }

    // A reference ending in "." or ".." names a directory: keep the slash.
    $last = end($parts);
    if($last === "." || $last === "..")
        $segments[] = "";

    return $root . implode("/", $segments) . $suffix;
}
/**
 * Indexes one page: stores its title, description and keywords in the sites
 * table, then stores every image that has a title or alt text. Progress is
 * echoed as HTML. Pages without a non-empty <title> are skipped entirely.
 *
 * @param string $url Absolute URL of the page to index.
 * @return void
 */
function getDetails($url)
{
    global $alreadyFoundImages;

    // Everything echoed below comes from crawled pages, so it is escaped
    // before being written into this page's HTML.
    $esc = function($value) {
        return htmlspecialchars((string) $value, ENT_QUOTES | ENT_SUBSTITUTE, "UTF-8");
    };

    $parser = new DomDocumentParser($url);

    $titleArray = $parser->getTitleTags();
    if($titleArray->length == 0 || $titleArray->item(0) == NULL)
        return;

    // Replace linebreak
    $title = str_replace("\n", "", $titleArray->item(0)->nodeValue);

    // Return if no <title>
    if($title == "")
        return;

    $description = "";
    $keywords = "";
    foreach($parser->getMetaTags() as $meta)
    {
        // Meta names are matched case-insensitively ("Description", ...).
        $name = strtolower($meta->getAttribute("name"));
        if($name == "description")
            $description = $meta->getAttribute("content");
        if($name == "keywords")
            $keywords = $meta->getAttribute("content");
    }
    $description = str_replace("\n", "", $description);
    $keywords = str_replace("\n", "", $keywords);

    $safeUrl = $esc($url);
    if(linkExists($url))
        echo "$safeUrl already exists<br>";
    else if(insertLink($url, $title, $description, $keywords))
        echo "SUCCESS: $safeUrl<br>";
    else
        echo "ERROR: Failed to insert $safeUrl<br>";

    foreach($parser->getImages() as $image)
    {
        $src = $image->getAttribute("src");
        $alt = $image->getAttribute("alt");
        // Kept separate from $title so the page title is not overwritten.
        $imageTitle = $image->getAttribute("title");

        // Images without any describing text cannot be searched for.
        if(!$imageTitle && !$alt)
            continue;

        $src = createLink($src, $url);
        if(in_array($src, $alreadyFoundImages, true))
            continue;
        $alreadyFoundImages[] = $src;

        $safeSrc = $esc($src);
        if(imageExists($src))
            echo "$safeSrc already exists<br>";
        else if(insertImage($url, $src, $alt, $imageTitle))
            echo "SUCCESS: $safeSrc<br>";
        else
            echo "ERROR: Failed to insert $safeSrc<br>";

        // Printed per image, inside the loop, so it never reads variables
        // that are undefined when a page has no usable images.
        echo "<b>src:</b> <a href='$safeSrc'>$safeSrc</a>, <b>alt:</b> " . $esc($alt)
            . ", <b>title:</b> " . $esc($imageTitle) . ", <b>url:</b> $safeUrl<br>"; //DEBUGGING images
    }

    echo "<b>URL:</b> $safeUrl, <b>Title:</b> " . $esc($title) . ", <b>Description:</b> " . $esc($description)
        . ", <b>keywords:</b> " . $esc($keywords) . "<br>"; //DEBUGGING sites
}
/**
 * Crawls outward from a start page, breadth first: indexes the start page,
 * then every newly discovered http(s) link, and keeps following the links
 * of each indexed page until no unseen links remain.
 *
 * Uses an explicit queue ($crawling) instead of recursion. The previous
 * recursive version removed the wrong queue entry at the top level and
 * re-crawled pages from nested calls.
 *
 * NOTE(review): the crawl is not limited to the start domain or to a
 * maximum number of pages, so it can run for a very long time.
 *
 * @param string $url Absolute URL to start crawling from.
 * @return void
 */
function followLinks($url)
{
    global $alreadyCrawled;
    global $crawling;

    // Index the start page itself and make sure links back to it are
    // not queued again.
    if(!in_array($url, $alreadyCrawled, true))
    {
        $alreadyCrawled[] = $url;
        getDetails($url);
    }
    $crawling[] = $url;

    while(count($crawling) > 0)
    {
        $current = array_shift($crawling);
        $parser = new DomDocumentParser($current);

        foreach($parser->getLinks() as $link)
        {
            $href = $link->getAttribute("href");

            // Filter hrefs: empty links and in-page anchors.
            if($href === "" || strpos($href, "#") !== false)
                continue;

            // Skip every scheme other than http(s): javascript:, mailto:,
            // tel:, data:, file:, ...
            if(preg_match('/^([a-z][a-z0-9+.\-]*):/i', $href, $match)
                && !in_array(strtolower($match[1]), array("http", "https"), true))
                continue;

            $href = createLink($href, $current);

            if(!in_array($href, $alreadyCrawled, true))
            {
                $alreadyCrawled[] = $href;
                $crawling[] = $href;
                getDetails($href);
            }

            // The link text comes from a crawled page: escape before output.
            echo (htmlspecialchars($href, ENT_QUOTES | ENT_SUBSTITUTE, "UTF-8") . "<br>"); //DEBUGGING
        }
    }
}
?>
<!DOCTYPE html>
<html>
<head>
<title>doogleBot Crawler</title>
<link rel="icon" type="image/x-icon" href="assets/images/favicon/favicon.ico">
<link rel="shortcut icon" type="image/png" href="assets/images/favicon/favicon-32x32.png">
<link rel="apple-touch-icon" href="assets/images/favicon/apple-touch-icon.png">
<link rel="android-chrome-icon" type="image/png" href="assets/images/favicon/android-chrome-512x512.png">
<meta name="description" content="Search the web for sites and images.">
<meta name="keywords" content="Search engine, doogle, websites">
<meta name="author" content="Zepher Ashe">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<link rel="stylesheet" type="text/css" href="assets/css/style.css">
</head>
<body>
<div id="crawl-wrapper">
<form action="crawl-formSubmit.php" method="post" >
URL: <input type="text" name="url" required="required" id="crawl-input" value="">
<button type="submit">Crawl</button>
</form>
</div>
</body>
</html>
<?php
// Start a crawl when the form above was submitted. The value ends up in
// file_get_contents(), so only well-formed http(s) URLs are accepted;
// local paths and other stream wrappers are rejected.
if (isset($_POST['url']))
{
    $startUrl = is_string($_POST['url']) ? trim($_POST['url']) : "";
    $scheme = strtolower((string) parse_url($startUrl, PHP_URL_SCHEME));

    if (filter_var($startUrl, FILTER_VALIDATE_URL) !== false
        && ($scheme === "http" || $scheme === "https"))
    {
        followLinks($startUrl);
    }
    else
    {
        echo "ERROR: Please enter a valid http:// or https:// URL<br>";
    }
}
?>

201
crawl-manual.php Normal file
View file

@ -0,0 +1,201 @@
<?php
include("config.php");
include("classes/DomDocumentParser.php");
// URLs already queued and indexed during this run (read and written by followLinks).
$alreadyCrawled = array();
// URLs whose links still have to be followed (read and written by followLinks).
$crawling = array();
// Absolute image URLs already handled during this run (read and written by getDetails).
$alreadyFoundImages = array();
/**
 * Tells whether a URL is already stored in the sites table.
 *
 * @param string $url Absolute page URL.
 * @return bool True when a row with this URL exists.
 */
function linkExists($url)
{
    global $con;
    // Fetch a single marker row: rowCount() is only reliable for
    // INSERT/UPDATE/DELETE and is not guaranteed for SELECT on every driver.
    $query = $con->prepare("SELECT 1 FROM sites WHERE url = :url LIMIT 1");
    $query->bindParam(":url", $url);
    $query->execute();
    return $query->fetchColumn() !== false;
}
/**
 * Tells whether an image URL is already stored in the images table.
 *
 * @param string $src Absolute image URL.
 * @return bool True when a row with this image URL exists.
 */
function imageExists($src)
{
    global $con;
    // Fetch a single marker row: rowCount() is only reliable for
    // INSERT/UPDATE/DELETE and is not guaranteed for SELECT on every driver.
    $query = $con->prepare("SELECT 1 FROM images WHERE imageUrl = :src LIMIT 1");
    $query->bindParam(":src", $src);
    $query->execute();
    return $query->fetchColumn() !== false;
}
/**
 * Stores one crawled page in the sites table.
 *
 * @param string $url         Absolute page URL.
 * @param string $title       Page title.
 * @param string $description Content of the description meta tag.
 * @param string $keywords    Content of the keywords meta tag.
 * @return bool Result of executing the INSERT.
 */
function insertLink($url, $title, $description, $keywords)
{
    global $con;

    $statement = $con->prepare("INSERT INTO sites(url, title, description, keywords)
VALUES(:url, :title, :description, :keywords)");

    $values = array(
        ":url" => $url,
        ":title" => $title,
        ":description" => $description,
        ":keywords" => $keywords,
    );
    foreach($values as $placeholder => $value)
        $statement->bindValue($placeholder, $value);

    return $statement->execute();
}
/**
 * Stores one crawled image in the images table.
 *
 * @param string $url   URL of the page the image was found on.
 * @param string $src   Absolute image URL.
 * @param string $alt   Value of the image's alt attribute.
 * @param string $title Value of the image's title attribute.
 * @return bool Result of executing the INSERT.
 */
function insertImage($url, $src, $alt, $title)
{
    global $con;

    $statement = $con->prepare("INSERT INTO images(siteUrl, imageUrl, alt, title)
VALUES(:siteUrl, :imageUrl, :alt, :title)");

    $values = array(
        ":siteUrl" => $url,
        ":imageUrl" => $src,
        ":alt" => $alt,
        ":title" => $title,
    );
    foreach($values as $placeholder => $value)
        $statement->bindValue($placeholder, $value);

    return $statement->execute();
}
/**
 * Converts a link found on a page into an absolute URL.
 *
 * Handles absolute http(s) URLs (returned unchanged), protocol-relative
 * ("//host/x"), root-relative ("/x"), query-only ("?a=1"), fragment-only
 * ("#top") and path-relative ("x", "./x", "../x") references. Path-relative
 * references are resolved against the directory of $url, and "." / ".."
 * segments are collapsed. A port in $url is preserved.
 *
 * NOTE(review): anything that is not http(s) (e.g. "mailto:") is still
 * treated as a relative path, as before; callers should filter such links.
 *
 * @param string $src Link as written in the page (href/src attribute).
 * @param string $url Absolute URL of the page the link was found on.
 * @return string Absolute URL.
 */
function createLink($src, $url)
{
    $base = parse_url($url);
    if(!is_array($base))
        $base = array();

    $scheme = isset($base["scheme"]) ? $base["scheme"] : "http";
    $authority = isset($base["host"]) ? $base["host"] : "";
    if(isset($base["port"]))
        $authority .= ":" . $base["port"];
    $root = $scheme . "://" . $authority;

    // A base URL without a path ("https://example.com") behaves like "/".
    $basePath = isset($base["path"]) ? $base["path"] : "";
    if($basePath === "" || $basePath[0] !== "/")
        $basePath = "/" . $basePath;

    $src = trim($src);

    if(preg_match('#^https?://#i', $src))
        return $src;
    if($src === "")
        return $url; // an empty reference points at the page itself
    if(substr($src, 0, 2) == "//")
        return $scheme . ":" . $src;
    if($src[0] == "/")
        return $root . $src;
    if($src[0] == "?")
        return $root . $basePath . $src;
    if($src[0] == "#")
        return $root . $basePath . (isset($base["query"]) ? "?" . $base["query"] : "") . $src;

    // Path-relative: keep query/fragment aside while normalising the path.
    $suffix = "";
    $cut = strcspn($src, "?#");
    if($cut < strlen($src))
    {
        $suffix = substr($src, $cut);
        $src = substr($src, 0, $cut);
    }

    // Directory of the base document, including the trailing slash.
    $directory = substr($basePath, 0, strrpos($basePath, "/") + 1);

    $parts = explode("/", $directory . $src);
    $segments = array();
    foreach($parts as $part)
    {
        if($part === ".")
            continue;
        if($part === "..")
        {
            // Never pop the leading empty segment: ".." cannot climb
            // above the site root.
            if(count($segments) > 1)
                array_pop($segments);
            continue;
        }
        $segments[] = $part;
    }

    // A reference ending in "." or ".." names a directory: keep the slash.
    $last = end($parts);
    if($last === "." || $last === "..")
        $segments[] = "";

    return $root . implode("/", $segments) . $suffix;
}
/**
 * Indexes one page: stores its title, description and keywords in the sites
 * table, then stores every image that has a title or alt text. Progress is
 * echoed as HTML. Pages without a non-empty <title> are skipped entirely.
 *
 * @param string $url Absolute URL of the page to index.
 * @return void
 */
function getDetails($url)
{
    global $alreadyFoundImages;

    // Everything echoed below comes from crawled pages, so it is escaped
    // before being written into the HTML output.
    $esc = function($value) {
        return htmlspecialchars((string) $value, ENT_QUOTES | ENT_SUBSTITUTE, "UTF-8");
    };

    $parser = new DomDocumentParser($url);

    $titleArray = $parser->getTitleTags();
    if($titleArray->length == 0 || $titleArray->item(0) == NULL)
        return;

    // Replace linebreak
    $title = str_replace("\n", "", $titleArray->item(0)->nodeValue);

    // Return if no <title>
    if($title == "")
        return;

    $description = "";
    $keywords = "";
    foreach($parser->getMetaTags() as $meta)
    {
        // Meta names are matched case-insensitively ("Description", ...).
        $name = strtolower($meta->getAttribute("name"));
        if($name == "description")
            $description = $meta->getAttribute("content");
        if($name == "keywords")
            $keywords = $meta->getAttribute("content");
    }
    $description = str_replace("\n", "", $description);
    $keywords = str_replace("\n", "", $keywords);

    $safeUrl = $esc($url);
    if(linkExists($url))
        echo "$safeUrl already exists<br>";
    else if(insertLink($url, $title, $description, $keywords))
        echo "SUCCESS: $safeUrl<br>";
    else
        echo "ERROR: Failed to insert $safeUrl<br>";

    foreach($parser->getImages() as $image)
    {
        $src = $image->getAttribute("src");
        $alt = $image->getAttribute("alt");
        // Kept separate from $title so the page title is not overwritten.
        $imageTitle = $image->getAttribute("title");

        // Images without any describing text cannot be searched for.
        if(!$imageTitle && !$alt)
            continue;

        $src = createLink($src, $url);
        if(in_array($src, $alreadyFoundImages, true))
            continue;
        $alreadyFoundImages[] = $src;

        $safeSrc = $esc($src);
        if(imageExists($src))
            echo "$safeSrc already exists<br>";
        else if(insertImage($url, $src, $alt, $imageTitle))
            echo "SUCCESS: $safeSrc<br>";
        else
            echo "ERROR: Failed to insert $safeSrc<br>";

        // Printed per image, inside the loop, so it never reads variables
        // that are undefined when a page has no usable images.
        echo "<b>src:</b> <a href='$safeSrc'>$safeSrc</a>, <b>alt:</b> " . $esc($alt)
            . ", <b>title:</b> " . $esc($imageTitle) . ", <b>url:</b> $safeUrl<br>"; //DEBUGGING images
    }

    echo "<b>URL:</b> $safeUrl, <b>Title:</b> " . $esc($title) . ", <b>Description:</b> " . $esc($description)
        . ", <b>keywords:</b> " . $esc($keywords) . "<br>"; //DEBUGGING sites
}
/**
 * Crawls outward from a start page, breadth first: indexes the start page,
 * then every newly discovered http(s) link, and keeps following the links
 * of each indexed page until no unseen links remain.
 *
 * Uses an explicit queue ($crawling) instead of recursion. The previous
 * recursive version removed the wrong queue entry at the top level and
 * re-crawled pages from nested calls.
 *
 * NOTE(review): the crawl is not limited to the start domain or to a
 * maximum number of pages, so it can run for a very long time.
 *
 * @param string $url Absolute URL to start crawling from.
 * @return void
 */
function followLinks($url)
{
    global $alreadyCrawled;
    global $crawling;

    // Index the start page itself and make sure links back to it are
    // not queued again.
    if(!in_array($url, $alreadyCrawled, true))
    {
        $alreadyCrawled[] = $url;
        getDetails($url);
    }
    $crawling[] = $url;

    while(count($crawling) > 0)
    {
        $current = array_shift($crawling);
        $parser = new DomDocumentParser($current);

        foreach($parser->getLinks() as $link)
        {
            $href = $link->getAttribute("href");

            // Filter hrefs: empty links and in-page anchors.
            if($href === "" || strpos($href, "#") !== false)
                continue;

            // Skip every scheme other than http(s): javascript:, mailto:,
            // tel:, data:, file:, ...
            if(preg_match('/^([a-z][a-z0-9+.\-]*):/i', $href, $match)
                && !in_array(strtolower($match[1]), array("http", "https"), true))
                continue;

            $href = createLink($href, $current);

            if(!in_array($href, $alreadyCrawled, true))
            {
                $alreadyCrawled[] = $href;
                $crawling[] = $href;
                getDetails($href);
            }

            // The link text comes from a crawled page: escape before output.
            echo (htmlspecialchars($href, ENT_QUOTES | ENT_SUBSTITUTE, "UTF-8") . "<br>"); //DEBUGGING
        }
    }
}
// Hard-coded seed for a manual crawl run; change this value to crawl a different site.
$startUrl = "https://thehackernews.com/";
followLinks($startUrl);
?>

93
doogle-tables-no-data.sql Normal file
View file

@ -0,0 +1,93 @@
-- phpMyAdmin SQL Dump - No Data
-- version 5.1.1
-- https://www.phpmyadmin.net/
--
-- Host: 192.168.5.240
-- Generation Time: Apr 24, 2022 at 09:25 AM
-- Server version: 8.0.28-0ubuntu0.20.04.3
-- PHP Version: 7.4.24
SET SQL_MODE = "NO_AUTO_VALUE_ON_ZERO";
SET AUTOCOMMIT = 0;
START TRANSACTION;
SET time_zone = "+00:00";
/*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */;
/*!40101 SET @OLD_CHARACTER_SET_RESULTS=@@CHARACTER_SET_RESULTS */;
/*!40101 SET @OLD_COLLATION_CONNECTION=@@COLLATION_CONNECTION */;
/*!40101 SET NAMES utf8mb4 */;
--
-- Database: `doogle`
--
CREATE DATABASE IF NOT EXISTS `doogle` DEFAULT CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci;
USE `doogle`;
-- --------------------------------------------------------
--
-- Table structure for table `images`
--
-- Crawled images. utf8mb4 matches the database default declared above, so
-- titles and alt texts outside Latin-1 are stored intact.
CREATE TABLE `images` (
`id` int(11) NOT NULL,
`siteUrl` varchar(512) NOT NULL,
`imageUrl` varchar(512) NOT NULL,
`alt` varchar(512) NOT NULL,
`title` varchar(512) NOT NULL,
`clicks` int(11) NOT NULL DEFAULT '0',
`broken` tinyint(4) NOT NULL DEFAULT '0'
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
-- --------------------------------------------------------
--
-- Table structure for table `sites`
--
-- Crawled pages. utf8mb4 matches the database default declared above, so
-- titles and descriptions outside Latin-1 are stored intact.
CREATE TABLE `sites` (
`id` int(11) NOT NULL,
`url` varchar(512) NOT NULL,
`title` varchar(512) NOT NULL,
`description` varchar(512) NOT NULL,
`keywords` varchar(512) NOT NULL,
`clicks` int(11) NOT NULL DEFAULT '0'
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
--
-- Indexes for dumped tables
--
--
-- Indexes for table `images`
--
-- imageUrl is the lookup column for the crawler's duplicate check and for
-- the broken/click-count updates, so it gets its own index.
ALTER TABLE `images`
ADD PRIMARY KEY (`id`),
ADD KEY `imageUrl` (`imageUrl`);
--
-- Indexes for table `sites`
--
-- url is the lookup column for the crawler's duplicate check, which runs
-- once per discovered link, so it gets its own index.
ALTER TABLE `sites`
ADD PRIMARY KEY (`id`),
ADD KEY `url` (`url`);
--
-- AUTO_INCREMENT for dumped tables
--
--
-- AUTO_INCREMENT for table `images`
--
ALTER TABLE `images`
MODIFY `id` int(11) NOT NULL AUTO_INCREMENT, AUTO_INCREMENT=13003;
--
-- AUTO_INCREMENT for table `sites`
--
ALTER TABLE `sites`
MODIFY `id` int(11) NOT NULL AUTO_INCREMENT, AUTO_INCREMENT=5297;
COMMIT;
/*!40101 SET CHARACTER_SET_CLIENT=@OLD_CHARACTER_SET_CLIENT */;
/*!40101 SET CHARACTER_SET_RESULTS=@OLD_CHARACTER_SET_RESULTS */;
/*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */;

34
index.php Normal file
View file

@ -0,0 +1,34 @@
<!DOCTYPE html>
<html>
<head>
<title>Doogle Web Crawler</title>
<meta name="description" content="Search the web for sites and images.">
<meta name="keywords" content="search engine, doogle, websites">
<meta name="author" content="Zepher Ashe">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<link rel="icon" type="image/x-icon" href="assets/images/favicon/favicon.ico">
<link rel="shortcut icon" type="image/png" href="assets/images/favicon/favicon-32x32.png">
<link rel="apple-touch-icon" href="assets/images/favicon/apple-touch-icon.png">
<link rel="android-chrome-icon" type="image/png" href="assets/images/favicon/android-chrome-512x512.png">
<link rel="stylesheet" type="text/css" href="assets/css/style.css">
</head>
<body>
<div class="wrapper indexPage">
<div class="mainSection">
<div class="logoContainer">
<img src="assets/images/doogleLogo.png" title="Logo of our site" alt="Site logo">
</div>
<div class="searchContainer">
<form action="search.php" method="GET">
<input class="searchBox" type="text" name="term" autocomplete="off">
<input class="searchButton" type="submit" value="Search">
</form>
</div>
</div>
</div>
</body>
</html>

157
search.php Normal file
View file

@ -0,0 +1,157 @@
<?php
include("config.php");
include("classes/SiteResultsProvider.php");
include("classes/ImageResultsProvider.php");
// Read and validate the request parameters.
// term: required, must be a plain string (not an array parameter).
if(isset($_GET['term']) && is_string($_GET['term']))
    $term = $_GET['term'];
else
    exit("You must enter a search term!");

// type: only "sites" and "images" exist; anything else falls back to
// "sites" so a results provider is always available further down.
$type = (isset($_GET["type"]) && $_GET["type"] === "images") ? "images" : "sites";

// page: 1-based integer; junk or values below 1 become 1.
$page = isset($_GET["page"]) ? max(1, (int) $_GET["page"]) : 1;
?>
<!DOCTYPE html>
<html>
<head>
<title><?php /* The term is user input: escape it before echoing into the page. */ if(isset($term) && $term != '') echo(htmlspecialchars($term, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') . ' | '); ?>Doogle Search</title>
<link rel="icon" type="image/x-icon" href="assets/images/favicon/favicon.ico">
<link rel="shortcut icon" type="image/png" href="assets/images/favicon/favicon-32x32.png">
<link rel="apple-touch-icon" href="assets/images/favicon/apple-touch-icon.png">
<link rel="android-chrome-icon" type="image/png" href="assets/images/favicon/android-chrome-512x512.png">
<meta name="description" content="Search the web for sites and images.">
<meta name="keywords" content="Search engine, doogle, websites">
<meta name="author" content="Zepher Ashe">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<!-- <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/fancybox/3.3.5/jquery.fancybox.min.css" /> -->
<link rel="stylesheet" type="text/css" href="assets/css/fancybox/3.3.5/jquery.fancybox.min.css">
<link rel="stylesheet" type="text/css" href="assets/css/style.css">
<script src="assets/js/jquery-3.3.1.min.js"></script>
<!-- <script src="https://code.jquery.com/jquery-3.3.1.min.js" integrity="sha256-FgpCb/KJQlLNfOu91ta32o/NMZxltwRo8QtmkMRdAu8=" crossorigin="anonymous"></script> -->
</head>
<body>
<div class="wrapper">
<div class="header">
<div class="headerContent">
<div class="logoContainer">
<a href="index.php">
<img src="assets/images/doogleLogo.png">
</a>
</div>
<div class="searchContainer">
<form action="search.php" method="GET">
<div class="searchBarContainer">
<?php /* Both values come from the query string: escape them for the attribute context. */ ?>
<input type="hidden" name="type" value="<?php echo htmlspecialchars($type, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8'); ?>">
<input class="searchBox" type="text" name="term" value="<?php echo htmlspecialchars($term, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8'); ?>" autocomplete="off">
<button class="searchButton">
<img src="assets/images/icons/search.png">
</button>
</div>
</form>
</div>
</div>
<div class="tabsContainer">
<?php /* Tab links repeat the current term; it is URL-encoded so quotes, "&" and spaces cannot break out of the href. */ ?>
<ul class="tabList">
    <li class="<?php echo $type == 'sites' ? 'active' : '' ?>">
        <a href='<?php echo "search.php?term=" . urlencode($term) . "&amp;type=sites"; ?>'>
            Sites
        </a>
    </li>
    <li class="<?php echo $type == 'images' ? 'active' : '' ?>">
        <a href='<?php echo "search.php?term=" . urlencode($term) . "&amp;type=images"; ?>'>
            Images
        </a>
    </li>
</ul>
</div>
</div>
<div class="mainResultsSection">
<?php
// Pick the provider for the requested tab. Site results are the default,
// so an unexpected type can no longer leave $resultsProvider undefined.
if($type == "images")
{
    $resultsProvider = new ImageResultsProvider($con);
    $pageSize = 30;
}
else
{
    $resultsProvider = new SiteResultsProvider($con);
    $pageSize = 20;
}

$numResults = (int) $resultsProvider->getNumResults($term);
echo "<p class='resultsCount'>$numResults results found</p>";
echo $resultsProvider->getResultsHtml($page, $pageSize, $term);
?>
</div>
<div class="paginationContainer">
<div class="pageButtons">
<div class="pageNumberContainer">
<img src="assets/images/pageStart.png">
</div>
<?php
// Renders up to $pagesToShow page buttons, centred on the current page
// where possible and clamped to the range 1..$numPages.
$pagesToShow = 10;
$page = max(1, (int) $page); // tolerate junk in the page parameter
$numPages = (int) ceil($numResults / $pageSize);
$pagesLeft = min($pagesToShow, $numPages);

$currentPage = $page - (int) floor($pagesToShow / 2);
if($currentPage < 1)
    $currentPage = 1;
// Near the end, shift the window back so it still shows $pagesLeft pages.
if($currentPage + $pagesLeft > $numPages + 1)
    $currentPage = $numPages + 1 - $pagesLeft;

// Term and type are user input: URL-encode them for the page links.
$termParam = urlencode($term);
$typeParam = urlencode($type);

while($pagesLeft != 0 && $currentPage <= $numPages)
{
    if($currentPage == $page)
    {
        echo "<div class='pageNumberContainer'>
                <img src='assets/images/pageSelected.png'>
                <span class='pageNumber'>$currentPage</span>
            </div>";
    }
    else
    {
        echo "<div class='pageNumberContainer'>
                <a href='search.php?term=$termParam&amp;type=$typeParam&amp;page=$currentPage'>
                    <img src='assets/images/page.png'>
                    <span class='pageNumber'>$currentPage</span>
                </a>
            </div>";
    }
    $currentPage++;
    $pagesLeft--;
}
?>
<div class="pageNumberContainer">
<div id="pageEndContainer">
<img src="assets/images/pageEnd.png">
</div>
</div>
</div>
</div>
</div>
<script src="assets/js/fancybox/3.3.5/jquery.fancybox.min.js"></script>
<script src="assets/js/masonry/4.2.2/masonry.pkgd.min.js"></script>
<script type="text/javascript" src="assets/js/script.js"></script>
<!--
<script src="https://cdnjs.cloudflare.com/ajax/libs/fancybox/3.3.5/jquery.fancybox.min.js"></script>
<script src="https://unpkg.com/masonry-layout@4/dist/masonry.pkgd.min.js"></script>
-->
</body>
</html>