
1.2 - January 2, 2024 - [new] Preferred language setting for DuckDuckGo results in config.php. - [new] Preferred language setting for Wikipedia results in config.php. - [new] Combined DuckDuckGo, Google, Wikipedia and Ecosia (Bing) results into one page. - [new] Ranking algorithm for search results. - [new] Option to down-rank certain social media sites in results (Makes them show lower down the page). - [new] Option to show the Goosle rank along with the search source. - [new] Crawler for results from Limetorrents.lol. - [new] Periodic check for updates in footer. - [change] Moved duckduckgo.php and google.php into the engines/search/ folder. - [change] Removed Wikipedia special search in favor of actual search results. - [change] Removed 'Date Added' from 1337x results. - [change] Removed Chrome based and Mobile user-agents, as they don't work for the Wikipedia API. - [change] Added more trackers for generating magnet links. - [tweak] 30-50% faster parsing of search results (couple of ms per search query). - [tweak] Expanded the season/episode filter to all sources that support TV Shows. - [tweak] More sensible sanitization of variables (Searching for html tags/basic code should now work). - [tweak] Moved 'imdb_id_search' out from special results into its 'own' setting. - [tweak] Moved 'password_generator' out from special results into its 'own' setting. - [tweak] More accurate and faster Google scrape. - [tweak] Reduced paragraph margins. - [tweak] More code cleanup, making it more uniform. - [fix] Prevents searching on disabled methods by 'cheating' the search type in the url. - [fix] Better decoding for special characters in urls for search results. - [fix] Better validation for special searches trigger words. - [fix] Better sanitization for DuckDuckGo and Google results.
318 lines
No EOL
12 KiB
PHP
318 lines
No EOL
12 KiB
PHP
<?php
|
|
/* ------------------------------------------------------------------------------------
|
|
* Goosle - A meta search engine for private and fast internet fun.
|
|
*
|
|
* COPYRIGHT NOTICE
|
|
* Copyright 2023-2024 Arnan de Gans. All Rights Reserved.
|
|
*
|
|
* COPYRIGHT NOTICES AND ALL THE COMMENTS SHOULD REMAIN INTACT.
|
|
* By using this code you agree to indemnify Arnan de Gans from any
|
|
* liability that might arise from its use.
|
|
------------------------------------------------------------------------------------ */
|
|
|
|
/*--------------------------------------
|
|
// Verify the hash, or not, and let people in, or not
|
|
--------------------------------------*/
|
|
/**
 * Decide whether a request is allowed to use the search engine.
 *
 * Access is granted when hash authentication is switched "off", or when it is
 * "on" and the supplied hash equals the configured hash (case-insensitively).
 * Any other value of $opts->hash_auth denies access.
 *
 * @param object $opts Settings object; reads $opts->hash_auth and $opts->hash.
 * @param mixed  $auth Hash supplied with the request (may be null).
 * @return bool True when the request may proceed.
 */
function verify_hash($opts, $auth) {
    if($opts->hash_auth == "off") return true;
    if($opts->hash_auth != "on") return false;

    // Arrays/objects (e.g. "?a[]=x") can never be a valid hash; reject them
    // instead of letting strtolower() raise a TypeError.
    if($auth !== null && !is_scalar($auth)) return false;

    $expected = strtolower((string) $opts->hash);
    $supplied = strtolower((string) $auth);

    // hash_equals() compares in constant time, so response timing does not
    // leak how many leading characters of the secret were guessed correctly.
    return hash_equals($expected, $supplied);
}
|
|
|
|
/*--------------------------------------
|
|
// Set curl options
|
|
--------------------------------------*/
|
|
/**
 * Configure a cURL handle for a browser-like GET request.
 *
 * Sets the target URL, a randomly picked user agent, browser-style request
 * headers (including a Referer pointing at the target's own origin), gzip or
 * deflate encoding, redirect following (max 5, HTTP/HTTPS only) and a
 * 3 second timeout. The response is returned by curl_exec() rather than
 * printed.
 *
 * @param resource|object $curl        Handle created with curl_init().
 * @param string          $url         URL to request.
 * @param array           $user_agents List of user agent strings to pick from.
 * @return void
 */
function set_curl_options($curl, $url, $user_agents) {
    $headers = array(
        'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language: en-US,en;q=0.5',
        'Upgrade-Insecure-Requests: 1',
        'Sec-Fetch-Dest: document',
        'Sec-Fetch-Mode: navigate',
        'Sec-Fetch-Site: none',
    );

    // parse_url() returns false for malformed URLs and omits keys that are
    // absent, so only add a Referer when both parts are really there.
    $referer_url = parse_url($url);
    if(is_array($referer_url) && isset($referer_url["scheme"], $referer_url["host"])) {
        $headers[] = 'Referer: '.$referer_url["scheme"].'://'.$referer_url["host"].'/';
    }

    curl_setopt($curl, CURLOPT_URL, $url);
    curl_setopt($curl, CURLOPT_HTTPGET, 1); // Redundant? Probably...
    curl_setopt($curl, CURLOPT_PROTOCOLS, CURLPROTO_HTTPS | CURLPROTO_HTTP);
    // NOTE(review): certificate verification is disabled here, which allows
    // man-in-the-middle tampering with results. Kept as-is because changing it
    // may break hosts without a CA bundle - confirm whether it can be enabled.
    curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, false);

    // array_rand() throws on an empty array; without a configured agent the
    // request simply goes out with cURL's default.
    if(is_array($user_agents) && !empty($user_agents)) {
        curl_setopt($curl, CURLOPT_USERAGENT, $user_agents[array_rand($user_agents)]);
    }

    curl_setopt($curl, CURLOPT_HTTPHEADER, $headers);
    curl_setopt($curl, CURLOPT_ENCODING, "gzip,deflate");
    curl_setopt($curl, CURLOPT_IPRESOLVE, CURL_IPRESOLVE_WHATEVER);
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
    curl_setopt($curl, CURLOPT_MAXREDIRS, 5);
    curl_setopt($curl, CURLOPT_REDIR_PROTOCOLS, CURLPROTO_HTTPS | CURLPROTO_HTTP);
    curl_setopt($curl, CURLOPT_TIMEOUT, 3);
    curl_setopt($curl, CURLOPT_VERBOSE, false);
}
|
|
|
|
/*--------------------------------------
|
|
// Load pages into a DOM
|
|
--------------------------------------*/
|
|
/**
 * Parse an HTML response into a DOMXPath object for querying.
 *
 * @param string|false|null $response Raw HTML, e.g. the result of curl_exec().
 * @return DOMXPath|null XPath wrapper around the parsed document, or null
 *                       when the response is empty/falsy.
 */
function get_xpath($response) {
    if(!$response) {
        return null;
    }

    // Collect libxml parse errors internally instead of silencing the call
    // with "@": real-world HTML is rarely valid and would otherwise emit a
    // warning per markup error.
    $previous_setting = libxml_use_internal_errors(true);

    $htmlDom = new DOMDocument;
    $htmlDom->loadHTML($response);

    // Drop the collected errors and restore whatever the caller had set.
    libxml_clear_errors();
    libxml_use_internal_errors($previous_setting);

    return new DOMXPath($htmlDom);
}
|
|
|
|
/*--------------------------------------
|
|
// Format search result urls
|
|
--------------------------------------*/
|
|
/**
 * Turn a URL into a breadcrumb-style display string.
 *
 * The scheme and host are kept as-is; the path is URL-decoded, stripped of
 * trailing slashes and its "/" separators are replaced by " › ". Query string
 * and fragment are dropped.
 *
 * @param string $url URL to format.
 * @return string Display string, e.g. "https://example.com › a › b".
 */
function get_formatted_url($url) {
    $parts = parse_url($url);

    // parse_url() yields false on malformed input and leaves out absent
    // components, so every key has to be checked before use.
    if(!is_array($parts)) {
        $parts = array();
    }

    $formatted_url = "";
    if(isset($parts['scheme'], $parts['host'])) {
        $formatted_url = $parts['scheme'] . "://" . $parts['host'];
    }

    // A bare "https://example.com" has no path component at all.
    $path = isset($parts['path']) ? rtrim($parts['path'], '/') : "";
    $formatted_url .= str_replace('/', ' › ', urldecode(str_replace('%20', ' ', $path)));

    return $formatted_url;
}
|
|
|
|
/*--------------------------------------
|
|
// APCu Caching
|
|
--------------------------------------*/
|
|
/**
 * Check whether results for a URL are present in the APCu cache.
 *
 * The cache key is "<hash>:<url>". Without the APCu extension there is no
 * cache, so the answer is always false.
 *
 * @param string $url  Request URL the results belong to.
 * @param string $hash Hash used as key prefix.
 * @return bool True when an entry exists.
 */
function has_cached_results($url, $hash) {
    $apcu_available = function_exists("apcu_exists");

    return $apcu_available ? apcu_exists($hash.":".$url) : false;
}
|
|
|
|
/**
 * Store results in the APCu cache under the key "<hash>:<url>".
 *
 * Nothing is stored when the results are empty or APCu is not available; in
 * that case null is returned.
 *
 * @param string $url     Request URL the results belong to.
 * @param string $hash    Hash used as key prefix.
 * @param mixed  $results Results to cache.
 * @param int    $ttl     Time to live in seconds, passed on to apcu_store().
 * @return bool|null Result of apcu_store(), or null when nothing was stored.
 */
function store_cached_results($url, $hash, $results, $ttl = 0) {
    if(empty($results) || !function_exists("apcu_store")) {
        return null;
    }

    $cache_key = $hash.":".$url;

    return apcu_store($cache_key, $results, $ttl);
}
|
|
|
|
/**
 * Fetch cached results for a URL from APCu.
 *
 * The cache key is "<hash>:<url>". An empty array is returned both when APCu
 * is not available and when there is no entry for the key, so callers always
 * get an "empty" value they can iterate over.
 *
 * @param string $url  Request URL the results belong to.
 * @param string $hash Hash used as key prefix.
 * @return mixed The cached value, or an empty array when nothing is cached.
 */
function fetch_cached_results($url, $hash) {
    if(!function_exists("apcu_fetch")) {
        return array();
    }

    // apcu_fetch() returns false on a miss; the $success flag distinguishes
    // that from a stored value and lets a miss map to the same empty array
    // that is used when APCu is missing.
    $success = false;
    $cached = apcu_fetch($hash.":".$url, $success);

    return $success ? $cached : array();
}
|
|
|
|
/*--------------------------------------
|
|
// Sanitize variables
|
|
--------------------------------------*/
|
|
/**
 * Make a value safe for output in HTML.
 *
 * - Strings are trimmed and HTML-escaped (tags are kept, but escaped).
 * - Booleans become the integers 1 (true) or 0 (false).
 * - null becomes the string 'NULL'.
 * - Anything else is trimmed, stripped of tags and HTML-escaped.
 *
 * @param mixed $variable Value to sanitize.
 * @return string|int Sanitized value.
 */
function sanitize($variable) {
    if(is_string($variable)) {
        return htmlspecialchars(trim($variable), ENT_QUOTES);
    }

    if(is_bool($variable)) {
        return $variable ? 1 : 0;
    }

    if($variable === NULL) {
        return 'NULL';
    }

    // Remaining types (numbers etc.) go through the string functions.
    $without_tags = strip_tags(trim($variable));

    return htmlspecialchars($without_tags, ENT_QUOTES);
}
|
|
|
|
/*--------------------------------------
|
|
// Search result match counter
|
|
--------------------------------------*/
|
|
/**
 * Count how many distinct words of a text occur in the search query.
 *
 * The text is lowercased. When it is a valid URL, every run of characters
 * other than a-z and 0-9 becomes a space so URL parts count as words. All
 * remaining characters other than a-z, 0-9 and space are removed and repeated
 * whitespace is collapsed before the text is split on spaces.
 *
 * @param string $string Text or URL to inspect.
 * @param array  $query  Query words to look for.
 * @return int Number of distinct words from $string found in $query.
 */
function match_count($string, $query) {
    $haystack = strtolower($string);

    if(filter_var($haystack, FILTER_VALIDATE_URL)) {
        $haystack = preg_replace("/[^a-z0-9]+/", " ", $haystack);
    }

    // Both clean-up passes run in order: strip symbols, then squeeze spaces.
    $haystack = preg_replace(array("/[^a-z0-9 ]+/", "/\s{2,}/"), array("", " "), $haystack);

    $words = array_filter(array_unique(explode(" ", $haystack)));

    return count(array_intersect($words, $query));
}
|
|
|
|
/*--------------------------------------
|
|
// Detect social media results
|
|
--------------------------------------*/
|
|
/**
 * Tell whether a URL points at a social media profile or post.
 *
 * The lowercased input is tested against patterns for Facebook, Instagram,
 * Twitter, Reddit users, Snapchat, TikTok and LinkedIn; the first pattern
 * that matches decides.
 *
 * @param string $string URL to test.
 * @return bool True when one of the patterns matches.
 */
function is_social_media($string) {
    $url = strtolower($string);

    $patterns = array(
        // Facebook profiles (but not its own feature pages)
        "/(?:https?:)?\/\/(?:www\.)?(?:facebook|fb)\.com\/(?P<profile>(?![A-z]+\.php)(?!marketplace|gaming|watch|me|messages|help|search|groups)[A-z0-9_\-\.]+)\/?/",
        // Instagram accounts
        "/(?:https?:)?\/\/(?:www\.)?(?:instagram\.com|instagr\.am)\/(?P<username>[A-Za-z0-9_](?:(?:[A-Za-z0-9_]|(?:\.(?!\.))){0,28}(?:[A-Za-z0-9_]))?)/",
        // Twitter statuses
        "/(?:https?:)?\/\/(?:[A-z]+\.)?twitter\.com\/@?(?P<username>[A-z0-9_]+)\/status\/(?P<tweet_id>[0-9]+)\/?/",
        // Twitter accounts
        "/(?:https?:)?\/\/(?:[A-z]+\.)?twitter\.com\/@?(?!home|share|privacy|tos)(?P<username>[A-z0-9_]+)\/?/",
        // Reddit users
        "/(?:https?:)?\/\/(?:[a-z]+\.)?reddit\.com\/(?:u(?:ser)?)\/(?P<username>[A-z0-9\-\_]*)\/?/",
        // Snapchat add links
        "/(?:https?:)?\/\/(?:www\.)?snapchat\.com\/add\/(?P<username>[A-z0-9\.\_\-]+)\/?/",
        // TikTok
        "/^.*https:\/\/(?:m|www|vm)?\.?tiktok\.com\/((?:.*\b(?:(?:usr|v|embed|user|video)\/|\?shareId=|\&item_id=)(\d+))|\w+)/",
        // LinkedIn companies and schools
        "/(?:https?:)?\/\/(?:[\w]+\.)?linkedin\.com\/(?P<company_type>(company)|(school))\/(?P<company_permalink>[A-z0-9-À-ÿ\.]+)\/?/",
        // LinkedIn feed updates
        "/(?:https?:)?\/\/(?:[\w]+\.)?linkedin\.com\/feed\/update\/urn:li:activity:(?P<activity_id>[0-9]+)\/?/",
        // LinkedIn personal profiles
        "/(?:https?:)?\/\/(?:[\w]+\.)?linkedin\.com\/in\/(?P<permalink>[\w\-\_À-ÿ%]+)\/?/",
    );

    foreach($patterns as $pattern) {
        if(preg_match($pattern, $url)) {
            return true;
        }
    }

    return false;
}
|
|
|
|
/*--------------------------------------
|
|
// Search suggestions
|
|
--------------------------------------*/
|
|
/**
 * Print the "Did you mean ...?" suggestion as a list item.
 *
 * Nothing is printed unless $results has a 'did_you_mean' entry. When
 * $results['search_specific'] holds at least one alternative query, an
 * "Or instead search for ..." line is appended; a second alternative is only
 * added for search type 3.
 *
 * NOTE(review): the suggestion texts are echoed without HTML escaping -
 * presumably they are sanitized where the results are scraped; confirm.
 *
 * @param object $opts    Settings object; reads $opts->type and $opts->hash.
 * @param array  $results Search results; reads 'did_you_mean' and 'search_specific'.
 * @return void
 */
function search_suggestion($opts, $results) {
    if(!array_key_exists("did_you_mean", $results)) {
        return;
    }

    // Every link shares the same type/hash suffix.
    $url_suffix = "&t=".$opts->type."&a=".$opts->hash;
    $specific_result = "";

    // Require a real first alternative; an empty list used to produce an
    // undefined offset warning and an empty link.
    if(array_key_exists("search_specific", $results) && is_array($results['search_specific']) && isset($results['search_specific'][0])) {
        $alternatives = $results['search_specific'];
        $specific_result2 = "";

        if($opts->type == 3 && isset($alternatives[1])) {
            $search_specific_url2 = "./results.php?q=".urlencode($alternatives[1]).$url_suffix;
            $specific_result2 = " or <a href=\"".$search_specific_url2."\">".$alternatives[1]."</a>";
        }

        $search_specific_url = "./results.php?q=".urlencode($alternatives[0]).$url_suffix;
        $specific_result = "<br /><small>Or instead search for <a href=\"".$search_specific_url."\">".$alternatives[0]."</a>".$specific_result2.".</small>";
    }

    $didyoumean_url = "./results.php?q=".urlencode($results['did_you_mean']).$url_suffix;

    echo "<li class=\"meta\">Did you mean <a href=\"".$didyoumean_url."\">".$results['did_you_mean']."</a>?".$specific_result."</li>";
}
|
|
|
|
/*--------------------------------------
|
|
// Count and format search sources
|
|
--------------------------------------*/
|
|
/**
 * Print a list item summarising how many results each source delivered.
 *
 * Produces e.g. "3 results from Google, 1 result from Wikipedia and
 * 2 results from DuckDuckGo." Nothing is printed when there are no sources.
 *
 * @param array $results Map of source name => number of results.
 * @return void
 */
function search_sources($results) {
    $sources = array();

    foreach($results as $source => $amount) {
        // Only exactly one is singular; zero reads "0 results".
        $plural = ($amount == 1) ? "result" : "results";
        $sources[] = $amount." ".$plural." from ".$source;
    }

    // Without sources the item would consist of a lone ".".
    if(empty($sources)) {
        return;
    }

    // Join with commas and turn the final separator into " and ".
    $sources = replace_last_comma(implode(', ', $sources));

    echo "<li class=\"sources\">".$sources.".</li>";
}
|
|
|
|
/*--------------------------------------
|
|
// Special Search result
|
|
--------------------------------------*/
|
|
/**
 * Print the highlighted "special" result above the regular results.
 *
 * When IMDb ID detection is enabled, the first search result that links to
 * imdb.com with a title ID (tt1234567) and whose title contains "tv series"
 * becomes the special result, offering a magnet link search for that ID. This
 * replaces any special result already present in $results. The special result
 * is then printed with its title, text and, when set, a source link.
 *
 * @param object $opts    Settings object; reads $opts->imdb_id_search and $opts->hash.
 * @param array  $results Search results; reads 'search' (items with 'url' and
 *                        'title') and 'special' ('title', 'text', optional 'source').
 * @return void
 */
function special_search_result($opts, $results) {
    if($opts->imdb_id_search == "on" && !empty($results['search']) && is_array($results['search'])) {
        foreach($results['search'] as $search_result) {
            if(!isset($search_result['url'], $search_result['title'])) continue;
            if(stristr($search_result['title'], "tv series") === false) continue;

            // Capture the title ID directly. Relying on the second match of an
            // "imdb.com OR tt123" pattern broke for URLs with only one match.
            if(!preg_match("/imdb\.com.*?(tt[0-9]+)/i", $search_result['url'], $imdb_result)) continue;

            $imdb_id = $imdb_result[1];

            $results['special'] = array(
                "title" => $search_result['title'],
                "text" => "Goosle found an IMDb ID for this TV Show in your results (".$imdb_id.") - <a href=\"./results.php?q=".$imdb_id."&a=".$opts->hash."&t=9\">search for magnet links</a>?<br /><sub>An IMDb ID is detected when a TV Show is present in the results. The first match is highlighted here.</sub>"
            );

            // Only the first match is highlighted.
            break;
        }
    }

    if(!array_key_exists("special", $results)) {
        return;
    }

    echo "<li class=\"special-result\"><article>";
    echo "<div class=\"title\"><h2>".$results['special']['title']."</h2></div>";
    echo "<div class=\"text\">".$results['special']['text']."</div>";
    if(array_key_exists("source", $results['special'])) {
        echo "<div class=\"source\"><a href=\"".$results['special']['source']."\" target=\"_blank\">".$results['special']['source']."</a></div>";
    }
    echo "</article></li>";
}
|
|
|
|
/*--------------------------------------
|
|
// Find and replace the last comma in a string
|
|
--------------------------------------*/
|
|
/**
 * Replace the last ", " in a string with " and ".
 *
 * Turns "a, b, c" into "a, b and c". Strings without ", " are returned
 * unchanged.
 *
 * @param string $string Comma separated list.
 * @return string The list with its final separator replaced.
 */
function replace_last_comma($string) {
    $position = strrpos($string, ', ');

    if($position === false) {
        return $string;
    }

    // Stitch the text before and after the two-character separator together.
    return substr($string, 0, $position).' and '.substr($string, $position + 2);
}
|
|
|
|
/*--------------------------------------
|
|
// Human readable file sizes
|
|
--------------------------------------*/
|
|
/**
 * Format a byte count as a human readable size such as "1.50 kB".
 *
 * The unit is chosen from the number of digits in the whole-number part
 * (one unit step per three digits) and the value is divided by 1024 per step,
 * so e.g. 1000 bytes is shown as "0.98 kB". Non-numeric input counts as 0.
 *
 * @param int|float|string $bytes Size in bytes.
 * @param int              $dec   Number of decimals to show.
 * @return string Formatted size followed by its unit.
 */
function human_filesize($bytes, $dec = 2) {
    $size = array('B', 'kB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB');

    $bytes = is_numeric($bytes) ? (float) $bytes : 0.0;
    $dec = max(0, (int) $dec);

    // Count digits of the whole-number magnitude only; counting characters of
    // the raw value also counted the sign, decimal point and fraction, which
    // selected a unit that was too large for floats and negative numbers.
    $digits = strlen(number_format(abs($bytes), 0, '', ''));

    // Never step past the largest known unit.
    $factor = (int) min(floor(($digits - 1) / 3), count($size) - 1);

    return sprintf("%.{$dec}f ", $bytes / pow(1024, $factor)) . $size[$factor];
}
|
|
|
|
/*--------------------------------------
|
|
// Generate random strings for passwords
|
|
--------------------------------------*/
|
|
/**
 * Generate a random password of the form "xxxxxx-xxxxxx-xxxxxx-xxxxxx".
 *
 * The password has 24 characters picked from a-z, A-Z and 0-9, shown as four
 * groups of six separated by dashes (27 characters in total).
 *
 * @return string The generated password.
 */
function string_generator() {
    $characters = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890';
    $last_index = strlen($characters) - 1;

    $groups = array();

    for($group = 0; $group < 4; $group++) {
        $part = '';

        for($i = 0; $i < 6; $i++) {
            // random_int() draws from the system CSPRNG; rand() is predictable
            // and must not be used for passwords.
            $part .= $characters[random_int(0, $last_index)];
        }

        $groups[] = $part;
    }

    return implode('-', $groups);
}
|
|
|
|
/*--------------------------------------
|
|
// Show version in footer and do periodic update check
|
|
--------------------------------------*/
|
|
/**
 * Build the version link for the footer and periodically check for updates.
 *
 * Version information is cached in "version.data" one directory above this
 * file's directory. When the last check is more than 7 days old, the latest
 * release is looked up through the GitHub API and the cache is refreshed.
 * When the cached latest version is newer than the running one, an update
 * link is appended.
 *
 * @param object $opts Settings object; reads $opts->user_agents.
 * @return string HTML for the footer.
 */
function show_version($opts) {
    $cache_file = dirname(__DIR__).'/version.data';
    $check_interval = 604800; // 7 days
    $retry_interval = 86400; // 1 day

    $defaults = array('version' => "1.2", 'latest' => "0.0", "checked" => 0, "url" => "");
    $version = null;

    if(is_file($cache_file)) {
        // Get update information. Classes are never needed here, so refuse to
        // instantiate any in case the file was tampered with.
        $raw = file_get_contents($cache_file);
        if($raw !== false) {
            $version = unserialize($raw, array('allowed_classes' => false));
        }
    }

    if(!is_array($version)) {
        // Missing or unreadable cache: (re)create the update cache file
        $version = $defaults;
        file_put_contents($cache_file, serialize($version));
    } else {
        // Fill in anything an incomplete cache file lacks
        $version = array_merge($defaults, $version);
    }

    // Current version
    $show_version = "<a href=\"https://github.com/adegans/Goosle/\" target=\"_blank\">Goosle ".$version['version']."</a>.";

    if($version['checked'] < time() - $check_interval) {
        $ch = curl_init();
        set_curl_options($ch, "https://api.github.com/repos/adegans/goosle/releases/latest", $opts->user_agents);
        $response = curl_exec($ch);
        curl_close($ch);

        $json_response = is_string($response) ? json_decode($response, true) : null;

        if(is_array($json_response) && isset($json_response['tag_name'], $json_response['html_url'])) {
            // Update version info
            $version['latest'] = (string) $json_response['tag_name'];
            $version['url'] = (string) $json_response['html_url'];
            $version['checked'] = time();
        } else {
            // No (usable) response. Record the attempt so the next try is a
            // day away; otherwise every page load would repeat this blocking
            // request for as long as GitHub is unreachable or rate limiting.
            $version['checked'] = time() - $check_interval + $retry_interval;
        }

        file_put_contents($cache_file, serialize($version));
    }

    // Check if a newer version is available and add it to the version display
    if(version_compare((string) $version['version'], (string) $version['latest'], "<")) {
        // Both values originate from a remote response, so escape them.
        $show_version .= " <a href=\"".htmlspecialchars((string) $version['url'], ENT_QUOTES)."\" target=\"_blank\" class=\"update\">Version ".htmlspecialchars((string) $version['latest'], ENT_QUOTES)." is available!</a>";
    }

    return $show_version;
}
|
|
?>
|