
1.2 - January 2, 2024 - [new] Preferred language setting for DuckDuckGo results in config.php. - [new] Preferred language setting for Wikipedia results in config.php. - [new] Combined DuckDuckGo, Google, Wikipedia and Ecosia (Bing) results into one page. - [new] Ranking algorithm for search results. - [new] Option to down-rank certain social media sites in results (Makes them show lower down the page). - [new] Option to show the Goosle rank along with the search source. - [new] Crawler for results from Limetorrents.lol. - [new] Periodic check for updates in footer. - [change] Moved duckduckgo.php and google.php into the engines/search/ folder. - [change] Removed Wikipedia special search in favor of actual search results. - [change] Removed 'Date Added' from 1337x results. - [change] Removed Chrome-based and Mobile user-agents, as they don't work for the Wikipedia API. - [change] Added more trackers for generating magnet links. - [tweak] 30-50% faster parsing of search results (couple of ms per search query). - [tweak] Expanded the season/episode filter to all sources that support TV Shows. - [tweak] More sensible sanitization of variables (Searching for HTML tags/basic code should now work). - [tweak] Moved 'imdb_id_search' out from special results into its 'own' setting. - [tweak] Moved 'password_generator' out from special results into its 'own' setting. - [tweak] More accurate and faster Google scrape. - [tweak] Reduced paragraph margins. - [tweak] More code cleanup, making it more uniform. - [fix] Prevents searching on disabled methods by 'cheating' the search type in the URL. - [fix] Better decoding for special characters in URLs for search results. - [fix] Better validation for special search trigger words. - [fix] Better sanitization for DuckDuckGo and Google results.
93 lines
3.7 KiB
PHP
93 lines
3.7 KiB
PHP
<?php
|
|
/* ------------------------------------------------------------------------------------
|
|
* Goosle - A meta search engine for private and fast internet fun.
|
|
*
|
|
* COPYRIGHT NOTICE
|
|
* Copyright 2023-2024 Arnan de Gans. All Rights Reserved.
|
|
*
|
|
* COPYRIGHT NOTICES AND ALL THE COMMENTS SHOULD REMAIN INTACT.
|
|
* By using this code you agree to indemnify Arnan de Gans from any
|
|
* liability that might arise from its use.
|
|
------------------------------------------------------------------------------------ */
|
|
class DuckDuckGoRequest extends EngineRequest {
	/**
	 * Build the request URL for DuckDuckGo's HTML endpoint.
	 *
	 * Reads $this->query and $this->opts->duckduckgo_language. When the first
	 * word of the query is a safe search switch of the form "safe:<value>",
	 * the switch is removed from $this->query and overrides the filter level:
	 * "safe:off" turns filtering off, any other non-numeric value (for example
	 * "safe:on") turns it on. Without a switch, moderate filtering is used.
	 *
	 * @return string URL with the URL-encoded query string appended.
	 */
	public function get_request_url() {
		// Split off the first word only; the rest of the query keeps its
		// original case so stripping the switch does not rewrite the search.
		$query_terms = explode(" ", $this->query, 2);
		$first_term = strtolower($query_terms[0]);

		// Safe search override.
		// The word must start with "safe:" - ordinary words that merely contain
		// "safe" (safety, failsafe, ...) are part of the search and are kept.
		$safe = "-1";
		if(strpos($first_term, "safe:") === 0) {
			// Index 1 always exists here because the prefix contains the colon.
			$switch = explode(":", $first_term);

			if(!is_numeric($switch[1])) {
				$safe = ($switch[1] == "off") ? "-2" : "1";
				$this->query = isset($query_terms[1]) ? $query_terms[1] : "";
			}
		}

		// All parameters and values: https://duckduckgo.com/duckduckgo-help-pages/settings/params/
		// q = query
		// kp = Safe search (1 = on, -1 = moderate, -2 = off (may include nsfw/illegal content))
		// kl = Search results language (Works as a region setting, see params page for more supported regions: en-us, en-uk, nl-nl, es-es, fr-fr, etc.)
		// kz = Instant answers (1 = on, -1 = off)
		// kc = Autoload images (1 = on, -1 = off)
		// kav = Autoload results (1 = on, -1 = off)
		// kf = Favicons (1 = on, -1 = off)
		// kaf = Full URLs (1 = on, -1 = off)
		// kac = Auto suggest (1 = on, -1 = off)
		// kd = Redirects (1 = on, -1 = off)
		// kh = HTTPS (1 = on, -1 = off)
		// kg = Get/Post (g = GET, p = POST)
		// k1 = Ads (1 = on, -1 = off)
		$args = array(
			"q" => $this->query,
			"kl" => $this->opts->duckduckgo_language,
			"kp" => $safe,
			"kz" => "-1",
			"kc" => "-1",
			"kav" => "-1",
			"kf" => "-1",
			"kaf" => "1",
			"kac" => "-1",
			"kd" => "-1",
			"kh" => "1",
			"kg" => "g",
			"k1" => "-1"
		);

		return "https://html.duckduckgo.com/html/?".http_build_query($args);
	}

	/**
	 * Extract search results from a DuckDuckGo HTML response.
	 *
	 * The returned array may contain:
	 * - 'did_you_mean':    text of the first link in the "did_you_mean" box, if present.
	 * - 'search_specific': text of the second link in that box, if present.
	 * - 'amount':          number of result nodes found on the page.
	 * - 'search':          list of results, each with the keys id, source, title,
	 *                      url, description and engine_rank. Only set when at
	 *                      least one result could be read.
	 *
	 * Title, url and description are passed through htmlspecialchars().
	 *
	 * @param mixed $response Response body as accepted by get_xpath().
	 * @return array Parsed results; empty when get_xpath() yields nothing usable.
	 */
	public function parse_results($response) {
		$results = array();

		// NOTE(review): get_xpath() is defined elsewhere; it is assumed to
		// return a DOMXPath, or a falsy value when the response is unusable.
		$xpath = get_xpath($response);
		if(!$xpath) return $results;

		// Scrape recommended
		$didyoumean = $xpath->query(".//div[@id='did_you_mean']/a[1]")->item(0);
		if(!is_null($didyoumean)) {
			$results['did_you_mean'] = $didyoumean->textContent;
		}

		$search_specific = $xpath->query(".//div[@id='did_you_mean']/a[2]")->item(0);
		if(!is_null($search_specific)) {
			$results['search_specific'] = $search_specific->textContent;
		}

		// Scrape the results; they are read from the last <div> child of the first <div> in the body.
		$scrape = $xpath->query("/html/body/div[1]/div[last()]/div/div/div[contains(@class, 'web-result')]/div");

		// Ranks count down from the number of result nodes, so the first
		// result on the page gets the highest engine_rank.
		$rank = $results['amount'] = count($scrape);

		foreach($scrape as $result) {
			// Results without a link or a title are skipped.
			$url = $xpath->evaluate(".//h2[@class='result__title']//a/@href", $result)->item(0);
			if(is_null($url)) continue;

			$title = $xpath->evaluate(".//h2[@class='result__title']", $result)->item(0);
			if(is_null($title)) continue;

			$description = $xpath->evaluate(".//a[@class='result__snippet']", $result)->item(0);
			$description = is_null($description)
				? "No description was provided for this site."
				: htmlspecialchars(trim($description->textContent));

			$results['search'][] = array(
				"id" => uniqid(rand(0, 9999)),
				"source" => "DuckDuckGo",
				"title" => htmlspecialchars(trim($title->textContent)),
				"url" => htmlspecialchars(trim($url->textContent)),
				"description" => $description,
				"engine_rank" => $rank
			);
			$rank -= 1;
		}

		return $results;
	}
}
|
|
?>
|