Goosle/engines/search/duckduckgo.php
Arnan de Gans 92a70e6d28 Version 1.2
1.2 - January 2, 2024
- [new] Preferred language setting for DuckDuckGo results in config.php.
- [new] Preferred language setting for Wikipedia results in config.php.
- [new] Combined DuckDuckGo, Google, Wikipedia and Ecosia (Bing) results into one page.
- [new] Ranking algorithm for search results.
- [new] Option to down-rank certain social media sites in results (Makes them show lower down the page).
- [new] Option to show the Goosle rank along with the search source.
- [new] Crawler for results from Limetorrents.lol.
- [new] Periodic check for updates in footer.
- [change] Moved duckduckgo.php and google.php into the engines/search/ folder.
- [change] Removed Wikipedia special search in favor of actual search results.
- [change] Removed 'Date Added' from 1337x results.
- [change] Removed Chrome based and Mobile user-agents, as they don't work for the Wikipedia API.
- [change] Added more trackers for generating magnet links.
- [tweak] 30-50% faster parsing of search results (couple of ms per search query).
- [tweak] Expanded the season/episode filter to all sources that support TV Shows.
- [tweak] More sensible sanitization of variables (Searching for html tags/basic code should now work).
- [tweak] Moved 'imdb_id_search' out from special results into its 'own' setting.
- [tweak] Moved 'password_generator' out from special results into its 'own' setting.
- [tweak] More accurate and faster Google scrape.
- [tweak] Reduced paragraph margins.
- [tweak] More code cleanup, making it more uniform.
- [fix] Prevents searching on disabled methods by 'cheating' the search type in the url.
- [fix] Better decoding for special characters in urls for search results.
- [fix] Better validation for special searches trigger words.
- [fix] Better sanitization for DuckDuckGo and Google results.
2024-01-02 00:24:27 -06:00

93 lines
3.7 KiB
PHP

<?php
/* ------------------------------------------------------------------------------------
* Goosle - A meta search engine for private and fast internet fun.
*
* COPYRIGHT NOTICE
* Copyright 2023-2024 Arnan de Gans. All Rights Reserved.
*
* COPYRIGHT NOTICES AND ALL THE COMMENTS SHOULD REMAIN INTACT.
* By using this code you agree to indemnify Arnan de Gans from any
* liability that might arise from its use.
------------------------------------------------------------------------------------ */
class DuckDuckGoRequest extends EngineRequest {
    /**
     * Build the DuckDuckGo HTML search url for the current query.
     *
     * The first word of the query may be a safe search override written as
     * "safe:<value>" (case-insensitive). "safe:off" disables safe search, any
     * other non-numeric value turns strict safe search on. When an override is
     * found it is removed from $this->query so it is not sent as a search term.
     * Without an override the moderate setting is used.
     *
     * @return string The request url including all query parameters.
     */
    public function get_request_url() {
        // Split off the first word, only that word can carry the override.
        // The split is done on the original query so the remaining search terms keep their casing.
        $query_terms = explode(" ", $this->query, 2);

        // Safe search override
        $safe = "-1";
        $switch = explode(":", strtolower($query_terms[0]));
        // Only an explicit "safe:<value>" prefix counts. Words that merely contain "safe"
        // (safety, unsafe, ...) or a lone "safe" are regular search terms and must stay in the query.
        if(count($switch) > 1 && $switch[0] === "safe" && !is_numeric($switch[1])) {
            $safe = ($switch[1] === "off") ? "-2" : "1";
            $this->query = isset($query_terms[1]) ? $query_terms[1] : "";
        }

        // All parameters and values: https://duckduckgo.com/duckduckgo-help-pages/settings/params/
        // q = query
        // kp = Safe search (1 = on, -1 = moderate, -2 = off (may include nsfw/illegal content))
        // kl = Search results language (Works as a region setting, see params page for more supported regions: en-us, en-uk, nl-nl, es-es, fr-fr, etc.)
        // kz = Instant answers (1 = on, -1 = off)
        // kc = Autoload images (1 = on, -1 = off)
        // kav = Autoload results (1 = on, -1 = off)
        // kf = Favicons (1 = on, -1 = off)
        // kaf = Full URLs (1 = on, -1 = off)
        // kac = Auto suggest (1 = on, -1 = off)
        // kd = Redirects (1 = on, -1 = off)
        // kh = HTTPS (1 = on, -1 = off)
        // kg = Get/Post (g = GET, p = POST)
        // k1 = Ads (1 = on, -1 = off)
        $args = array(
            "q" => $this->query,
            "kl" => $this->opts->duckduckgo_language,
            "kp" => $safe,
            "kz" => "-1",
            "kc" => "-1",
            "kav" => "-1",
            "kf" => "-1",
            "kaf" => "1",
            "kac" => "-1",
            "kd" => "-1",
            "kh" => "1",
            "kg" => "g",
            "k1" => "-1"
        );

        return "https://html.duckduckgo.com/html/?".http_build_query($args);
    }

    /**
     * Scrape the DuckDuckGo HTML response into a results array.
     *
     * Keys that may be set on the returned array:
     * - 'did_you_mean'    Text of the first link in the did_you_mean box, if present.
     * - 'search_specific' Text of the second link in the did_you_mean box, if present.
     * - 'amount'          Number of result nodes found, including nodes that are skipped below.
     * - 'search'          List of results (id, source, title, url, description, engine_rank).
     *                     Not set when no usable result was found.
     *
     * @param mixed $response Passed straight to get_xpath() - presumably the raw response body, verify against caller.
     * @return array Empty array when get_xpath() returns a falsy value.
     */
    public function parse_results($response) {
        $results = array();

        // NOTE(review): get_xpath() is defined elsewhere, it is used here as a DOMXPath-like object - confirm
        $xpath = get_xpath($response);
        if(!$xpath) return $results;

        // Scrape recommended
        $didyoumean = $xpath->query(".//div[@id='did_you_mean']/a[1]")[0];
        if(!is_null($didyoumean)) {
            $results['did_you_mean'] = $didyoumean->textContent;
        }

        $search_specific = $xpath->query(".//div[@id='did_you_mean']/a[2]")[0];
        if(!is_null($search_specific)) {
            $results['search_specific'] = $search_specific->textContent;
        }

        // Scrape the results
        // The results are looked up in the last div inside the first div of the body.
        // last() selects that div directly, no separate query is needed to count the divs first.
        $scrape = $xpath->query("/html/body/div[1]/div[last()]/div/div/div[contains(@class, 'web-result')]/div");

        // The first result gets the highest rank, every next result one lower
        $rank = $results['amount'] = count($scrape);

        foreach($scrape as $result) {
            // A result without a link or a title is of no use
            $url = $xpath->evaluate(".//h2[@class='result__title']//a/@href", $result)[0];
            if(is_null($url)) continue;

            $title = $xpath->evaluate(".//h2[@class='result__title']", $result)[0];
            if(is_null($title)) continue;

            $description = $xpath->evaluate(".//a[@class='result__snippet']", $result)[0];
            $description = is_null($description) ? "No description was provided for this site." : htmlspecialchars(trim($description->textContent));

            $url = htmlspecialchars(trim($url->textContent));
            $title = htmlspecialchars(trim($title->textContent));
            $id = uniqid(rand(0, 9999));

            $results['search'][] = array("id" => $id, "source" => "DuckDuckGo", "title" => $title, "url" => $url, "description" => $description, "engine_rank" => $rank);

            $rank -= 1;
        }

        return $results;
    }
}
?>