Goosle/engines/search/duckduckgo.php
Arnan de Gans 32c27924f5 Version 1.4
- NOTICE: config.default.php has changed, re-create your config.php!!
- [fix] Footer no longer overlaps results
- [fix] Search navigation no longer bunched up on smaller displays
- [fix] Double search type when searching from start page
- [new] Filter for additional/different headers per cURL request
- [new] Image search via Openverse API (Access token and cronjob required, see installation instructions)
- [new] Image search via Qwant API
- [new] Web (recent news) search via Qwant API
- [tweak] Merged 'cache' option into 'cache-type', see config.default.php for details
- [tweak] Better filtering for duplicate web results
- [tweak] File size formatting for images more uniform
- [tweak] Optimized curl_multi_exec handling
- [tweak] Improved SEO headers
- [tweak] Layout tweaks and optimizations for search results, header and footer
- [tweak] Removed redundant HTML, CSS and some PHP
- [tweak] MagnetDL search disabled by default because of Cloudflare (Will probably be removed in future version)
- [tweak] Removed non-functional magnet trackers
- [tweak] Added 15 extra public magnet trackers
- [change] Removed Ecosia support
- [change] Removed Reddit support
- [change] Removed 1337x support
- [change] Removed MagnetDL support
2024-05-16 19:12:14 -06:00

103 lines
4 KiB
PHP

<?php
/* ------------------------------------------------------------------------------------
* Goosle - A meta search engine for private and fast internet fun.
*
* COPYRIGHT NOTICE
* Copyright 2023-2024 Arnan de Gans. All Rights Reserved.
*
* COPYRIGHT NOTICES AND ALL THE COMMENTS SHOULD REMAIN INTACT.
* By using this code you agree to indemnify Arnan de Gans from any
* liability that might arise from its use.
------------------------------------------------------------------------------------ */
class DuckDuckGoRequest extends EngineRequest {
public function get_request_url() {
// Split the query
$query_terms = explode(" ", strtolower($this->query), 2);
$query_terms[0] = strtolower($query_terms[0]);
// Safe search override
$safe = "-1";
if(strpos($query_terms[0], "safe") !== false) {
$switch = explode(":", $query_terms[0]);
if(!is_numeric($switch[1])) {
$safe = (strtolower($switch[1]) == "off") ? "-2" : "1";
$this->query = implode(" ", array_slice($query_terms, 1));
}
}
// All parameters and values: https://duckduckgo.com/duckduckgo-help-pages/settings/params/
// q = query
// kp = Safe search (1 = on, -1 = moderate, -2 = off (may include nsfw/illegal content))
// kl = Search results language (Works as a region setting, see params page for more supported regions: en-us, en-uk, nl-nl, es-es, fr-fr, etc.)
// kz = Instant answers (1 = on, -1 = off)
// kc = Autoload images (1 = on, -1 = off)
// kav = Autoload results (1 = on, -1 = off)
// kf = Favicons (1 = on, -1 = off)
// kaf = Full URLs (1 = on, -1 = off)
// kac = Auto suggest (1 = on, -1 = off)
// kd = Redirects (1 = on, -1 = off)
// kh = HTTPS (1 = on, -1 = off)
// kg = Get/Post (g = GET, p = POST)
// k1 = Ads (1 = on, -1 = off)
$args = array("q" => $this->query, "kl" => $this->opts->duckduckgo_language, "kp" => $safe, "kz" => "-1", "kc" => "-1", "kav" => "-1", "kf" => "-1", "kaf" => "1", "kac" => "-1", "kd" => "-1", "kh" => "1", "kg" => "g", "k1" => "-1");
$url = "https://html.duckduckgo.com/html/?".http_build_query($args);
unset($query_terms, $safe, $switch, $args);
return $url;
}
public function get_request_headers() {
return array(
'Accept' => 'text/html, application/xhtml+xml, application/xml;q=0.8, */*;q=0.7',
);
}
public function parse_results($response) {
$results = array();
$xpath = get_xpath($response);
if(!$xpath) return $results;
// Scrape recommended
$didyoumean = $xpath->query(".//div[@id='did_you_mean']/a[1]")[0];
if(!is_null($didyoumean)) {
$results['did_you_mean'] = $didyoumean->textContent;
}
$search_specific = $xpath->query(".//div[@id='did_you_mean']/a[2]")[0];
if(!is_null($search_specific)) {
$results['search_specific'] = $search_specific->textContent;
}
// Scrape the results
$scrape = $xpath->query("/html/body/div[1]/div[".count($xpath->query('/html/body/div[1]/div'))."]/div/div/div[contains(@class, 'web-result')]/div");
$rank = $results['amount'] = count($scrape);
foreach($scrape as $result) {
$url = $xpath->evaluate(".//h2[@class='result__title']//a/@href", $result)[0];
if(is_null($url)) continue;
$title = $xpath->evaluate(".//h2[@class='result__title']", $result)[0];
if(is_null($title)) continue;
$description = $xpath->evaluate(".//a[@class='result__snippet']", $result)[0];
$description = (is_null($description)) ? "No description was provided for this site." : sanitize($description->textContent);
$url = sanitize($url->textContent);
$title = sanitize($title->textContent);
// filter duplicate urls/results
if(!empty($results['search'])) {
if(in_array($url, array_column($results['search'], "url"))) continue;
}
$results['search'][] = array("id" => uniqid(rand(0, 9999)), "source" => "DuckDuckGo", "title" => $title, "url" => $url, "description" => $description, "engine_rank" => $rank);
$rank -= 1;
}
unset($response, $xpath, $scrape, $rank);
return $results;
}
}
?>