Goosle/engines/magnet/nyaa.php
Arnan de Gans aab57e1be0 Version 1.3
- [fix] Image search crawler filters out non-image results better
- [new] Crawler for results from magnetdl.com
- [new] Direct Reddit.com search, search for 'Top Posts' created in the past year
- [new] Added NSFW filter for Reddit results in config.default.php
- [new] YTS movie highlights now link to YTS website when clicking the title
- [new] Placeholder image for missing eztv highlight thumbnails
- [tweak] Better hash matching for duplicate magnet results
- [tweak] Better checking for missing/empty values in image search results
- [tweak] Code cleanup
- [tweak] More uniform code/variable names
- [change] Naming overhaul - Replaced 'Torrent' with 'Magnet' throughout most of Goosle
2024-04-11 19:46:30 -06:00

70 lines
2.7 KiB
PHP

<?php
/* ------------------------------------------------------------------------------------
* Goosle - A meta search engine for private and fast internet fun.
*
* COPYRIGHT NOTICE
* Copyright 2023-2024 Arnan de Gans. All Rights Reserved.
*
* COPYRIGHT NOTICES AND ALL THE COMMENTS SHOULD REMAIN INTACT.
* By using this code you agree to indemnify Arnan de Gans from any
* liability that might arise from its use.
------------------------------------------------------------------------------------ */
class NyaaRequest extends EngineRequest {
	/**
	 * Build the nyaa.si search URL for the current query.
	 *
	 * @return string URL with $this->query URL-encoded as the "q" parameter.
	 */
	public function get_request_url() {
		return "https://nyaa.si/?".http_build_query(array("q" => $this->query));
	}

	/**
	 * Scrape magnet results out of a nyaa.si result page.
	 *
	 * Every "//tbody/tr" row is inspected. Rows that lack the cells this
	 * scraper relies on, or whose magnet link carries no "xt" parameter, are
	 * skipped instead of producing warnings or half-filled results.
	 *
	 * @param mixed $response Raw response handed to get_xpath(); presumably the
	 *                        HTML of the result page - confirm against the caller.
	 * @return array List of result arrays with the keys id, source, name, magnet,
	 *               hash, seeders, leechers, size, category, url and date_added.
	 *               Empty when the page could not be loaded or nothing matched.
	 */
	public function parse_results($response) {
		$results = array();
		$xpath = get_xpath($response);

		// Failed to load page
		if(!$xpath) return $results;

		// Sanitized text of the first node matching $expression, or null if there is none
		$first_value = function($expression, $context) use ($xpath) {
			$node = $xpath->evaluate($expression, $context)->item(0);
			return is_null($node) ? null : sanitize($node->textContent);
		};

		// The title link sits next to an optional link carrying a 'comments' class
		$title_link = ".//td[@colspan='2']//a[not(contains(@class, 'comments'))]";

		// Scrape the page
		foreach($xpath->query("//tbody/tr") as $result) {
			// Cells used below: 0 = links, 1 = size, 2 = date, 3 = seeders, 4 = leechers
			$meta = $xpath->evaluate(".//td[@class='text-center']", $result);
			if($meta->length < 5) continue;

			$name = $first_value($title_link."/@title", $result);
			// The second link in the first cell is used as the magnet link
			$magnet = $first_value(".//a[2]/@href", $meta->item(0));
			if(is_null($name) || is_null($magnet)) continue;

			// The info hash is the 'xt' query parameter minus its urn prefix
			$hash_parameters = array();
			parse_str((string) parse_url($magnet, PHP_URL_QUERY), $hash_parameters);
			if(!isset($hash_parameters['xt']) || !is_string($hash_parameters['xt'])) continue;
			$hash = strtolower(str_replace("urn:btih:", "", $hash_parameters['xt']));
			if($hash === "") continue;

			$seeders = sanitize($meta->item(3)->textContent);
			$leechers = sanitize($meta->item(4)->textContent);
			// Only MiB and GiB are renamed; any other unit passes through unchanged
			$size = str_replace(array("MiB", "GiB"), array("MB", "GB"), sanitize($meta->item(1)->textContent));

			// Ignore results with 0 seeders?
			if($this->opts->show_zero_seeders == "off" && $seeders == 0) continue;

			// Filter episodes
			if(!is_season_or_episode($this->query, $name)) continue;

			// Get extra data
			$category = $first_value(".//td[1]//a/@title", $result);
			$category = is_null($category) ? "" : str_replace(" - ", "/", $category);

			$path = $first_value($title_link."/@href", $result);
			$url = "https://nyaa.si".(is_null($path) ? "" : $path);

			// First 10 characters are read as year-month-day; missing parts count as 0
			$date_parts = array_pad(explode("-", substr(sanitize($meta->item(2)->textContent), 0, 10)), 3, "0");
			$date_added = mktime(0, 0, 0, intval($date_parts[1]), intval($date_parts[2]), intval($date_parts[0]));

			$results[] = array (
				// Required
				"id" => uniqid(rand(0, 9999)), "source" => "nyaa.si", "name" => $name, "magnet" => $magnet, "hash" => $hash, "seeders" => $seeders, "leechers" => $leechers, "size" => $size,
				// Extra
				"category" => $category, "url" => $url, "date_added" => $date_added
			);
		}

		return $results;
	}
}
?>