Procházet zdrojové kódy

Implement Qwant search engine

Nikodem Rabuliński před 10 měsíci
rodič
revize
709425f60d
4 změnil soubory, kde provedl 169 přidání a 0 odebrání
  1. 1 0
      src/engines/mod.rs
  2. 163 0
      src/engines/qwant.rs
  3. 4 0
      src/models/engine_models.rs
  4. 1 0
      websurfx/config.lua

+ 1 - 0
src/engines/mod.rs

@@ -8,6 +8,7 @@ pub mod brave;
 pub mod duckduckgo;
 pub mod librex;
 pub mod mojeek;
+pub mod qwant;
 pub mod search_result_parser;
 pub mod searx;
 pub mod startpage;

+ 163 - 0
src/engines/qwant.rs

@@ -0,0 +1,163 @@
+//! The `qwant` module handles the scraping of results from the qwant search engine
+//! by querying the upstream qwant search engine with user provided query and with a page
+//! number if provided.
+
+use std::collections::HashMap;
+
+use reqwest::header::HeaderMap;
+use reqwest::Client;
+use serde::Deserialize;
+
+use crate::models::aggregation_models::SearchResult;
+
+use crate::models::engine_models::{EngineError, SearchEngine};
+
+use error_stack::{Report, Result, ResultExt};
+
+/// A new Qwant engine type defined in-order to implement the `SearchEngine` trait which allows to
+/// reduce code duplication as well as allows to create vector of different search engines easily.
+pub struct Qwant;
+
+#[derive(Deserialize, Debug)]
+#[serde(rename_all = "camelCase")]
+/// Web page search result
+struct QwantSearchResult {
+    // NOTE: This object also contains `favicon`, `url_ping_suffix`, `thumbnail_url`,
+    //       `source`, and `is_family_friendly` attributes,
+    //       which we currently don't care about.
+    /// Title of the result
+    title: String,
+    /// Url of the result
+    url: String,
+    /// Description of the result
+    desc: String,
+}
+
+impl From<&QwantSearchResult> for SearchResult {
+    fn from(value: &QwantSearchResult) -> Self {
+        SearchResult::new(&value.title, &value.url, &value.desc, &["qwant"])
+    }
+}
+
+#[derive(Deserialize, Debug)]
+#[serde(rename_all = "snake_case")]
+#[serde(tag = "type")]
+/// A result which should be shown to the user
+enum QwantItem {
+    /// Results containing web pages relevant to the query
+    Web {
+        // NOTE: This object also contains `count` and `serpContextId` attributes,
+        //       which we currently don't care about.
+        /// List of web page search results
+        items: Vec<QwantSearchResult>,
+    },
+    #[serde(other)]
+    /// Other item type like "related_searches", which aren't relevant.
+    Other,
+}
+
+#[derive(Deserialize, Debug)]
+struct QwantItems {
+    // NOTE: This object also contains `headline`, `sidebar`, and `bottomline` attributes,
+    //       which we currently don't care about.
+    /// Results which should be shown in the main section of the page
+    mainline: Vec<QwantItem>,
+}
+
+#[derive(Deserialize, Debug)]
+struct QwantResult {
+    // NOTE: This object also contains `denied`, `total`, `items`, `filters`, `lastPage`,
+    //       `instrumentation`, `onlyProductAds`, and `topClassification` attributes,
+    //       which we currently don't care about.
+    /// Entries that should be shown to the user
+    items: QwantItems,
+}
+
+#[derive(Deserialize, Debug)]
+#[serde(rename_all = "snake_case")]
+#[serde(tag = "status", content = "data")]
+enum QwantApiResponse {
+    /// Success response
+    Success {
+        // NOTE: This object also contains `query` and `cache` attributes,
+        //       which we currently don't care about.
+        /// Actual results the search produced
+        result: QwantResult,
+    },
+    // TODO: Use the reported error messages
+    #[allow(unused)]
+    /// Error response
+    Error {
+        /// Machine-readable error code
+        error_code: i32,
+        #[serde(default)]
+        /// List of human-readable error messages
+        message: Vec<String>,
+    },
+}
+
+impl From<QwantApiResponse> for Result<QwantResult, EngineError> {
+    fn from(value: QwantApiResponse) -> Self {
+        match value {
+            QwantApiResponse::Success { result } => Ok(result),
+            QwantApiResponse::Error { .. } => Err(Report::new(EngineError::RequestError)),
+        }
+    }
+}
+
+#[async_trait::async_trait]
+impl SearchEngine for Qwant {
+    async fn results(
+        &self,
+        query: &str,
+        page: u32,
+        user_agent: &str,
+        client: &Client,
+        safe_search: u8,
+    ) -> Result<Vec<(String, SearchResult)>, EngineError> {
+        let results_per_page = 10;
+        let start_result = results_per_page * page;
+
+        let url: String =  format!("https://api.qwant.com/v3/search/web?q={query}&count={results_per_page}&locale=en_US&offset={start_result}&safesearch={safe_search}&device=desktop&tgp=2&displayed=true");
+
+        let header_map = HeaderMap::try_from(&HashMap::from([
+            ("User-Agent".to_string(), user_agent.to_string()),
+            ("Referer".to_string(), "https://qwant.com/".to_string()),
+            ("Origin".to_string(), "https://qwant.com".to_string()),
+        ]))
+        .change_context(EngineError::UnexpectedError)?;
+
+        let result: QwantApiResponse = client
+            .get(url)
+            .headers(header_map)
+            .send()
+            .await
+            .change_context(EngineError::RequestError)?
+            .json()
+            .await
+            .change_context(EngineError::RequestError)?;
+
+        let result = Result::from(result)?;
+
+        let results: Vec<_> = result
+            .items
+            .mainline
+            .into_iter()
+            .filter_map(|item| match item {
+                QwantItem::Web { items } => Some(items),
+                _ => None,
+            })
+            .flatten()
+            .map(|result| {
+                let search_result = SearchResult::from(&result);
+                (result.url, search_result)
+            })
+            .collect();
+
+        if results.is_empty() {
+            Err(Report::new(EngineError::EmptyResultSet))
+        } else {
+            Ok(results)
+        }
+    }
+}

+ 4 - 0
src/models/engine_models.rs

@@ -206,6 +206,10 @@ impl EngineHandler {
                     let engine = crate::engines::bing::Bing::new()?;
                     ("bing", Box::new(engine))
                 }
+                "qwant" => {
+                    let engine = crate::engines::qwant::Qwant;
+                    ("qwant", Box::new(engine))
+                }
                 _ => {
                     return Err(Report::from(EngineError::NoSuchEngineFound(
                         engine_name.to_string(),

+ 1 - 0
websurfx/config.lua

@@ -72,4 +72,5 @@ upstream_search_engines = {
     LibreX = false,
     Mojeek = false,
     Bing = false,
+    Qwant = false,
 } -- select the upstream search engines from which the results should be fetched.