
Add option to use a proxy for outgoing (search engine) requests.

m00nwtchr committed 1 year ago
commit 07e1f663df
3 files changed, 44 additions and 22 deletions
  1. src/config/parser.rs (+14 -0)
  2. src/results/aggregator.rs (+28 -22)
  3. websurfx/config.lua (+2 -0)

src/config/parser.rs (+14 -0)

@@ -6,6 +6,7 @@ use crate::handler::{file_path, FileType};
 use crate::models::parser_models::{AggregatorConfig, RateLimiter, Style};
 use log::LevelFilter;
 use mlua::Lua;
+use reqwest::Proxy;
 use std::{collections::HashMap, fs, thread::available_parallelism};
 
 /// A named struct which stores the parsed config file options.
@@ -48,6 +49,9 @@ pub struct Config {
     pub tcp_connection_keep_alive: u8,
     /// It stores the pool idle connection timeout in seconds.
     pub pool_idle_connection_timeout: u8,
+
+    /// URL of the proxy to use for outgoing requests.
+    pub proxy: Option<Proxy>,
 }
 
 impl Config {
@@ -118,6 +122,15 @@ impl Config {
             _ => parsed_cet,
         };
 
+        let proxy_str = globals.get::<_, String>("proxy")?;
+        let proxy = match Proxy::all(proxy_str) {
+            Ok(proxy) => Some(proxy),
+            Err(_) => {
+                log::error!("Invalid proxy URL, defaulting to no proxy.");
+                None
+            }
+        };
+
         Ok(Config {
             port: globals.get::<_, u16>("port")?,
             binding_ip: globals.get::<_, String>("binding_ip")?,
@@ -148,6 +161,7 @@ impl Config {
             safe_search,
             #[cfg(any(feature = "redis-cache", feature = "memory-cache"))]
             cache_expiry_time,
+            proxy,
         })
     }
 }
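
Note on the parsing above: the default `proxy = ""` shipped in `websurfx/config.lua` is not a valid proxy URL, so the `Err` branch will log "Invalid proxy URL" on every start even when proxying is deliberately disabled. Below is a minimal sketch of a quieter variant; the standalone `parse_proxy` helper and its use of `Option<String>` are illustrative assumptions, not part of this commit.

```rust
// Sketch only: read the optional `proxy` key from the Lua globals, treating a
// missing key or an empty string as "proxying disabled" without logging.
use reqwest::Proxy;

fn parse_proxy(globals: &mlua::Table) -> Option<Proxy> {
    // `Option<String>` maps a missing/nil `proxy` key to `None` instead of an error.
    let proxy_str = globals
        .get::<_, Option<String>>("proxy")
        .ok()
        .flatten()
        .unwrap_or_default();

    if proxy_str.is_empty() {
        return None; // disabled on purpose, nothing to report
    }

    match Proxy::all(proxy_str.as_str()) {
        Ok(proxy) => Some(proxy),
        Err(_) => {
            log::error!("Invalid proxy URL `{}`, defaulting to no proxy.", proxy_str);
            None
        }
    }
}
```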

src/results/aggregator.rs (+28 -22)

@@ -76,7 +76,7 @@ pub async fn aggregate(
     safe_search: u8,
 ) -> Result<SearchResults, Box<dyn std::error::Error>> {
     let client = CLIENT.get_or_init(|| {
-        ClientBuilder::new()
+        let mut cb = ClientBuilder::new()
             .timeout(Duration::from_secs(config.request_timeout as u64)) // Add timeout to request to avoid DDOSing the server
             .pool_idle_timeout(Duration::from_secs(
                 config.pool_idle_connection_timeout as u64,
@@ -86,9 +86,13 @@ pub async fn aggregate(
             .https_only(true)
             .gzip(true)
             .brotli(true)
-            .http2_adaptive_window(config.adaptive_window)
-            .build()
-            .unwrap()
+            .http2_adaptive_window(config.adaptive_window);
+
+        if config.proxy.is_some() {
+            cb = cb.proxy(config.proxy.clone().unwrap());
+        }
+
+        cb.build().unwrap()
     });
 
     let user_agent: &str = random_user_agent();
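
For reference, the same optional-proxy wiring as a self-contained sketch; the `build_client` helper and the 30-second timeout are illustrative assumptions, not the project's actual builder. It uses `if let` in place of the `is_some()`/`unwrap()` pair.

```rust
// Sketch only: attach a proxy to a reqwest client when the config provides one.
use std::time::Duration;

use reqwest::{Client, ClientBuilder, Proxy};

fn build_client(proxy: Option<Proxy>) -> reqwest::Result<Client> {
    let mut cb = ClientBuilder::new()
        .timeout(Duration::from_secs(30)) // illustrative timeout
        .https_only(true);

    // Without a configured proxy the client connects directly.
    if let Some(proxy) = proxy {
        cb = cb.proxy(proxy);
    }

    cb.build()
}
```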
@@ -247,6 +251,7 @@ pub async fn filter_with_lists(
 
     Ok(())
 }
+
 /// Sorts  SearchResults by relevance score.
 /// <br> sort_unstable is used as its faster,stability is not an issue on our side.
 /// For reasons why, check out [`this`](https://rust-lang.github.io/rfcs/1884-unstable-sort.html)
@@ -262,6 +267,7 @@ fn sort_search_results(results: &mut [SearchResult]) {
             .unwrap_or(Ordering::Less)
     })
 }
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -285,15 +291,15 @@ mod tests {
             },
         ));
         map_to_be_filtered.push((
-            "https://www.rust-lang.org/".to_owned(),
-            SearchResult {
-                title: "Rust Programming Language".to_owned(),
-                url: "https://www.rust-lang.org/".to_owned(),
-                description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(),
-                engine: smallvec!["Google".to_owned(), "DuckDuckGo".to_owned()],
-                relevance_score:0.0
-            },)
-        );
+			"https://www.rust-lang.org/".to_owned(),
+			SearchResult {
+				title: "Rust Programming Language".to_owned(),
+				url: "https://www.rust-lang.org/".to_owned(),
+				description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(),
+				engine: smallvec!["Google".to_owned(), "DuckDuckGo".to_owned()],
+				relevance_score: 0.0,
+			}, )
+		);
 
         // Create a temporary file with regex patterns
         let mut file = NamedTempFile::new()?;
@@ -336,15 +342,15 @@ mod tests {
             },
         ));
         map_to_be_filtered.push((
-            "https://www.rust-lang.org/".to_owned(),
-            SearchResult {
-                title: "Rust Programming Language".to_owned(),
-                url: "https://www.rust-lang.org/".to_owned(),
-                description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(),
-                engine: smallvec!["Google".to_owned(), "DuckDuckGo".to_owned()],
-                relevance_score:0.0
-            },
-        ));
+			"https://www.rust-lang.org/".to_owned(),
+			SearchResult {
+				title: "Rust Programming Language".to_owned(),
+				url: "https://www.rust-lang.org/".to_owned(),
+				description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(),
+				engine: smallvec!["Google".to_owned(), "DuckDuckGo".to_owned()],
+				relevance_score: 0.0,
+			},
+		));
 
         // Create a temporary file with a regex pattern containing a wildcard
         let mut file = NamedTempFile::new()?;

websurfx/config.lua (+2 -0)

@@ -73,3 +73,5 @@ upstream_search_engines = {
     Mojeek = false,
     Bing = false,
 } -- select the upstream search engines from which the results should be fetched.
+
+proxy = "" -- Proxy to send outgoing requests through. Set to empty string to disable.
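
For reference, a minimal sketch of how typical values for this option behave, assuming `reqwest::Proxy::all` as used by the parser above (socks5:// URLs additionally require reqwest's optional `socks` feature):

```rust
use reqwest::Proxy;

fn main() {
    // An HTTP(S) proxy URL is accepted and will be used for all outgoing requests.
    assert!(Proxy::all("http://127.0.0.1:8118").is_ok());

    // The default empty string does not parse as a proxy URL, so the config
    // parser falls back to using no proxy at all.
    assert!(Proxy::all("").is_err());
}
```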