From 6c47ea921bc49a7be591abeda08cdc0fa303ef5d Mon Sep 17 00:00:00 2001 From: Kavin <20838718+FireMasterK@users.noreply.github.com> Date: Mon, 1 May 2023 04:22:10 +0100 Subject: [PATCH 1/2] performance: compile regex only once --- src/utils.rs | 86 +++++++++++++++++++++++++++++++--------------------- 1 file changed, 51 insertions(+), 35 deletions(-) diff --git a/src/utils.rs b/src/utils.rs index 62a71ed..1de2868 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -5,6 +5,7 @@ use crate::{client::json, server::RequestExt}; use askama::Template; use cookie::Cookie; use hyper::{Body, Request, Response}; +use once_cell::sync::Lazy; use regex::Regex; use rust_embed::RustEmbed; use serde_json::Value; @@ -777,6 +778,21 @@ pub async fn catch_random(sub: &str, additional: &str) -> Result, } } +static REGEX_FORMAT_1: Lazy<Regex> = Lazy::new(|| Regex::new(r"https://www\.reddit\.com/(.*)").unwrap()); +static REGEX_FORMAT_2: Lazy<Regex> = Lazy::new(|| Regex::new(r"https://old\.reddit\.com/(.*)").unwrap()); +static REGEX_FORMAT_3: Lazy<Regex> = Lazy::new(|| Regex::new(r"https://np\.reddit\.com/(.*)").unwrap()); +static REGEX_FORMAT_4: Lazy<Regex> = Lazy::new(|| Regex::new(r"https://reddit\.com/(.*)").unwrap()); +static REGEX_FORMAT_5: Lazy<Regex> = Lazy::new(|| Regex::new(r"https://v\.redd\.it/(.*)/DASH_([0-9]{2,4}(\.mp4|$|\?source=fallback))").unwrap()); +static REGEX_FORMAT_6: Lazy<Regex> = Lazy::new(|| Regex::new(r"https://v\.redd\.it/(.+)/(HLSPlaylist\.m3u8.*)$").unwrap()); +static REGEX_FORMAT_7: Lazy<Regex> = Lazy::new(|| Regex::new(r"https://i\.redd\.it/(.*)").unwrap()); +static REGEX_FORMAT_8: Lazy<Regex> = Lazy::new(|| Regex::new(r"https://a\.thumbs\.redditmedia\.com/(.*)").unwrap()); +static REGEX_FORMAT_9: Lazy<Regex> = Lazy::new(|| Regex::new(r"https://b\.thumbs\.redditmedia\.com/(.*)").unwrap()); +static REGEX_FORMAT_10: Lazy<Regex> = Lazy::new(|| Regex::new(r"https://emoji\.redditmedia\.com/(.*)/(.*)").unwrap()); +static REGEX_FORMAT_11: Lazy<Regex> = Lazy::new(|| Regex::new(r"https://preview\.redd\.it/(.*)").unwrap()); +static REGEX_FORMAT_12: 
Lazy<Regex> = Lazy::new(|| Regex::new(r"https://external\-preview\.redd\.it/(.*)").unwrap()); +static REGEX_FORMAT_13: Lazy<Regex> = Lazy::new(|| Regex::new(r"https://styles\.redditmedia\.com/(.*)").unwrap()); +static REGEX_FORMAT_14: Lazy<Regex> = Lazy::new(|| Regex::new(r"https://www\.redditstatic\.com/(.*)").unwrap()); + // Direct urls to proxy if proxy is enabled pub fn format_url(url: &str) -> String { if url.is_empty() || url == "self" || url == "default" || url == "nsfw" || url == "spoiler" { @@ -785,13 +801,11 @@ pub fn format_url(url: &str) -> String { Url::parse(url).map_or(url.to_string(), |parsed| { let domain = parsed.domain().unwrap_or_default(); - let capture = |regex: &str, format: &str, segments: i16| { - Regex::new(regex).map_or(String::new(), |re| { - re.captures(url).map_or(String::new(), |caps| match segments { - 1 => [format, &caps[1]].join(""), - 2 => [format, &caps[1], "/", &caps[2]].join(""), - _ => String::new(), - }) + let capture = |regex: &Regex, format: &str, segments: i16| { + regex.captures(url).map_or(String::new(), |caps| match segments { + 1 => [format, &caps[1]].join(""), + 2 => [format, &caps[1], "/", &caps[2]].join(""), + _ => String::new(), }) }; @@ -817,44 +831,46 @@ pub fn format_url(url: &str) -> String { } match domain { - "www.reddit.com" => capture(r"https://www\.reddit\.com/(.*)", "/", 1), - "old.reddit.com" => capture(r"https://old\.reddit\.com/(.*)", "/", 1), - "np.reddit.com" => capture(r"https://np\.reddit\.com/(.*)", "/", 1), - "reddit.com" => capture(r"https://reddit\.com/(.*)", "/", 1), - "v.redd.it" => chain!( - capture(r"https://v\.redd\.it/(.*)/DASH_([0-9]{2,4}(\.mp4|$|\?source=fallback))", "/vid/", 2), - capture(r"https://v\.redd\.it/(.+)/(HLSPlaylist\.m3u8.*)$", "/hls/", 2) - ), - "i.redd.it" => capture(r"https://i\.redd\.it/(.*)", "/img/", 1), - "a.thumbs.redditmedia.com" => capture(r"https://a\.thumbs\.redditmedia\.com/(.*)", "/thumb/a/", 1), - "b.thumbs.redditmedia.com" => capture(r"https://b\.thumbs\.redditmedia\.com/(.*)", 
"/thumb/b/", 1), - "emoji.redditmedia.com" => capture(r"https://emoji\.redditmedia\.com/(.*)/(.*)", "/emoji/", 2), - "preview.redd.it" => capture(r"https://preview\.redd\.it/(.*)", "/preview/pre/", 1), - "external-preview.redd.it" => capture(r"https://external\-preview\.redd\.it/(.*)", "/preview/external-pre/", 1), - "styles.redditmedia.com" => capture(r"https://styles\.redditmedia\.com/(.*)", "/style/", 1), - "www.redditstatic.com" => capture(r"https://www\.redditstatic\.com/(.*)", "/static/", 1), + "www.reddit.com" => capture(&REGEX_FORMAT_1, "/", 1), + "old.reddit.com" => capture(&REGEX_FORMAT_2, "/", 1), + "np.reddit.com" => capture(&REGEX_FORMAT_3, "/", 1), + "reddit.com" => capture(&REGEX_FORMAT_4, "/", 1), + "v.redd.it" => chain!(capture(&REGEX_FORMAT_5, "/vid/", 2), capture(&REGEX_FORMAT_6, "/hls/", 2)), + "i.redd.it" => capture(&REGEX_FORMAT_7, "/img/", 1), + "a.thumbs.redditmedia.com" => capture(&REGEX_FORMAT_8, "/thumb/a/", 1), + "b.thumbs.redditmedia.com" => capture(&REGEX_FORMAT_9, "/thumb/b/", 1), + "emoji.redditmedia.com" => capture(&REGEX_FORMAT_10, "/emoji/", 2), + "preview.redd.it" => capture(&REGEX_FORMAT_11, "/preview/pre/", 1), + "external-preview.redd.it" => capture(&REGEX_FORMAT_12, "/preview/external-pre/", 1), + "styles.redditmedia.com" => capture(&REGEX_FORMAT_13, "/style/", 1), + "www.redditstatic.com" => capture(&REGEX_FORMAT_14, "/static/", 1), _ => url.to_string(), } }) } } +static REDDIT_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r#"href="(https|http|)://(www\.|old\.|np\.|amp\.|)(reddit\.com|redd\.it)/"#).unwrap()); +static REDDIT_PREVIEW_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"https://external-preview\.redd\.it(.*)[^?]").unwrap()); + // Rewrite Reddit links to Libreddit in body of text pub fn rewrite_urls(input_text: &str) -> String { - let text1 = Regex::new(r#"href="(https|http|)://(www\.|old\.|np\.|amp\.|)(reddit\.com|redd\.it)/"#) - .map_or(String::new(), |re| re.replace_all(input_text, r#"href="/"#).to_string()) - // Remove (html-encoded) "\" from URLs. 
- .replace("%5C", "") - .replace('\\', ""); + let text1 = + // Rewrite Reddit links to Libreddit + REDDIT_REGEX.replace_all(input_text, r#"href="/"#) + .to_string() + // Remove (html-encoded) "\" from URLs. + .replace("%5C", "") + .replace('\\', ""); // Rewrite external media previews to Libreddit - Regex::new(r"https://external-preview\.redd\.it(.*)[^?]").map_or(String::new(), |re| { - if re.is_match(&text1) { - re.replace_all(&text1, format_url(re.find(&text1).map(|x| x.as_str()).unwrap_or_default())).to_string() - } else { - text1 - } - }) + if REDDIT_PREVIEW_REGEX.is_match(&text1) { + REDDIT_PREVIEW_REGEX + .replace_all(&text1, format_url(REDDIT_PREVIEW_REGEX.find(&text1).map(|x| x.as_str()).unwrap_or_default())) + .to_string() + } else { + text1 + } } // Format vote count to a string that will be displayed. From 4d5c52b83b28b797932b83fd2e0ef404857ec6e0 Mon Sep 17 00:00:00 2001 From: Kavin <20838718+FireMasterK@users.noreply.github.com> Date: Mon, 1 May 2023 04:55:36 +0100 Subject: [PATCH 2/2] Rename variables to more descriptive names. 
--- src/utils.rs | 54 ++++++++++++++++++++++++++-------------------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/src/utils.rs b/src/utils.rs index 1de2868..a084af7 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -778,20 +778,20 @@ pub async fn catch_random(sub: &str, additional: &str) -> Result, } } -static REGEX_FORMAT_1: Lazy<Regex> = Lazy::new(|| Regex::new(r"https://www\.reddit\.com/(.*)").unwrap()); -static REGEX_FORMAT_2: Lazy<Regex> = Lazy::new(|| Regex::new(r"https://old\.reddit\.com/(.*)").unwrap()); -static REGEX_FORMAT_3: Lazy<Regex> = Lazy::new(|| Regex::new(r"https://np\.reddit\.com/(.*)").unwrap()); -static REGEX_FORMAT_4: Lazy<Regex> = Lazy::new(|| Regex::new(r"https://reddit\.com/(.*)").unwrap()); -static REGEX_FORMAT_5: Lazy<Regex> = Lazy::new(|| Regex::new(r"https://v\.redd\.it/(.*)/DASH_([0-9]{2,4}(\.mp4|$|\?source=fallback))").unwrap()); -static REGEX_FORMAT_6: Lazy<Regex> = Lazy::new(|| Regex::new(r"https://v\.redd\.it/(.+)/(HLSPlaylist\.m3u8.*)$").unwrap()); -static REGEX_FORMAT_7: Lazy<Regex> = Lazy::new(|| Regex::new(r"https://i\.redd\.it/(.*)").unwrap()); -static REGEX_FORMAT_8: Lazy<Regex> = Lazy::new(|| Regex::new(r"https://a\.thumbs\.redditmedia\.com/(.*)").unwrap()); -static REGEX_FORMAT_9: Lazy<Regex> = Lazy::new(|| Regex::new(r"https://b\.thumbs\.redditmedia\.com/(.*)").unwrap()); -static REGEX_FORMAT_10: Lazy<Regex> = Lazy::new(|| Regex::new(r"https://emoji\.redditmedia\.com/(.*)/(.*)").unwrap()); -static REGEX_FORMAT_11: Lazy<Regex> = Lazy::new(|| Regex::new(r"https://preview\.redd\.it/(.*)").unwrap()); -static REGEX_FORMAT_12: Lazy<Regex> = Lazy::new(|| Regex::new(r"https://external\-preview\.redd\.it/(.*)").unwrap()); -static REGEX_FORMAT_13: Lazy<Regex> = Lazy::new(|| Regex::new(r"https://styles\.redditmedia\.com/(.*)").unwrap()); -static REGEX_FORMAT_14: Lazy<Regex> = Lazy::new(|| Regex::new(r"https://www\.redditstatic\.com/(.*)").unwrap()); +static REGEX_URL_WWW: Lazy<Regex> = Lazy::new(|| Regex::new(r"https://www\.reddit\.com/(.*)").unwrap()); +static REGEX_URL_OLD: Lazy<Regex> = Lazy::new(|| 
Regex::new(r"https://old\.reddit\.com/(.*)").unwrap()); +static REGEX_URL_NP: Lazy<Regex> = Lazy::new(|| Regex::new(r"https://np\.reddit\.com/(.*)").unwrap()); +static REGEX_URL_PLAIN: Lazy<Regex> = Lazy::new(|| Regex::new(r"https://reddit\.com/(.*)").unwrap()); +static REGEX_URL_VIDEOS: Lazy<Regex> = Lazy::new(|| Regex::new(r"https://v\.redd\.it/(.*)/DASH_([0-9]{2,4}(\.mp4|$|\?source=fallback))").unwrap()); +static REGEX_URL_VIDEOS_HLS: Lazy<Regex> = Lazy::new(|| Regex::new(r"https://v\.redd\.it/(.+)/(HLSPlaylist\.m3u8.*)$").unwrap()); +static REGEX_URL_IMAGES: Lazy<Regex> = Lazy::new(|| Regex::new(r"https://i\.redd\.it/(.*)").unwrap()); +static REGEX_URL_THUMBS_A: Lazy<Regex> = Lazy::new(|| Regex::new(r"https://a\.thumbs\.redditmedia\.com/(.*)").unwrap()); +static REGEX_URL_THUMBS_B: Lazy<Regex> = Lazy::new(|| Regex::new(r"https://b\.thumbs\.redditmedia\.com/(.*)").unwrap()); +static REGEX_URL_EMOJI: Lazy<Regex> = Lazy::new(|| Regex::new(r"https://emoji\.redditmedia\.com/(.*)/(.*)").unwrap()); +static REGEX_URL_PREVIEW: Lazy<Regex> = Lazy::new(|| Regex::new(r"https://preview\.redd\.it/(.*)").unwrap()); +static REGEX_URL_EXTERNAL_PREVIEW: Lazy<Regex> = Lazy::new(|| Regex::new(r"https://external\-preview\.redd\.it/(.*)").unwrap()); +static REGEX_URL_STYLES: Lazy<Regex> = Lazy::new(|| Regex::new(r"https://styles\.redditmedia\.com/(.*)").unwrap()); +static REGEX_URL_STATIC_MEDIA: Lazy<Regex> = Lazy::new(|| Regex::new(r"https://www\.redditstatic\.com/(.*)").unwrap()); // Direct urls to proxy if proxy is enabled pub fn format_url(url: &str) -> String { @@ -831,19 +831,19 @@ pub fn format_url(url: &str) -> String { } match domain { - "www.reddit.com" => capture(&REGEX_FORMAT_1, "/", 1), - "old.reddit.com" => capture(&REGEX_FORMAT_2, "/", 1), - "np.reddit.com" => capture(&REGEX_FORMAT_3, "/", 1), - "reddit.com" => capture(&REGEX_FORMAT_4, "/", 1), - "v.redd.it" => chain!(capture(&REGEX_FORMAT_5, "/vid/", 2), capture(&REGEX_FORMAT_6, "/hls/", 2)), - "i.redd.it" => capture(&REGEX_FORMAT_7, "/img/", 1), - "a.thumbs.redditmedia.com" => capture(&REGEX_FORMAT_8, "/thumb/a/", 1), - 
"b.thumbs.redditmedia.com" => capture(&REGEX_FORMAT_9, "/thumb/b/", 1), - "emoji.redditmedia.com" => capture(&REGEX_FORMAT_10, "/emoji/", 2), - "preview.redd.it" => capture(&REGEX_FORMAT_11, "/preview/pre/", 1), - "external-preview.redd.it" => capture(&REGEX_FORMAT_12, "/preview/external-pre/", 1), - "styles.redditmedia.com" => capture(&REGEX_FORMAT_13, "/style/", 1), - "www.redditstatic.com" => capture(&REGEX_FORMAT_14, "/static/", 1), + "www.reddit.com" => capture(&REGEX_URL_WWW, "/", 1), + "old.reddit.com" => capture(&REGEX_URL_OLD, "/", 1), + "np.reddit.com" => capture(&REGEX_URL_NP, "/", 1), + "reddit.com" => capture(&REGEX_URL_PLAIN, "/", 1), + "v.redd.it" => chain!(capture(&REGEX_URL_VIDEOS, "/vid/", 2), capture(&REGEX_URL_VIDEOS_HLS, "/hls/", 2)), + "i.redd.it" => capture(&REGEX_URL_IMAGES, "/img/", 1), + "a.thumbs.redditmedia.com" => capture(&REGEX_URL_THUMBS_A, "/thumb/a/", 1), + "b.thumbs.redditmedia.com" => capture(&REGEX_URL_THUMBS_B, "/thumb/b/", 1), + "emoji.redditmedia.com" => capture(&REGEX_URL_EMOJI, "/emoji/", 2), + "preview.redd.it" => capture(&REGEX_URL_PREVIEW, "/preview/pre/", 1), + "external-preview.redd.it" => capture(&REGEX_URL_EXTERNAL_PREVIEW, "/preview/external-pre/", 1), + "styles.redditmedia.com" => capture(&REGEX_URL_STYLES, "/style/", 1), + "www.redditstatic.com" => capture(&REGEX_URL_STATIC_MEDIA, "/static/", 1), _ => url.to_string(), } })