From 3ac918702715227b8f1a8b7c0a042dbbbd2bf05d Mon Sep 17 00:00:00 2001 From: Sean McArthur Date: Tue, 6 Aug 2024 08:24:15 -0400 Subject: [PATCH 1/2] wip: proxy-env --- Cargo.toml | 12 +- src/client/mod.rs | 4 + src/client/proxy/matcher.rs | 462 ++++++++++++++++++++++++++++++++++++ src/client/proxy/mod.rs | 5 + 4 files changed, 479 insertions(+), 4 deletions(-) create mode 100644 src/client/proxy/matcher.rs create mode 100644 src/client/proxy/mod.rs diff --git a/Cargo.toml b/Cargo.toml index 9088a465..4232af11 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,13 +18,16 @@ features = ["full"] rustdoc-args = ["--cfg", "docsrs"] [dependencies] -hyper = "1.4.0" +base64 = { version = "0.22", optional = true } +bytes = "1.7.1" +futures-channel = { version = "0.3", optional = true } futures-util = { version = "0.3.16", default-features = false } http = "1.0" http-body = "1.0.0" -bytes = "1.7.1" +hyper = "1.4.0" +ipnet = { version = "2.9", optional = true } +percent-encoding = { version = "2.3", optional = true } pin-project-lite = "0.2.4" -futures-channel = { version = "0.3", optional = true } socket2 = { version = "0.5", optional = true, features = ["all"] } tracing = { version = "0.1", default-features = false, features = ["std"], optional = true } tokio = { version = "1", optional = true, default-features = false } @@ -42,7 +45,7 @@ pretty_env_logger = "0.5" pnet_datalink = "0.35.0" [features] -default = [] +default = ["client-proxy-env"] # Shorthand to enable everything full = [ @@ -59,6 +62,7 @@ full = [ client = ["hyper/client", "dep:tracing", "dep:futures-channel", "dep:tower-service"] client-legacy = ["client", "dep:socket2", "tokio/sync"] +client-proxy-env = ["client", "dep:base64", "dep:ipnet", "dep:percent-encoding"] server = ["hyper/server"] server-auto = ["server", "http1", "http2"] diff --git a/src/client/mod.rs b/src/client/mod.rs index e9215425..360a1cb2 100644 --- a/src/client/mod.rs +++ b/src/client/mod.rs @@ -3,3 +3,7 @@ /// Legacy implementations of `connect` module and `Client` #[cfg(feature = "client-legacy")] pub mod legacy; + +// for now, no others features use this mod +//#[cfg(feature = "client-proxy-env")] +pub mod proxy; diff --git a/src/client/proxy/matcher.rs b/src/client/proxy/matcher.rs new file mode 100644 index 00000000..c7e96e30 --- /dev/null +++ b/src/client/proxy/matcher.rs @@ -0,0 +1,462 @@ +use std::fmt; +use std::net::IpAddr; + +use http::header::HeaderValue; +use ipnet::IpNet; +use percent_encoding::percent_decode_str; + +/// A proxy matcher built using standard environment variables. +pub struct Matcher { + http: Option, + https: Option, + no: NoProxy, +} + +#[derive(Clone)] +pub struct Intercept { + uri: http::Uri, + basic_auth: Option, + raw_auth: Option<(String, String)>, +} + +#[derive(Default)] +struct Builder { + is_cgi: bool, + all: String, + http: String, + https: String, + no: String, +} + +struct NoProxy { + ips: IpMatcher, + domains: DomainMatcher, +} + +struct DomainMatcher(Vec); + +struct IpMatcher(Vec); + +enum Ip { + Address(IpAddr), + Network(IpNet), +} + +// ===== impl Matcher ===== + +impl Matcher { + /// Create a matcher reading the current environment variables. + pub fn from_env() -> Self { + Builder::from_env().build() + } + + /* + pub fn builder() -> Builder { + Builder::from_env().build() + } + */ + + /// Check if the destination should be intercepted by a proxy. + /// + /// If the proxy rules match the destination, a new `Uri` will be returned + /// to connect to. 
+ pub fn intercept(&self, dst: &http::Uri) -> Option<&Intercept> { + if self.no.contains(dst.host()?) { + return None; + } + + match dst.scheme_str() { + Some("http") => self.http.as_ref(), + Some("https") => self.https.as_ref(), + _ => None, + } + } +} + +// ===== impl Intercept ===== + +impl Intercept { + pub fn uri(&self) -> &http::Uri { + &self.uri + } + + pub fn basic_auth(&self) -> Option<&HeaderValue> { + self.basic_auth.as_ref() + } + + pub fn raw_auth(&self) -> Option<(&str, &str)> { + self.raw_auth.as_ref().map(|&(ref u, ref p)| (&**u, &**p)) + } +} + +impl fmt::Debug for Intercept { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("Intercept") + .field("uri", &self.uri) + // dont output auth, its sensitive + .finish() + } +} + +// ===== impl Builder ===== + +impl Builder { + fn from_env() -> Self { + Builder { + is_cgi: std::env::var_os("REQUEST_METHOD").is_some(), + all: get_first_env(&["ALL_PROXY", "all_proxy"]), + http: get_first_env(&["HTTP_PROXY", "http_proxy"]), + https: get_first_env(&["HTTPS_PROXY", "https_proxy"]), + no: get_first_env(&["NO_PROXY", "no_proxy"]), + } + } + + fn build(self) -> Matcher { + if self.is_cgi { + return Matcher { + http: None, + https: None, + no: NoProxy::empty(), + }; + } + + let all = parse_env_uri(&self.all); + + Matcher { + http: parse_env_uri(&self.http).or_else(|| all.clone()), + https: parse_env_uri(&self.https).or(all), + no: NoProxy::from_string(&self.no), + } + } +} + +fn get_first_env(names: &[&str]) -> String { + for name in names { + if let Ok(val) = std::env::var(name) { + return val; + } + } + + String::new() +} + +fn parse_env_uri(val: &str) -> Option { + let uri = val.parse::().ok()?; + let mut builder = http::Uri::builder(); + let mut is_httpish = false; + let mut basic_auth = None; + let mut raw_auth = None; + + builder = builder.scheme(match uri.scheme() { + Some(s) => { + if s == &http::uri::Scheme::HTTP || s == &http::uri::Scheme::HTTPS { + is_httpish = true; + s.clone() + } else if s.as_str() == "socks5" || s.as_str() == "socks5h" { + s.clone() + } else { + // can't use this proxy scheme + return None; + } + } + // if no scheme provided, assume they meant 'http' + None => { + is_httpish = true; + http::uri::Scheme::HTTP + }, + }); + + let authority = uri.authority()?; + + if let Some((userinfo, host_port)) = authority.as_str().split_once('@') { + let (user, pass) = userinfo.split_once(':')?; + let user = percent_decode_str(user).decode_utf8_lossy(); + let pass = percent_decode_str(pass).decode_utf8_lossy(); + if is_httpish { + basic_auth = Some(encode_basic_auth(&user, Some(&pass))); + } else { + raw_auth = Some((user.into(), pass.into())); + } + builder = builder.authority(host_port); + } else { + builder = builder.authority(authority.clone()); + } + + // removing any path, but we MUST specify one or the builder errors + builder = builder.path_and_query("/"); + + let dst = builder.build().ok()?; + + Some(Intercept { + uri: dst, + basic_auth, + raw_auth, + }) +} + +fn encode_basic_auth(user: &str, pass: Option<&str>) -> HeaderValue { + use base64::prelude::BASE64_STANDARD; + use base64::write::EncoderWriter; + use std::io::Write; + + let mut buf = b"Basic ".to_vec(); + { + let mut encoder = EncoderWriter::new(&mut buf, &BASE64_STANDARD); + let _ = write!(encoder, "{user}:"); + if let Some(password) = pass { + let _ = write!(encoder, "{password}"); + } + } + let mut header = HeaderValue::from_bytes(&buf).expect("base64 is always valid HeaderValue"); + header.set_sensitive(true); + header +} + 
+impl NoProxy { + /* + fn from_env() -> NoProxy { + let raw = std::env::var("NO_PROXY") + .or_else(|_| std::env::var("no_proxy")) + .unwrap_or_default(); + + Self::from_string(&raw) + } + */ + + fn empty() -> NoProxy { + NoProxy { + ips: IpMatcher(Vec::new()), + domains: DomainMatcher(Vec::new()), + } + } + + /// Returns a new no-proxy configuration based on a `no_proxy` string (or `None` if no variables + /// are set) + /// The rules are as follows: + /// * The environment variable `NO_PROXY` is checked, if it is not set, `no_proxy` is checked + /// * If neither environment variable is set, `None` is returned + /// * Entries are expected to be comma-separated (whitespace between entries is ignored) + /// * IP addresses (both IPv4 and IPv6) are allowed, as are optional subnet masks (by adding /size, + /// for example "`192.168.1.0/24`"). + /// * An entry "`*`" matches all hostnames (this is the only wildcard allowed) + /// * Any other entry is considered a domain name (and may contain a leading dot, for example `google.com` + /// and `.google.com` are equivalent) and would match both that domain AND all subdomains. + /// + /// For example, if `"NO_PROXY=google.com, 192.168.1.0/24"` was set, all of the following would match + /// (and therefore would bypass the proxy): + /// * `http://google.com/` + /// * `http://www.google.com/` + /// * `http://192.168.1.42/` + /// + /// The URL `http://notgoogle.com/` would not match. + fn from_string(no_proxy_list: &str) -> Self { + let mut ips = Vec::new(); + let mut domains = Vec::new(); + let parts = no_proxy_list.split(',').map(str::trim); + for part in parts { + match part.parse::() { + // If we can parse an IP net or address, then use it, otherwise, assume it is a domain + Ok(ip) => ips.push(Ip::Network(ip)), + Err(_) => match part.parse::() { + Ok(addr) => ips.push(Ip::Address(addr)), + Err(_) => domains.push(part.to_owned()), + }, + } + } + NoProxy { + ips: IpMatcher(ips), + domains: DomainMatcher(domains), + } + } + + fn contains(&self, host: &str) -> bool { + // According to RFC3986, raw IPv6 hosts will be wrapped in []. 
So we need to strip those off + // the end in order to parse correctly + let host = if host.starts_with('[') { + let x: &[_] = &['[', ']']; + host.trim_matches(x) + } else { + host + }; + match host.parse::() { + // If we can parse an IP addr, then use it, otherwise, assume it is a domain + Ok(ip) => self.ips.contains(ip), + Err(_) => self.domains.contains(host), + } + } +} + +impl IpMatcher { + fn contains(&self, addr: IpAddr) -> bool { + for ip in &self.0 { + match ip { + Ip::Address(address) => { + if &addr == address { + return true; + } + } + Ip::Network(net) => { + if net.contains(&addr) { + return true; + } + } + } + } + false + } +} + +impl DomainMatcher { + // The following links may be useful to understand the origin of these rules: + // * https://curl.se/libcurl/c/CURLOPT_NOPROXY.html + // * https://github.com/curl/curl/issues/1208 + fn contains(&self, domain: &str) -> bool { + let domain_len = domain.len(); + for d in &self.0 { + if d == domain || d.strip_prefix('.') == Some(domain) { + return true; + } else if domain.ends_with(d) { + if d.starts_with('.') { + // If the first character of d is a dot, that means the first character of domain + // must also be a dot, so we are looking at a subdomain of d and that matches + return true; + } else if domain.as_bytes().get(domain_len - d.len() - 1) == Some(&b'.') { + // Given that d is a prefix of domain, if the prior character in domain is a dot + // then that means we must be matching a subdomain of d, and that matches + return true; + } + } else if d == "*" { + return true; + } + } + false + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_domain_matcher() { + let domains = vec![".foo.bar".into(), "bar.foo".into()]; + let matcher = DomainMatcher(domains); + + // domains match with leading `.` + assert!(matcher.contains("foo.bar")); + // subdomains match with leading `.` + assert!(matcher.contains("www.foo.bar")); + + // domains match with no leading `.` + assert!(matcher.contains("bar.foo")); + // subdomains match with no leading `.` + assert!(matcher.contains("www.bar.foo")); + + // non-subdomain string prefixes don't match + assert!(!matcher.contains("notfoo.bar")); + assert!(!matcher.contains("notbar.foo")); + } + + #[test] + fn test_no_proxy_wildcard() { + let no_proxy = NoProxy::from_string("*"); + assert!(no_proxy.contains("any.where")); + } + + #[test] + fn test_no_proxy_ip_ranges() { + let no_proxy = + NoProxy::from_string(".foo.bar, bar.baz,10.42.1.1/24,::1,10.124.7.8,2001::/17"); + + let should_not_match = [ + // random url, not in no_proxy + "hyper.rs", + // make sure that random non-subdomain string prefixes don't match + "notfoo.bar", + // make sure that random non-subdomain string prefixes don't match + "notbar.baz", + // ipv4 address out of range + "10.43.1.1", + // ipv4 address out of range + "10.124.7.7", + // ipv6 address out of range + "[ffff:db8:a0b:12f0::1]", + // ipv6 address out of range + "[2005:db8:a0b:12f0::1]", + ]; + + for host in &should_not_match { + assert!(!no_proxy.contains(host), "should not contain {:?}", host); + } + + let should_match = [ + // make sure subdomains (with leading .) match + "hello.foo.bar", + // make sure exact matches (without leading .) match (also makes sure spaces between entries work) + "bar.baz", + // make sure subdomains (without leading . in no_proxy) match + "foo.bar.baz", + // make sure subdomains (without leading . 
in no_proxy) match - this differs from cURL + "foo.bar", + // ipv4 address match within range + "10.42.1.100", + // ipv6 address exact match + "[::1]", + // ipv6 address match within range + "[2001:db8:a0b:12f0::1]", + // ipv4 address exact match + "10.124.7.8", + ]; + + for host in &should_match { + assert!(no_proxy.contains(host), "should contain {:?}", host); + } + } + + macro_rules! p { + ($($n:ident = $v:expr,)*) => ({Builder { + $($n: $v.into(),)* + ..Builder::default() + }.build()}); + } + + fn intercept<'a>(p: &'a Matcher, u: &str) -> &'a Intercept { + p.intercept(&u.parse().unwrap()).unwrap() + } + + #[test] + fn test_all_proxy() { + let p = p! { + all = "http://om.nom", + }; + + assert_eq!( + "http://om.nom", + intercept(&p, "http://example.com").uri() + ); + + assert_eq!( + "http://om.nom", + intercept(&p, "https://example.com").uri() + ); + } + + #[test] + fn test_specific_overrides_all() { + let p = p! { + all = "http://no.pe", + http = "http://y.ep", + }; + + assert_eq!( + "http://no.pe", + intercept(&p, "https://example.com").uri() + ); + + // the http rule is "more specific" than the all rule + assert_eq!( + "http://y.ep", + intercept(&p, "http://example.com").uri() + ); + } +} diff --git a/src/client/proxy/mod.rs b/src/client/proxy/mod.rs new file mode 100644 index 00000000..492b7885 --- /dev/null +++ b/src/client/proxy/mod.rs @@ -0,0 +1,5 @@ +//! Proxy utilities + +mod matcher; + +pub use self::matcher::Matcher; From 23f0d0158f1793e413d66d7402d5f30b8ab22cee Mon Sep 17 00:00:00 2001 From: Andrzej Sulkowski Date: Thu, 6 Mar 2025 09:47:57 +0100 Subject: [PATCH 2/2] feat(client): Add system proxy support for macOS #3850 --- Cargo.toml | 29 ++- src/client/proxy/builder.rs | 315 ++++++++++++++++++++++++++++ src/client/proxy/matcher.rs | 390 +++++------------------------------ src/client/proxy/mod.rs | 3 + src/client/proxy/no_proxy.rs | 208 +++++++++++++++++++ src/client/proxy/utils.rs | 86 ++++++++ 6 files changed, 691 insertions(+), 340 deletions(-) create mode 100644 src/client/proxy/builder.rs create mode 100644 src/client/proxy/no_proxy.rs create mode 100644 src/client/proxy/utils.rs diff --git a/Cargo.toml b/Cargo.toml index 4232af11..fa490409 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,7 +9,11 @@ repository = "https://github.com/hyperium/hyper-util" license = "MIT" authors = ["Sean McArthur "] keywords = ["http", "hyper", "hyperium"] -categories = ["network-programming", "web-programming::http-client", "web-programming::http-server"] +categories = [ + "network-programming", + "web-programming::http-client", + "web-programming::http-server", +] edition = "2021" rust-version = "1.63" @@ -29,10 +33,19 @@ ipnet = { version = "2.9", optional = true } percent-encoding = { version = "2.3", optional = true } pin-project-lite = "0.2.4" socket2 = { version = "0.5", optional = true, features = ["all"] } -tracing = { version = "0.1", default-features = false, features = ["std"], optional = true } -tokio = { version = "1", optional = true, default-features = false } +tracing = { version = "0.1", default-features = false, features = [ + "std", +], optional = true } +tokio = { version = "1", optional = true, default-features = false } tower-service = { version = "0.3", optional = true } +# Conditional dependencies for system proxy support +[target.'cfg(target_os = "macos")'.dependencies] +system-configuration = { version = "0.6.1", optional = true } + +[target.'cfg(target_os = "windows")'.dependencies] +winreg = { version = "0.55.0", optional = true } + [dev-dependencies] hyper = { 
version = "1.4.0", features = ["full"] } bytes = "1" @@ -58,9 +71,15 @@ full = [ "http1", "http2", "tokio", + "system-proxies" ] -client = ["hyper/client", "dep:tracing", "dep:futures-channel", "dep:tower-service"] +client = [ + "hyper/client", + "dep:tracing", + "dep:futures-channel", + "dep:tower-service", +] client-legacy = ["client", "dep:socket2", "tokio/sync"] client-proxy-env = ["client", "dep:base64", "dep:ipnet", "dep:percent-encoding"] @@ -75,6 +94,8 @@ http2 = ["hyper/http2"] tokio = ["dep:tokio", "tokio/net", "tokio/rt", "tokio/time"] +system-proxies = ["system-configuration", "winreg"] + # internal features used in CI __internal_happy_eyeballs_tests = [] diff --git a/src/client/proxy/builder.rs b/src/client/proxy/builder.rs new file mode 100644 index 00000000..2a5f0d6f --- /dev/null +++ b/src/client/proxy/builder.rs @@ -0,0 +1,315 @@ +use super::no_proxy::NoProxy; +use super::utils::{get_first_env, parse_env_uri}; +use super::Matcher; + +#[derive(Default)] +pub struct Builder { + pub(crate) is_cgi: bool, + pub(crate) all: String, + pub(crate) http: String, + pub(crate) https: String, + pub(crate) no: String, +} + +// ===== impl Builder ===== +impl Builder { + pub(crate) fn from_env() -> Self { + Builder { + is_cgi: std::env::var_os("REQUEST_METHOD").is_some(), + all: get_first_env(&["ALL_PROXY", "all_proxy"]), + http: get_first_env(&["HTTP_PROXY", "http_proxy"]), + https: get_first_env(&["HTTPS_PROXY", "https_proxy"]), + no: get_first_env(&["NO_PROXY", "no_proxy"]), + } + } + + /// Set a proxy for all schemes (ALL_PROXY equivalent). + pub fn all_proxy(mut self, proxy: impl Into) -> Self { + self.all = proxy.into(); + self + } + + /// Set a proxy for HTTP schemes (HTTP_PROXY equivalent). + pub fn http_proxy(mut self, proxy: impl Into) -> Self { + self.http = proxy.into(); + self + } + + /// Set a proxy for HTTPS schemes (HTTPS_PROXY equivalent). + pub fn https_proxy(mut self, proxy: impl Into) -> Self { + self.https = proxy.into(); + self + } + + /// Set no-proxy rules (NO_PROXY equivalent). 
+ pub fn no_proxy(mut self, no_proxy: impl Into) -> Self { + self.no = no_proxy.into(); + self + } + + pub(crate) fn build(self) -> Matcher { + if self.is_cgi { + return Matcher { + http: None, + https: None, + no: NoProxy::empty(), + }; + } + + let all = parse_env_uri(&self.all); + + Matcher { + http: parse_env_uri(&self.http).or_else(|| all.clone()), + https: parse_env_uri(&self.https).or(all), + no: NoProxy::from_string(&self.no), + } + } +} + +// ===== MacOS Builder System Proxies ===== +#[cfg(feature = "system-proxies")] +#[cfg(target_os = "macos")] +mod macos_proxies { + use super::*; + + use system_configuration::core_foundation::array::CFArray; + use system_configuration::core_foundation::base::{CFType, TCFType, TCFTypeRef}; + use system_configuration::core_foundation::dictionary::CFDictionary; + use system_configuration::core_foundation::number::CFNumber; + use system_configuration::core_foundation::string::{CFString, CFStringRef}; + use system_configuration::dynamic_store::{SCDynamicStore, SCDynamicStoreBuilder}; + + impl Builder { + // Helper function to check if a proxy is enabled + fn is_proxy_enabled(&self, prefix: &str, proxies: &CFDictionary) -> bool { + let key = format!("{}Enable", prefix); + proxies + .find(CFString::new(&key)) + .map(|val| { + // Try to get the value as i32 directly + unsafe { + let num_ref = val.as_concrete_TypeRef(); + if num_ref.is_null() { + return false; + } + let num = CFNumber::wrap_under_get_rule(num_ref as *const _); + num.to_i32() == Some(1) + } + }) + .unwrap_or(false) + } + // Helper function to get a string value + fn get_string( + &self, + key: &str, + proxies: &CFDictionary, + ) -> Option { + proxies + .find(CFString::new(key)) + .map(|val| unsafe { + let str_ref = val.as_concrete_TypeRef(); + if str_ref.is_null() { + return None; + } + let cfstr = CFString::wrap_under_get_rule(str_ref as *const _); + Some(cfstr.to_string()) + }) + .flatten() + } + // Helper function to get an integer value + fn get_int(&self, key: &str, proxies: &CFDictionary) -> Option { + proxies + .find(CFString::new(key)) + .map(|val| unsafe { + let num_ref = val.as_concrete_TypeRef(); + if num_ref.is_null() { + return None; + } + let num = CFNumber::wrap_under_get_rule(num_ref as *const _); + num.to_i32() + }) + .flatten() + } + + pub fn from_system_proxy(mut self) -> Self { + let store = SCDynamicStoreBuilder::new("proxy-fetcher").build(); + + if let Some(proxies) = store.get_proxies() { + let (http, https, no) = self.extract_system_proxy(proxies); + + if let Some(http_proxy) = http { + self.http = http_proxy; + } + if let Some(https_proxy) = https { + self.https = https_proxy; + } + if let Some(no_proxy) = no { + self.no = no_proxy; + } + } + + self + } + pub(crate) fn extract_system_proxy( + &self, + proxies: CFDictionary, + ) -> (Option, Option, Option) { + let mut http: Option = None; + let mut https: Option = None; + let mut no: Option = None; + + // Process HTTP proxy + if self.is_proxy_enabled("HTTP", &proxies) { + if let Some(host) = self.get_string("HTTPProxy", &proxies) { + let port = self.get_int("HTTPPort", &proxies); + http = match port { + Some(p) => Some(format!("http://{}:{}", host, p)), + None => Some(format!("http://{}", host)), + }; + } + } + + // Process HTTPS proxy + if self.is_proxy_enabled("HTTPS", &proxies) { + if let Some(host) = self.get_string("HTTPSProxy", &proxies) { + let port = self.get_int("HTTPSPort", &proxies); + https = match port { + Some(p) => Some(format!("https://{}:{}", host, p)), + None => Some(format!("https://{}", host)), 
+ }; + } + } + + // Process exceptions (NO_PROXY) + if let Some(exceptions_ref) = proxies.find(CFString::new("ExceptionsList")) { + if let Some(arr) = exceptions_ref.downcast::() { + let exceptions: Vec = arr + .iter() + .filter_map(|item| unsafe { + // Get the raw pointer value + let ptr = item.as_void_ptr(); + if ptr.is_null() { + return None; + } + // Try to convert it to a CFString + let cfstr = CFString::wrap_under_get_rule(ptr as *const _); + Some(cfstr.to_string()) + }) + .collect(); + no = Some(exceptions.join(",")); + } + } + + (http, https, no) + } + } + + #[cfg(test)] + mod tests { + use super::*; + use crate::client::proxy::Matcher; + use system_configuration::core_foundation::array::CFArray; + use std::{net::IpAddr, str::FromStr}; + + struct MockSCDynamicStore { + pairs: Vec<(CFString, CFType)>, + } + + impl MockSCDynamicStore { + fn new() -> Self { + let mut keys = Vec::new(); + let mut values = Vec::new(); + + // HTTP proxy enabled + keys.push(CFString::new("HTTPEnable")); + values.push(CFNumber::from(1).as_CFType()); + + // HTTP proxy host and port + keys.push(CFString::new("HTTPProxy")); + values.push(CFString::new("test-proxy.example.com").as_CFType()); + keys.push(CFString::new("HTTPPort")); + values.push(CFNumber::from(8080).as_CFType()); + + // HTTPS proxy enabled + keys.push(CFString::new("HTTPSEnable")); + values.push(CFNumber::from(1).as_CFType()); + // HTTPS proxy host and port + keys.push(CFString::new("HTTPSProxy")); + values.push(CFString::new("secure-proxy.example.com").as_CFType()); + keys.push(CFString::new("HTTPSPort")); + values.push(CFNumber::from(8443).as_CFType()); + + // Exception list + keys.push(CFString::new("ExceptionsList")); + let exceptions = vec![ + CFString::new("localhost").as_CFType(), + CFString::new("127.0.0.1").as_CFType(), + CFString::new("*.local").as_CFType(), + ]; + values.push(CFArray::from_CFTypes(&exceptions).as_CFType()); + + let pairs = keys + .iter() + .map(|k| k.clone()) + .zip(values.iter().map(|v| v.as_CFType())) + .collect::>(); + + MockSCDynamicStore { pairs } + } + + fn get_proxies(&self) -> Option> { + let proxies = CFDictionary::from_CFType_pairs(&self.pairs.clone()); + Some(proxies) + } + } + + #[test] + fn test_mac_os_proxy_mocked() { + let mock_store = MockSCDynamicStore::new(); + let proxies = mock_store.get_proxies().unwrap(); + let (http, https, ns) = Matcher::builder().extract_system_proxy(proxies); + + assert!(http.is_some()); + assert!(https.is_some()); + assert!(ns.is_some()); + } + + #[ignore] + #[test] + fn test_mac_os_proxy() { + let matcher = Matcher::builder().from_system_proxy().build(); + assert!(matcher + .http + .unwrap() + .uri + .eq("http://proxy.example.com:8080")); + assert!(matcher + .https + .unwrap() + .uri + .eq("https://proxy.example.com:8080")); + + assert!(matcher.no.domains.contains("ebay.com")); + assert!(matcher.no.domains.contains("amazon.com")); + + let ip = IpAddr::from_str("54.239.28.85").unwrap(); + assert!(matcher.no.ips.contains(ip)); + } + } +} + +// ===== Windows Builder System Proxies ===== +#[cfg(feature = "system-proxies")] +#[cfg(target_os = "win")] +mod win_proxies { + impl Builder { + pub fn from_system_proxy(mut self) -> Self { + todo!("Load Win system proxy settings"); + } + } + + #[cfg(test)] + mod tests { + use super::*; + } +} diff --git a/src/client/proxy/matcher.rs b/src/client/proxy/matcher.rs index c7e96e30..808698de 100644 --- a/src/client/proxy/matcher.rs +++ b/src/client/proxy/matcher.rs @@ -1,46 +1,25 @@ use std::fmt; -use std::net::IpAddr; use 
http::header::HeaderValue; -use ipnet::IpNet; -use percent_encoding::percent_decode_str; +use super::builder::Builder; +use super::no_proxy::NoProxy; + /// A proxy matcher built using standard environment variables. +#[derive(Debug)] pub struct Matcher { - http: Option, - https: Option, - no: NoProxy, + pub (crate) http: Option, + pub (crate) https: Option, + pub (crate) no: NoProxy, } #[derive(Clone)] pub struct Intercept { - uri: http::Uri, - basic_auth: Option, - raw_auth: Option<(String, String)>, -} - -#[derive(Default)] -struct Builder { - is_cgi: bool, - all: String, - http: String, - https: String, - no: String, + pub (crate) uri: http::Uri, + pub (crate) basic_auth: Option, + pub (crate) raw_auth: Option<(String, String)>, } -struct NoProxy { - ips: IpMatcher, - domains: DomainMatcher, -} - -struct DomainMatcher(Vec); - -struct IpMatcher(Vec); - -enum Ip { - Address(IpAddr), - Network(IpNet), -} // ===== impl Matcher ===== @@ -50,11 +29,10 @@ impl Matcher { Builder::from_env().build() } - /* + /// Create a builder to configure a Matcher programmatically. pub fn builder() -> Builder { - Builder::from_env().build() + Builder::default() } - */ /// Check if the destination should be intercepted by a proxy. /// @@ -98,244 +76,52 @@ impl fmt::Debug for Intercept { } } -// ===== impl Builder ===== - -impl Builder { - fn from_env() -> Self { - Builder { - is_cgi: std::env::var_os("REQUEST_METHOD").is_some(), - all: get_first_env(&["ALL_PROXY", "all_proxy"]), - http: get_first_env(&["HTTP_PROXY", "http_proxy"]), - https: get_first_env(&["HTTPS_PROXY", "https_proxy"]), - no: get_first_env(&["NO_PROXY", "no_proxy"]), - } - } - - fn build(self) -> Matcher { - if self.is_cgi { - return Matcher { - http: None, - https: None, - no: NoProxy::empty(), - }; - } - - let all = parse_env_uri(&self.all); - - Matcher { - http: parse_env_uri(&self.http).or_else(|| all.clone()), - https: parse_env_uri(&self.https).or(all), - no: NoProxy::from_string(&self.no), - } - } -} - -fn get_first_env(names: &[&str]) -> String { - for name in names { - if let Ok(val) = std::env::var(name) { - return val; - } - } - - String::new() -} - -fn parse_env_uri(val: &str) -> Option { - let uri = val.parse::().ok()?; - let mut builder = http::Uri::builder(); - let mut is_httpish = false; - let mut basic_auth = None; - let mut raw_auth = None; - - builder = builder.scheme(match uri.scheme() { - Some(s) => { - if s == &http::uri::Scheme::HTTP || s == &http::uri::Scheme::HTTPS { - is_httpish = true; - s.clone() - } else if s.as_str() == "socks5" || s.as_str() == "socks5h" { - s.clone() - } else { - // can't use this proxy scheme - return None; - } - } - // if no scheme provided, assume they meant 'http' - None => { - is_httpish = true; - http::uri::Scheme::HTTP - }, - }); - - let authority = uri.authority()?; - - if let Some((userinfo, host_port)) = authority.as_str().split_once('@') { - let (user, pass) = userinfo.split_once(':')?; - let user = percent_decode_str(user).decode_utf8_lossy(); - let pass = percent_decode_str(pass).decode_utf8_lossy(); - if is_httpish { - basic_auth = Some(encode_basic_auth(&user, Some(&pass))); - } else { - raw_auth = Some((user.into(), pass.into())); - } - builder = builder.authority(host_port); - } else { - builder = builder.authority(authority.clone()); - } - - // removing any path, but we MUST specify one or the builder errors - builder = builder.path_and_query("/"); - - let dst = builder.build().ok()?; - - Some(Intercept { - uri: dst, - basic_auth, - raw_auth, - }) -} - -fn 
encode_basic_auth(user: &str, pass: Option<&str>) -> HeaderValue { - use base64::prelude::BASE64_STANDARD; - use base64::write::EncoderWriter; - use std::io::Write; - - let mut buf = b"Basic ".to_vec(); - { - let mut encoder = EncoderWriter::new(&mut buf, &BASE64_STANDARD); - let _ = write!(encoder, "{user}:"); - if let Some(password) = pass { - let _ = write!(encoder, "{password}"); - } - } - let mut header = HeaderValue::from_bytes(&buf).expect("base64 is always valid HeaderValue"); - header.set_sensitive(true); - header -} - -impl NoProxy { - /* - fn from_env() -> NoProxy { - let raw = std::env::var("NO_PROXY") - .or_else(|_| std::env::var("no_proxy")) - .unwrap_or_default(); +#[cfg(test)] +mod tests { + use crate::client::proxy::no_proxy::DomainMatcher; + use super::*; - Self::from_string(&raw) - } - */ + #[test] + fn test_manual_configuration() { + let matcher = Matcher::builder() + .http_proxy("http://proxy.example.com:8080") + .no_proxy("localhost, 127.0.0.1") + .build(); + + // HTTP URL should use the proxy + let intercept = matcher.intercept(&"http://example.com".parse().unwrap()); + assert!(intercept.is_some()); + assert_eq!( + intercept.unwrap().uri().to_string(), + "http://proxy.example.com:8080/" + ); - fn empty() -> NoProxy { - NoProxy { - ips: IpMatcher(Vec::new()), - domains: DomainMatcher(Vec::new()), - } - } + // No-proxy hosts should bypass the proxy + let intercept = matcher.intercept(&"http://localhost".parse().unwrap()); + assert!(intercept.is_none()); - /// Returns a new no-proxy configuration based on a `no_proxy` string (or `None` if no variables - /// are set) - /// The rules are as follows: - /// * The environment variable `NO_PROXY` is checked, if it is not set, `no_proxy` is checked - /// * If neither environment variable is set, `None` is returned - /// * Entries are expected to be comma-separated (whitespace between entries is ignored) - /// * IP addresses (both IPv4 and IPv6) are allowed, as are optional subnet masks (by adding /size, - /// for example "`192.168.1.0/24`"). - /// * An entry "`*`" matches all hostnames (this is the only wildcard allowed) - /// * Any other entry is considered a domain name (and may contain a leading dot, for example `google.com` - /// and `.google.com` are equivalent) and would match both that domain AND all subdomains. - /// - /// For example, if `"NO_PROXY=google.com, 192.168.1.0/24"` was set, all of the following would match - /// (and therefore would bypass the proxy): - /// * `http://google.com/` - /// * `http://www.google.com/` - /// * `http://192.168.1.42/` - /// - /// The URL `http://notgoogle.com/` would not match. - fn from_string(no_proxy_list: &str) -> Self { - let mut ips = Vec::new(); - let mut domains = Vec::new(); - let parts = no_proxy_list.split(',').map(str::trim); - for part in parts { - match part.parse::() { - // If we can parse an IP net or address, then use it, otherwise, assume it is a domain - Ok(ip) => ips.push(Ip::Network(ip)), - Err(_) => match part.parse::() { - Ok(addr) => ips.push(Ip::Address(addr)), - Err(_) => domains.push(part.to_owned()), - }, - } - } - NoProxy { - ips: IpMatcher(ips), - domains: DomainMatcher(domains), - } + let intercept = matcher.intercept(&"http://127.0.0.1".parse().unwrap()); + assert!(intercept.is_none()); } - fn contains(&self, host: &str) -> bool { - // According to RFC3986, raw IPv6 hosts will be wrapped in []. 
So we need to strip those off - // the end in order to parse correctly - let host = if host.starts_with('[') { - let x: &[_] = &['[', ']']; - host.trim_matches(x) - } else { - host - }; - match host.parse::() { - // If we can parse an IP addr, then use it, otherwise, assume it is a domain - Ok(ip) => self.ips.contains(ip), - Err(_) => self.domains.contains(host), - } - } -} + #[test] + fn test_all_proxy_manual() { + let matcher = Matcher::builder() + .all_proxy("http://all.proxy.com:9999") + .build(); -impl IpMatcher { - fn contains(&self, addr: IpAddr) -> bool { - for ip in &self.0 { - match ip { - Ip::Address(address) => { - if &addr == address { - return true; - } - } - Ip::Network(net) => { - if net.contains(&addr) { - return true; - } - } - } - } - false - } -} + let intercept = matcher.intercept(&"http://example.com".parse().unwrap()); + assert_eq!( + intercept.unwrap().uri().to_string(), + "http://all.proxy.com:9999/" + ); -impl DomainMatcher { - // The following links may be useful to understand the origin of these rules: - // * https://curl.se/libcurl/c/CURLOPT_NOPROXY.html - // * https://github.com/curl/curl/issues/1208 - fn contains(&self, domain: &str) -> bool { - let domain_len = domain.len(); - for d in &self.0 { - if d == domain || d.strip_prefix('.') == Some(domain) { - return true; - } else if domain.ends_with(d) { - if d.starts_with('.') { - // If the first character of d is a dot, that means the first character of domain - // must also be a dot, so we are looking at a subdomain of d and that matches - return true; - } else if domain.as_bytes().get(domain_len - d.len() - 1) == Some(&b'.') { - // Given that d is a prefix of domain, if the prior character in domain is a dot - // then that means we must be matching a subdomain of d, and that matches - return true; - } - } else if d == "*" { - return true; - } - } - false + let intercept = matcher.intercept(&"https://example.com".parse().unwrap()); + assert_eq!( + intercept.unwrap().uri().to_string(), + "http://all.proxy.com:9999/" + ); } -} - -#[cfg(test)] -mod tests { - use super::*; #[test] fn test_domain_matcher() { @@ -357,62 +143,6 @@ mod tests { assert!(!matcher.contains("notbar.foo")); } - #[test] - fn test_no_proxy_wildcard() { - let no_proxy = NoProxy::from_string("*"); - assert!(no_proxy.contains("any.where")); - } - - #[test] - fn test_no_proxy_ip_ranges() { - let no_proxy = - NoProxy::from_string(".foo.bar, bar.baz,10.42.1.1/24,::1,10.124.7.8,2001::/17"); - - let should_not_match = [ - // random url, not in no_proxy - "hyper.rs", - // make sure that random non-subdomain string prefixes don't match - "notfoo.bar", - // make sure that random non-subdomain string prefixes don't match - "notbar.baz", - // ipv4 address out of range - "10.43.1.1", - // ipv4 address out of range - "10.124.7.7", - // ipv6 address out of range - "[ffff:db8:a0b:12f0::1]", - // ipv6 address out of range - "[2005:db8:a0b:12f0::1]", - ]; - - for host in &should_not_match { - assert!(!no_proxy.contains(host), "should not contain {:?}", host); - } - - let should_match = [ - // make sure subdomains (with leading .) match - "hello.foo.bar", - // make sure exact matches (without leading .) match (also makes sure spaces between entries work) - "bar.baz", - // make sure subdomains (without leading . in no_proxy) match - "foo.bar.baz", - // make sure subdomains (without leading . 
in no_proxy) match - this differs from cURL - "foo.bar", - // ipv4 address match within range - "10.42.1.100", - // ipv6 address exact match - "[::1]", - // ipv6 address match within range - "[2001:db8:a0b:12f0::1]", - // ipv4 address exact match - "10.124.7.8", - ]; - - for host in &should_match { - assert!(no_proxy.contains(host), "should contain {:?}", host); - } - } - macro_rules! p { ($($n:ident = $v:expr,)*) => ({Builder { $($n: $v.into(),)* @@ -430,15 +160,9 @@ mod tests { all = "http://om.nom", }; - assert_eq!( - "http://om.nom", - intercept(&p, "http://example.com").uri() - ); + assert_eq!("http://om.nom", intercept(&p, "http://example.com").uri()); - assert_eq!( - "http://om.nom", - intercept(&p, "https://example.com").uri() - ); + assert_eq!("http://om.nom", intercept(&p, "https://example.com").uri()); } #[test] @@ -448,15 +172,9 @@ mod tests { http = "http://y.ep", }; - assert_eq!( - "http://no.pe", - intercept(&p, "https://example.com").uri() - ); + assert_eq!("http://no.pe", intercept(&p, "https://example.com").uri()); // the http rule is "more specific" than the all rule - assert_eq!( - "http://y.ep", - intercept(&p, "http://example.com").uri() - ); + assert_eq!("http://y.ep", intercept(&p, "http://example.com").uri()); } } diff --git a/src/client/proxy/mod.rs b/src/client/proxy/mod.rs index 492b7885..c0f6c59f 100644 --- a/src/client/proxy/mod.rs +++ b/src/client/proxy/mod.rs @@ -1,5 +1,8 @@ //! Proxy utilities mod matcher; +mod builder; +mod no_proxy; +mod utils; pub use self::matcher::Matcher; diff --git a/src/client/proxy/no_proxy.rs b/src/client/proxy/no_proxy.rs new file mode 100644 index 00000000..7e404525 --- /dev/null +++ b/src/client/proxy/no_proxy.rs @@ -0,0 +1,208 @@ +use std::net::IpAddr; +use ipnet::IpNet; + +#[derive(Debug)] +pub struct DomainMatcher(pub (crate) Vec); + +#[derive(Debug)] +pub struct IpMatcher(pub (crate) Vec); + +#[derive(Debug)] +pub enum Ip { + Address(IpAddr), + Network(IpNet), +} + +#[derive(Debug)] +pub struct NoProxy { + pub (crate) ips: IpMatcher, + pub (crate) domains: DomainMatcher, +} + +// ===== impl NoProxy ===== + +impl NoProxy { + /* + fn from_env() -> NoProxy { + let raw = std::env::var("NO_PROXY") + .or_else(|_| std::env::var("no_proxy")) + .unwrap_or_default(); + + Self::from_string(&raw) + } + */ + + pub fn empty() -> NoProxy { + NoProxy { + ips: IpMatcher(Vec::new()), + domains: DomainMatcher(Vec::new()), + } + } + + /// Returns a new no-proxy configuration based on a `no_proxy` string (or `None` if no variables + /// are set) + /// The rules are as follows: + /// * The environment variable `NO_PROXY` is checked, if it is not set, `no_proxy` is checked + /// * If neither environment variable is set, `None` is returned + /// * Entries are expected to be comma-separated (whitespace between entries is ignored) + /// * IP addresses (both IPv4 and IPv6) are allowed, as are optional subnet masks (by adding /size, + /// for example "`192.168.1.0/24`"). + /// * An entry "`*`" matches all hostnames (this is the only wildcard allowed) + /// * Any other entry is considered a domain name (and may contain a leading dot, for example `google.com` + /// and `.google.com` are equivalent) and would match both that domain AND all subdomains. 
+ /// + /// For example, if `"NO_PROXY=google.com, 192.168.1.0/24"` was set, all of the following would match + /// (and therefore would bypass the proxy): + /// * `http://google.com/` + /// * `http://www.google.com/` + /// * `http://192.168.1.42/` + /// + /// The URL `http://notgoogle.com/` would not match. + pub fn from_string(no_proxy_list: &str) -> Self { + let mut ips = Vec::new(); + let mut domains = Vec::new(); + let parts = no_proxy_list.split(',').map(str::trim); + for part in parts { + match part.parse::() { + // If we can parse an IP net or address, then use it, otherwise, assume it is a domain + Ok(ip) => ips.push(Ip::Network(ip)), + Err(_) => match part.parse::() { + Ok(addr) => ips.push(Ip::Address(addr)), + Err(_) => domains.push(part.to_owned()), + }, + } + } + NoProxy { + ips: IpMatcher(ips), + domains: DomainMatcher(domains), + } + } + + pub fn contains(&self, host: &str) -> bool { + // According to RFC3986, raw IPv6 hosts will be wrapped in []. So we need to strip those off + // the end in order to parse correctly + let host = if host.starts_with('[') { + let x: &[_] = &['[', ']']; + host.trim_matches(x) + } else { + host + }; + match host.parse::() { + // If we can parse an IP addr, then use it, otherwise, assume it is a domain + Ok(ip) => self.ips.contains(ip), + Err(_) => self.domains.contains(host), + } + } +} + +// ===== impl IpMatcher ===== + +impl IpMatcher { + pub fn contains(&self, addr: IpAddr) -> bool { + for ip in &self.0 { + match ip { + Ip::Address(address) => { + if &addr == address { + return true; + } + } + Ip::Network(net) => { + if net.contains(&addr) { + return true; + } + } + } + } + false + } +} + +// ===== impl DomainMatcher ===== + +impl DomainMatcher { + // The following links may be useful to understand the origin of these rules: + // * https://curl.se/libcurl/c/CURLOPT_NOPROXY.html + // * https://github.com/curl/curl/issues/1208 + pub fn contains(&self, domain: &str) -> bool { + let domain_len = domain.len(); + for d in &self.0 { + if d == domain || d.strip_prefix('.') == Some(domain) { + return true; + } else if domain.ends_with(d) { + if d.starts_with('.') { + // If the first character of d is a dot, that means the first character of domain + // must also be a dot, so we are looking at a subdomain of d and that matches + return true; + } else if domain.as_bytes().get(domain_len - d.len() - 1) == Some(&b'.') { + // Given that d is a prefix of domain, if the prior character in domain is a dot + // then that means we must be matching a subdomain of d, and that matches + return true; + } + } else if d == "*" { + return true; + } + } + false + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_no_proxy_wildcard() { + let no_proxy = NoProxy::from_string("*"); + assert!(no_proxy.contains("any.where")); + } + + #[test] + fn test_no_proxy_ip_ranges() { + let no_proxy = + NoProxy::from_string(".foo.bar, bar.baz,10.42.1.1/24,::1,10.124.7.8,2001::/17"); + + let should_not_match = [ + // random url, not in no_proxy + "hyper.rs", + // make sure that random non-subdomain string prefixes don't match + "notfoo.bar", + // make sure that random non-subdomain string prefixes don't match + "notbar.baz", + // ipv4 address out of range + "10.43.1.1", + // ipv4 address out of range + "10.124.7.7", + // ipv6 address out of range + "[ffff:db8:a0b:12f0::1]", + // ipv6 address out of range + "[2005:db8:a0b:12f0::1]", + ]; + + for host in &should_not_match { + assert!(!no_proxy.contains(host), "should not contain {:?}", host); + } + + let 
should_match = [ + // make sure subdomains (with leading .) match + "hello.foo.bar", + // make sure exact matches (without leading .) match (also makes sure spaces between entries work) + "bar.baz", + // make sure subdomains (without leading . in no_proxy) match + "foo.bar.baz", + // make sure subdomains (without leading . in no_proxy) match - this differs from cURL + "foo.bar", + // ipv4 address match within range + "10.42.1.100", + // ipv6 address exact match + "[::1]", + // ipv6 address match within range + "[2001:db8:a0b:12f0::1]", + // ipv4 address exact match + "10.124.7.8", + ]; + + for host in &should_match { + assert!(no_proxy.contains(host), "should contain {:?}", host); + } + } +} diff --git a/src/client/proxy/utils.rs b/src/client/proxy/utils.rs new file mode 100644 index 00000000..cbb8f3c3 --- /dev/null +++ b/src/client/proxy/utils.rs @@ -0,0 +1,86 @@ +use http::HeaderValue; +use percent_encoding::percent_decode_str; +use super::matcher::Intercept; + + +pub fn get_first_env(names: &[&str]) -> String { + for name in names { + if let Ok(val) = std::env::var(name) { + return val; + } + } + + String::new() +} + +pub fn parse_env_uri(val: &str) -> Option { + let uri = val.parse::().ok()?; + let mut builder = http::Uri::builder(); + let mut is_httpish = false; + let mut basic_auth = None; + let mut raw_auth = None; + + builder = builder.scheme(match uri.scheme() { + Some(s) => { + if s == &http::uri::Scheme::HTTP || s == &http::uri::Scheme::HTTPS { + is_httpish = true; + s.clone() + } else if s.as_str() == "socks5" || s.as_str() == "socks5h" { + s.clone() + } else { + // can't use this proxy scheme + return None; + } + } + // if no scheme provided, assume they meant 'http' + None => { + is_httpish = true; + http::uri::Scheme::HTTP + } + }); + + let authority = uri.authority()?; + + if let Some((userinfo, host_port)) = authority.as_str().split_once('@') { + let (user, pass) = userinfo.split_once(':')?; + let user = percent_decode_str(user).decode_utf8_lossy(); + let pass = percent_decode_str(pass).decode_utf8_lossy(); + if is_httpish { + basic_auth = Some(encode_basic_auth(&user, Some(&pass))); + } else { + raw_auth = Some((user.into(), pass.into())); + } + builder = builder.authority(host_port); + } else { + builder = builder.authority(authority.clone()); + } + + // removing any path, but we MUST specify one or the builder errors + builder = builder.path_and_query("/"); + + let dst = builder.build().ok()?; + + Some(Intercept { + uri: dst, + basic_auth, + raw_auth, + }) +} + +pub fn encode_basic_auth(user: &str, pass: Option<&str>) -> HeaderValue { + use base64::prelude::BASE64_STANDARD; + use base64::write::EncoderWriter; + use std::io::Write; + + let mut buf = b"Basic ".to_vec(); + { + let mut encoder = EncoderWriter::new(&mut buf, &BASE64_STANDARD); + let _ = write!(encoder, "{user}:"); + if let Some(password) = pass { + let _ = write!(encoder, "{password}"); + } + } + let mut header = HeaderValue::from_bytes(&buf).expect("base64 is always valid HeaderValue"); + header.set_sensitive(true); + header +}
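
Usage note (illustrative sketch, not part of the diffs above): the snippet below shows how a client might consult the new matcher once this series lands, assuming the `client-proxy-env` feature is enabled and the paths stay as written (`hyper_util::client::proxy::Matcher`). The destination URI and hostnames are placeholders.

    use hyper_util::client::proxy::Matcher;

    fn main() {
        // Reads HTTP_PROXY / HTTPS_PROXY / ALL_PROXY / NO_PROXY (and their
        // lowercase variants); when REQUEST_METHOD is set (CGI), all proxy
        // variables are ignored.
        let matcher = Matcher::from_env();

        // Placeholder destination; any absolute http/https URI works here.
        let dst: http::Uri = "http://example.com".parse().unwrap();

        match matcher.intercept(&dst) {
            Some(proxy) => {
                // Connect to the proxy's URI instead of `dst`. For http/https
                // proxies configured with userinfo, `basic_auth()` yields a
                // pre-encoded, sensitive-marked header value (typically sent
                // as `Proxy-Authorization`).
                println!("tunnel via {}", proxy.uri());
                if let Some(auth) = proxy.basic_auth() {
                    let _ = auth;
                }
            }
            None => println!("connect directly to {dst}"),
        }
    }

For `socks5://` / `socks5h://` proxy values, `raw_auth()` exposes the decoded user/password pair instead of a Basic header. The builder added in PATCH 2/2 (`Matcher::builder()` with `all_proxy`, `http_proxy`, `https_proxy`, `no_proxy`, and `from_system_proxy()` on macOS under `system-proxies`) supports the same intercept checks without reading the environment, as its unit tests exercise in-crate.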