diff --git a/Cargo.toml b/Cargo.toml index 22e6e855..4d6bbd3e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,13 +18,16 @@ features = ["full"] rustdoc-args = ["--cfg", "docsrs"] [dependencies] -hyper = "1.6.0" +base64 = { version = "0.22", optional = true } +bytes = "1.7.1" +futures-channel = { version = "0.3", optional = true } futures-util = { version = "0.3.16", default-features = false } http = "1.0" http-body = "1.0.0" -bytes = "1.7.1" +hyper = "1.6.0" +ipnet = { version = "2.9", optional = true } +percent-encoding = { version = "2.3", optional = true } pin-project-lite = "0.2.4" -futures-channel = { version = "0.3", optional = true } socket2 = { version = "0.5.9", optional = true, features = ["all"] } tracing = { version = "0.1", default-features = false, features = ["std"], optional = true } tokio = { version = "1", optional = true, default-features = false } @@ -61,6 +64,7 @@ full = [ client = ["hyper/client", "dep:tracing", "dep:futures-channel", "dep:tower-service"] client-legacy = ["client", "dep:socket2", "tokio/sync", "dep:libc"] +client-proxy = ["client", "dep:base64", "dep:ipnet", "dep:percent-encoding"] server = ["hyper/server"] server-auto = ["server", "http1", "http2"] diff --git a/src/client/mod.rs b/src/client/mod.rs index e9215425..0d896030 100644 --- a/src/client/mod.rs +++ b/src/client/mod.rs @@ -3,3 +3,6 @@ /// Legacy implementations of `connect` module and `Client` #[cfg(feature = "client-legacy")] pub mod legacy; + +#[cfg(feature = "client-proxy")] +pub mod proxy; diff --git a/src/client/proxy/matcher.rs b/src/client/proxy/matcher.rs new file mode 100644 index 00000000..a9b5bd5f --- /dev/null +++ b/src/client/proxy/matcher.rs @@ -0,0 +1,700 @@ +//! Proxy matchers +//! +//! This module contains different matchers to configure rules for when a proxy +//! should be used, and if so, with what arguments. +//! +//! A [`Matcher`] can be constructed either using environment variables, or +//! a [`Matcher::builder()`]. +//! +//! Once constructed, the `Matcher` can be asked if it intercepts a `Uri` by +//! calling [`Matcher::intercept()`]. +//! +//! An [`Intercept`] includes the destination for the proxy, and any parsed +//! authentication to be used. + +use std::fmt; +use std::net::IpAddr; + +use http::header::HeaderValue; +use ipnet::IpNet; +use percent_encoding::percent_decode_str; + +#[cfg(docsrs)] +pub use self::builder::IntoValue; +#[cfg(not(docsrs))] +use self::builder::IntoValue; + +/// A proxy matcher, usually built from environment variables. +pub struct Matcher { + http: Option, + https: Option, + no: NoProxy, +} + +/// A matched proxy, +/// +/// This is returned by a matcher if a proxy should be used. +#[derive(Clone)] +pub struct Intercept { + uri: http::Uri, + auth: Auth, +} + +/// A builder to create a [`Matcher`]. +/// +/// Construct with [`Matcher::builder()`]. +#[derive(Default)] +pub struct Builder { + is_cgi: bool, + all: String, + http: String, + https: String, + no: String, +} + +#[derive(Clone)] +enum Auth { + Empty, + Basic(http::header::HeaderValue), + Raw(String, String), +} + +/// A filter for proxy matchers. +/// +/// This type is based off the `NO_PROXY` rules used by curl. +#[derive(Clone, Debug, Default)] +struct NoProxy { + ips: IpMatcher, + domains: DomainMatcher, +} + +#[derive(Clone, Debug, Default)] +struct DomainMatcher(Vec); + +#[derive(Clone, Debug, Default)] +struct IpMatcher(Vec); + +#[derive(Clone, Debug)] +enum Ip { + Address(IpAddr), + Network(IpNet), +} + +// ===== impl Matcher ===== + +impl Matcher { + /// Create a matcher reading the current environment variables. + /// + /// This checks for values in the following variables, treating them the + /// same as curl does: + /// + /// - `ALL_PROXY`/`all_proxy` + /// - `HTTPS_PROXY`/`https_proxy` + /// - `HTTP_PROXY`/`http_proxy` + /// - `NO_PROXY`/`no_proxy` + pub fn from_env() -> Self { + Builder::from_env().build() + } + + /// Start a builder to configure a matcher. + pub fn builder() -> Builder { + Builder::default() + } + + /// Check if the destination should be intercepted by a proxy. + /// + /// If the proxy rules match the destination, a new `Uri` will be returned + /// to connect to. + pub fn intercept(&self, dst: &http::Uri) -> Option { + // TODO(perf): don't need to check `no` if below doesn't match... + if self.no.contains(dst.host()?) { + return None; + } + + match dst.scheme_str() { + Some("http") => self.http.clone(), + Some("https") => self.https.clone(), + _ => None, + } + } +} + +impl fmt::Debug for Matcher { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let mut b = f.debug_struct("Matcher"); + + if let Some(ref http) = self.http { + b.field("http", http); + } + + if let Some(ref https) = self.https { + b.field("https", https); + } + + if !self.no.is_empty() { + b.field("no", &self.no); + } + b.finish() + } +} + +// ===== impl Intercept ===== + +impl Intercept { + /// Get the `http::Uri` for the target proxy. + pub fn uri(&self) -> &http::Uri { + &self.uri + } + + /// Get any configured basic authorization. + /// + /// This should usually be used with a `Proxy-Authorization` header, to + /// send in Basic format. + /// + /// # Example + /// + /// ```rust + /// # use hyper_util::client::proxy::matcher::Matcher; + /// # let uri = http::Uri::from_static("https://hyper.rs"); + /// let m = Matcher::builder() + /// .all("https://Aladdin:opensesame@localhost:8887") + /// .build(); + /// + /// let proxy = m.intercept(&uri).expect("example"); + /// let auth = proxy.basic_auth().expect("example"); + /// assert_eq!(auth, "Basic QWxhZGRpbjpvcGVuc2VzYW1l"); + /// ``` + pub fn basic_auth(&self) -> Option<&HeaderValue> { + if let Auth::Basic(ref val) = self.auth { + Some(val) + } else { + None + } + } + + /// Get any configured raw authorization. + /// + /// If not detected as another scheme, this is the username and password + /// that should be sent with whatever protocol the proxy handshake uses. + /// + /// # Example + /// + /// ```rust + /// # use hyper_util::client::proxy::matcher::Matcher; + /// # let uri = http::Uri::from_static("https://hyper.rs"); + /// let m = Matcher::builder() + /// .all("socks5h://Aladdin:opensesame@localhost:8887") + /// .build(); + /// + /// let proxy = m.intercept(&uri).expect("example"); + /// let auth = proxy.raw_auth().expect("example"); + /// assert_eq!(auth, ("Aladdin", "opensesame")); + /// ``` + pub fn raw_auth(&self) -> Option<(&str, &str)> { + if let Auth::Raw(ref u, ref p) = self.auth { + Some((u.as_str(), p.as_str())) + } else { + None + } + } +} + +impl fmt::Debug for Intercept { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("Intercept") + .field("uri", &self.uri) + // dont output auth, its sensitive + .finish() + } +} + +// ===== impl Builder ===== + +impl Builder { + fn from_env() -> Self { + Builder { + is_cgi: std::env::var_os("REQUEST_METHOD").is_some(), + all: get_first_env(&["ALL_PROXY", "all_proxy"]), + http: get_first_env(&["HTTP_PROXY", "http_proxy"]), + https: get_first_env(&["HTTPS_PROXY", "https_proxy"]), + no: get_first_env(&["NO_PROXY", "no_proxy"]), + } + } + + /// Set the target proxy for all destinations. + pub fn all(mut self, val: S) -> Self + where + S: IntoValue, + { + self.all = val.into_value(); + self + } + + /// Set the target proxy for HTTP destinations. + pub fn http(mut self, val: S) -> Self + where + S: IntoValue, + { + self.http = val.into_value(); + self + } + + /// Set the target proxy for HTTPS destinations. + pub fn https(mut self, val: S) -> Self + where + S: IntoValue, + { + self.https = val.into_value(); + self + } + + /// Set the "no" proxy filter. + /// + /// The rules are as follows: + /// * Entries are expected to be comma-separated (whitespace between entries is ignored) + /// * IP addresses (both IPv4 and IPv6) are allowed, as are optional subnet masks (by adding /size, + /// for example "`192.168.1.0/24`"). + /// * An entry "`*`" matches all hostnames (this is the only wildcard allowed) + /// * Any other entry is considered a domain name (and may contain a leading dot, for example `google.com` + /// and `.google.com` are equivalent) and would match both that domain AND all subdomains. + /// + /// For example, if `"NO_PROXY=google.com, 192.168.1.0/24"` was set, all of the following would match + /// (and therefore would bypass the proxy): + /// * `http://google.com/` + /// * `http://www.google.com/` + /// * `http://192.168.1.42/` + /// + /// The URL `http://notgoogle.com/` would not match. + pub fn no(mut self, val: S) -> Self + where + S: IntoValue, + { + self.no = val.into_value(); + self + } + + /// Construct a [`Matcher`] using the configured values. + pub fn build(self) -> Matcher { + if self.is_cgi { + return Matcher { + http: None, + https: None, + no: NoProxy::empty(), + }; + } + + let all = parse_env_uri(&self.all); + + Matcher { + http: parse_env_uri(&self.http).or_else(|| all.clone()), + https: parse_env_uri(&self.https).or(all), + no: NoProxy::from_string(&self.no), + } + } +} + +fn get_first_env(names: &[&str]) -> String { + for name in names { + if let Ok(val) = std::env::var(name) { + return val; + } + } + + String::new() +} + +fn parse_env_uri(val: &str) -> Option { + let uri = val.parse::().ok()?; + let mut builder = http::Uri::builder(); + let mut is_httpish = false; + let mut auth = Auth::Empty; + + builder = builder.scheme(match uri.scheme() { + Some(s) => { + if s == &http::uri::Scheme::HTTP || s == &http::uri::Scheme::HTTPS { + is_httpish = true; + s.clone() + } else if s.as_str() == "socks5" || s.as_str() == "socks5h" { + s.clone() + } else { + // can't use this proxy scheme + return None; + } + } + // if no scheme provided, assume they meant 'http' + None => { + is_httpish = true; + http::uri::Scheme::HTTP + } + }); + + let authority = uri.authority()?; + + if let Some((userinfo, host_port)) = authority.as_str().split_once('@') { + let (user, pass) = userinfo.split_once(':')?; + let user = percent_decode_str(user).decode_utf8_lossy(); + let pass = percent_decode_str(pass).decode_utf8_lossy(); + if is_httpish { + auth = Auth::Basic(encode_basic_auth(&user, Some(&pass))); + } else { + auth = Auth::Raw(user.into(), pass.into()); + } + builder = builder.authority(host_port); + } else { + builder = builder.authority(authority.clone()); + } + + // removing any path, but we MUST specify one or the builder errors + builder = builder.path_and_query("/"); + + let dst = builder.build().ok()?; + + Some(Intercept { uri: dst, auth }) +} + +fn encode_basic_auth(user: &str, pass: Option<&str>) -> HeaderValue { + use base64::prelude::BASE64_STANDARD; + use base64::write::EncoderWriter; + use std::io::Write; + + let mut buf = b"Basic ".to_vec(); + { + let mut encoder = EncoderWriter::new(&mut buf, &BASE64_STANDARD); + let _ = write!(encoder, "{user}:"); + if let Some(password) = pass { + let _ = write!(encoder, "{password}"); + } + } + let mut header = HeaderValue::from_bytes(&buf).expect("base64 is always valid HeaderValue"); + header.set_sensitive(true); + header +} + +impl NoProxy { + /* + fn from_env() -> NoProxy { + let raw = std::env::var("NO_PROXY") + .or_else(|_| std::env::var("no_proxy")) + .unwrap_or_default(); + + Self::from_string(&raw) + } + */ + + fn empty() -> NoProxy { + NoProxy { + ips: IpMatcher(Vec::new()), + domains: DomainMatcher(Vec::new()), + } + } + + /// Returns a new no-proxy configuration based on a `no_proxy` string (or `None` if no variables + /// are set) + /// The rules are as follows: + /// * The environment variable `NO_PROXY` is checked, if it is not set, `no_proxy` is checked + /// * If neither environment variable is set, `None` is returned + /// * Entries are expected to be comma-separated (whitespace between entries is ignored) + /// * IP addresses (both IPv4 and IPv6) are allowed, as are optional subnet masks (by adding /size, + /// for example "`192.168.1.0/24`"). + /// * An entry "`*`" matches all hostnames (this is the only wildcard allowed) + /// * Any other entry is considered a domain name (and may contain a leading dot, for example `google.com` + /// and `.google.com` are equivalent) and would match both that domain AND all subdomains. + /// + /// For example, if `"NO_PROXY=google.com, 192.168.1.0/24"` was set, all of the following would match + /// (and therefore would bypass the proxy): + /// * `http://google.com/` + /// * `http://www.google.com/` + /// * `http://192.168.1.42/` + /// + /// The URL `http://notgoogle.com/` would not match. + pub fn from_string(no_proxy_list: &str) -> Self { + let mut ips = Vec::new(); + let mut domains = Vec::new(); + let parts = no_proxy_list.split(',').map(str::trim); + for part in parts { + match part.parse::() { + // If we can parse an IP net or address, then use it, otherwise, assume it is a domain + Ok(ip) => ips.push(Ip::Network(ip)), + Err(_) => match part.parse::() { + Ok(addr) => ips.push(Ip::Address(addr)), + Err(_) => { + if !part.trim().is_empty() { + domains.push(part.to_owned()) + } + } + }, + } + } + NoProxy { + ips: IpMatcher(ips), + domains: DomainMatcher(domains), + } + } + + /// Return true if this matches the host (domain or IP). + pub fn contains(&self, host: &str) -> bool { + // According to RFC3986, raw IPv6 hosts will be wrapped in []. So we need to strip those off + // the end in order to parse correctly + let host = if host.starts_with('[') { + let x: &[_] = &['[', ']']; + host.trim_matches(x) + } else { + host + }; + match host.parse::() { + // If we can parse an IP addr, then use it, otherwise, assume it is a domain + Ok(ip) => self.ips.contains(ip), + Err(_) => self.domains.contains(host), + } + } + + fn is_empty(&self) -> bool { + self.ips.0.is_empty() && self.domains.0.is_empty() + } +} + +impl IpMatcher { + fn contains(&self, addr: IpAddr) -> bool { + for ip in &self.0 { + match ip { + Ip::Address(address) => { + if &addr == address { + return true; + } + } + Ip::Network(net) => { + if net.contains(&addr) { + return true; + } + } + } + } + false + } +} + +impl DomainMatcher { + // The following links may be useful to understand the origin of these rules: + // * https://curl.se/libcurl/c/CURLOPT_NOPROXY.html + // * https://github.com/curl/curl/issues/1208 + fn contains(&self, domain: &str) -> bool { + let domain_len = domain.len(); + for d in &self.0 { + if d == domain || d.strip_prefix('.') == Some(domain) { + return true; + } else if domain.ends_with(d) { + if d.starts_with('.') { + // If the first character of d is a dot, that means the first character of domain + // must also be a dot, so we are looking at a subdomain of d and that matches + return true; + } else if domain.as_bytes().get(domain_len - d.len() - 1) == Some(&b'.') { + // Given that d is a prefix of domain, if the prior character in domain is a dot + // then that means we must be matching a subdomain of d, and that matches + return true; + } + } else if d == "*" { + return true; + } + } + false + } +} + +mod builder { + /// A type that can used as a `Builder` value. + /// + /// Private and sealed, only visible in docs. + pub trait IntoValue { + #[doc(hidden)] + fn into_value(self) -> String; + } + + impl IntoValue for String { + #[doc(hidden)] + fn into_value(self) -> String { + self + } + } + + impl IntoValue for &String { + #[doc(hidden)] + fn into_value(self) -> String { + self.into() + } + } + + impl IntoValue for &str { + #[doc(hidden)] + fn into_value(self) -> String { + self.into() + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_domain_matcher() { + let domains = vec![".foo.bar".into(), "bar.foo".into()]; + let matcher = DomainMatcher(domains); + + // domains match with leading `.` + assert!(matcher.contains("foo.bar")); + // subdomains match with leading `.` + assert!(matcher.contains("www.foo.bar")); + + // domains match with no leading `.` + assert!(matcher.contains("bar.foo")); + // subdomains match with no leading `.` + assert!(matcher.contains("www.bar.foo")); + + // non-subdomain string prefixes don't match + assert!(!matcher.contains("notfoo.bar")); + assert!(!matcher.contains("notbar.foo")); + } + + #[test] + fn test_no_proxy_wildcard() { + let no_proxy = NoProxy::from_string("*"); + assert!(no_proxy.contains("any.where")); + } + + #[test] + fn test_no_proxy_ip_ranges() { + let no_proxy = + NoProxy::from_string(".foo.bar, bar.baz,10.42.1.1/24,::1,10.124.7.8,2001::/17"); + + let should_not_match = [ + // random url, not in no_proxy + "hyper.rs", + // make sure that random non-subdomain string prefixes don't match + "notfoo.bar", + // make sure that random non-subdomain string prefixes don't match + "notbar.baz", + // ipv4 address out of range + "10.43.1.1", + // ipv4 address out of range + "10.124.7.7", + // ipv6 address out of range + "[ffff:db8:a0b:12f0::1]", + // ipv6 address out of range + "[2005:db8:a0b:12f0::1]", + ]; + + for host in &should_not_match { + assert!(!no_proxy.contains(host), "should not contain {:?}", host); + } + + let should_match = [ + // make sure subdomains (with leading .) match + "hello.foo.bar", + // make sure exact matches (without leading .) match (also makes sure spaces between entries work) + "bar.baz", + // make sure subdomains (without leading . in no_proxy) match + "foo.bar.baz", + // make sure subdomains (without leading . in no_proxy) match - this differs from cURL + "foo.bar", + // ipv4 address match within range + "10.42.1.100", + // ipv6 address exact match + "[::1]", + // ipv6 address match within range + "[2001:db8:a0b:12f0::1]", + // ipv4 address exact match + "10.124.7.8", + ]; + + for host in &should_match { + assert!(no_proxy.contains(host), "should contain {:?}", host); + } + } + + macro_rules! p { + ($($n:ident = $v:expr,)*) => ({Builder { + $($n: $v.into(),)* + ..Builder::default() + }.build()}); + } + + fn intercept<'a>(p: &'a Matcher, u: &str) -> Intercept { + p.intercept(&u.parse().unwrap()).unwrap() + } + + #[test] + fn test_all_proxy() { + let p = p! { + all = "http://om.nom", + }; + + assert_eq!("http://om.nom", intercept(&p, "http://example.com").uri()); + + assert_eq!("http://om.nom", intercept(&p, "https://example.com").uri()); + } + + #[test] + fn test_specific_overrides_all() { + let p = p! { + all = "http://no.pe", + http = "http://y.ep", + }; + + assert_eq!("http://no.pe", intercept(&p, "https://example.com").uri()); + + // the http rule is "more specific" than the all rule + assert_eq!("http://y.ep", intercept(&p, "http://example.com").uri()); + } + + #[test] + fn test_parse_no_scheme_defaults_to_http() { + let p = p! { + https = "y.ep", + http = "127.0.0.1:8887", + }; + + assert_eq!(intercept(&p, "https://example.local").uri(), "http://y.ep"); + assert_eq!( + intercept(&p, "http://example.local").uri(), + "http://127.0.0.1:8887" + ); + } + + #[test] + fn test_parse_http_auth() { + let p = p! { + all = "http://Aladdin:opensesame@y.ep", + }; + + let proxy = intercept(&p, "https://example.local"); + assert_eq!(proxy.uri(), "http://y.ep"); + assert_eq!( + proxy.basic_auth().expect("basic_auth"), + "Basic QWxhZGRpbjpvcGVuc2VzYW1l" + ); + } + + #[test] + fn test_parse_http_auth_without_scheme() { + let p = p! { + all = "Aladdin:opensesame@y.ep", + }; + + let proxy = intercept(&p, "https://example.local"); + assert_eq!(proxy.uri(), "http://y.ep"); + assert_eq!( + proxy.basic_auth().expect("basic_auth"), + "Basic QWxhZGRpbjpvcGVuc2VzYW1l" + ); + } + + #[test] + fn test_dont_parse_http_when_is_cgi() { + let mut builder = Matcher::builder(); + builder.is_cgi = true; + builder.http = "http://never.gonna.let.you.go".into(); + let m = builder.build(); + + assert!(m.intercept(&"http://rick.roll".parse().unwrap()).is_none()); + } +} diff --git a/src/client/proxy/mod.rs b/src/client/proxy/mod.rs new file mode 100644 index 00000000..59c8e46d --- /dev/null +++ b/src/client/proxy/mod.rs @@ -0,0 +1,3 @@ +//! Proxy utilities + +pub mod matcher;