diff --git a/crates/stackable-operator/CHANGELOG.md b/crates/stackable-operator/CHANGELOG.md index e4a1df68e..f9ff6cc81 100644 --- a/crates/stackable-operator/CHANGELOG.md +++ b/crates/stackable-operator/CHANGELOG.md @@ -4,6 +4,13 @@ All notable changes to this project will be documented in this file. ## [Unreleased] +### Fixed + +- Fix Kubernetes cluster domain parsing from resolv.conf, e.g. on AWS EKS. + We now only consider Kubernetes services domains instead of all domains (which could include non-Kubernetes domains) ([#895]). + +[#895]: https://github.com/stackabletech/operator-rs/pull/895 + ## [0.79.0] - 2024-10-18 ### Added diff --git a/crates/stackable-operator/fixtures/cluster_domain/fail/no-service-domain.conf b/crates/stackable-operator/fixtures/cluster_domain/fail/no-service-domain.conf new file mode 100644 index 000000000..b6342feff --- /dev/null +++ b/crates/stackable-operator/fixtures/cluster_domain/fail/no-service-domain.conf @@ -0,0 +1,3 @@ +search cluster.local +nameserver 10.243.21.53 +options ndots:5 diff --git a/crates/stackable-operator/fixtures/cluster_domain/pass/aws-eks.resolv.conf b/crates/stackable-operator/fixtures/cluster_domain/pass/aws-eks.resolv.conf new file mode 100644 index 000000000..9bdf6a6da --- /dev/null +++ b/crates/stackable-operator/fixtures/cluster_domain/pass/aws-eks.resolv.conf @@ -0,0 +1,3 @@ +search default.svc.cluster.local svc.cluster.local cluster.local ec2.internal +nameserver 172.20.0.10 +options ndots:5 diff --git a/crates/stackable-operator/src/utils/cluster_domain.rs b/crates/stackable-operator/src/utils/cluster_domain.rs index 4a0b9bd4d..c690369f6 100644 --- a/crates/stackable-operator/src/utils/cluster_domain.rs +++ b/crates/stackable-operator/src/utils/cluster_domain.rs @@ -1,6 +1,6 @@ use std::{env, path::PathBuf, str::FromStr, sync::OnceLock}; -use snafu::{ResultExt, Snafu}; +use snafu::{OptionExt, ResultExt, Snafu}; use tracing::instrument; use crate::commons::networking::DomainName; @@ -25,8 +25,10 @@ 
pub enum Error { #[snafu(display(r#"unable to find "search" entry"#))] NoSearchEntry, - #[snafu(display(r#"unable to find unambiguous domain in "search" entry"#))] - AmbiguousDomainEntries, + #[snafu(display( + r#"unable to find the Kubernetes service domain, which needs to start with "svc.""# + ))] + FindKubernetesServiceDomain, } /// Tries to retrieve the Kubernetes cluster domain. @@ -118,24 +120,25 @@ fn retrieve_cluster_domain_from_resolv_conf( }) .context(ReadResolvConfFileSnafu)?; - // If there are multiple search directives, only the search - // man 5 resolv.conf - let Some(last_search_entry) = content + // If there are multiple search directives, only the last search directive is relevant. + // See `man 5 resolv.conf` + let last_search_entry = content .lines() .rev() - .map(|l| l.trim()) - .find(|&l| l.starts_with("search")) - .map(|l| l.trim_start_matches("search").trim()) - else { - return NoSearchEntrySnafu.fail(); - }; - - let Some(shortest_entry) = last_search_entry + .map(|entry| entry.trim()) + .find(|&entry| entry.starts_with("search")) + .map(|entry| entry.trim_start_matches("search").trim()) + .context(NoSearchEntrySnafu)?; + + // We only care about entries starting with "svc." to limit the entries to the ones used by + // Kubernetes for Services. + let shortest_entry = last_search_entry .split_ascii_whitespace() - .min_by_key(|item| item.len()) - else { - return AmbiguousDomainEntriesSnafu.fail(); - }; + // Normally there should only be one such entry, but we take the first one in any case. + .find(|&entry| entry.starts_with("svc.")) + // Strip the "svc." prefix to get only the cluster domain. + .map(|entry| entry.trim_start_matches("svc.").trim_end()) + .context(FindKubernetesServiceDomainSnafu)?; // NOTE (@Techassi): This is really sad and bothers me more than I would like to admit. This // clone could be removed by using the code directly in the calling function. But that would