Skip to content

Commit 0bf2a4f

Browse files
committed
Generate RayCluster CA certificate Secret
1 parent f5be334 commit 0bf2a4f

File tree

2 files changed

+140
-59
lines changed

2 files changed

+140
-59
lines changed

main.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ func init() {
7979
utilruntime.Must(dsciv1.AddToScheme(scheme))
8080
}
8181

82-
// +kubebuilder:rbac:groups=config.openshift.io,resources=ingresses,verbs=get;
82+
// +kubebuilder:rbac:groups=config.openshift.io,resources=ingresses,verbs=get
8383

8484
func main() {
8585
var configMapName string

pkg/controllers/raycluster_controller.go

Lines changed: 139 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,16 @@ package controllers
1919
import (
2020
"context"
2121
"crypto/rand"
22+
"crypto/rsa"
2223
"crypto/sha1"
24+
"crypto/x509"
25+
"crypto/x509/pkix"
2326
"encoding/base64"
27+
"encoding/pem"
2428
"fmt"
29+
"math/big"
30+
rand2 "math/rand"
31+
"time"
2532

2633
dsciv1 "github.com/opendatahub-io/opendatahub-operator/v2/apis/dscinitialization/v1"
2734
rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1"
@@ -32,18 +39,18 @@ import (
3239
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
3340
"k8s.io/apimachinery/pkg/runtime"
3441
"k8s.io/apimachinery/pkg/util/intstr"
35-
coreapply "k8s.io/client-go/applyconfigurations/core/v1"
36-
metav1apply "k8s.io/client-go/applyconfigurations/meta/v1"
37-
v1 "k8s.io/client-go/applyconfigurations/meta/v1"
38-
networkingapply "k8s.io/client-go/applyconfigurations/networking/v1"
39-
rbacapply "k8s.io/client-go/applyconfigurations/rbac/v1"
42+
corev1ac "k8s.io/client-go/applyconfigurations/core/v1"
43+
metav1ac "k8s.io/client-go/applyconfigurations/meta/v1"
44+
networkingv1ac "k8s.io/client-go/applyconfigurations/networking/v1"
45+
rbacv1ac "k8s.io/client-go/applyconfigurations/rbac/v1"
4046
"k8s.io/client-go/kubernetes"
47+
"k8s.io/utils/ptr"
4148
ctrl "sigs.k8s.io/controller-runtime"
4249
"sigs.k8s.io/controller-runtime/pkg/client"
4350
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
4451

4552
routev1 "github.com/openshift/api/route/v1"
46-
routeapply "github.com/openshift/client-go/route/applyconfigurations/route/v1"
53+
routev1ac "github.com/openshift/client-go/route/applyconfigurations/route/v1"
4754
routev1client "github.com/openshift/client-go/route/clientset/versioned/typed/route/v1"
4855

4956
"github.com/project-codeflare/codeflare-operator/pkg/config"
@@ -144,6 +151,26 @@ func (r *RayClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request)
144151
return ctrl.Result{}, nil
145152
}
146153

154+
if isMTLSEnabled(r.Config) {
155+
caSecretName := caSecretNameFromCluster(cluster)
156+
_, err := r.kubeClient.CoreV1().Secrets(cluster.Namespace).Get(ctx, caSecretName, metav1.GetOptions{})
157+
if errors.IsNotFound(err) {
158+
key, cert, err := generateCACertificate()
159+
if err != nil {
160+
logger.Error(err, "Failed to generate CA certificate")
161+
return ctrl.Result{RequeueAfter: requeueTime}, err
162+
}
163+
_, err = r.kubeClient.CoreV1().Secrets(cluster.Namespace).Apply(ctx, desiredCASecret(cluster, key, cert), metav1.ApplyOptions{FieldManager: controllerName, Force: true})
164+
if err != nil {
165+
logger.Error(err, "Failed to create CA Secret")
166+
return ctrl.Result{RequeueAfter: requeueTime}, err
167+
}
168+
} else if err != nil {
169+
logger.Error(err, "Failed to get CA Secret")
170+
return ctrl.Result{RequeueAfter: requeueTime}, err
171+
}
172+
}
173+
147174
if cluster.Status.State != "suspended" && isRayDashboardOAuthEnabled(r.Config) && r.IsOpenShift {
148175
logger.Info("Creating OAuth Objects")
149176
_, err := r.routeClient.Routes(cluster.Namespace).Apply(ctx, desiredClusterRoute(cluster), metav1.ApplyOptions{FieldManager: controllerName, Force: true})
@@ -152,7 +179,7 @@ func (r *RayClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request)
152179
return ctrl.Result{RequeueAfter: requeueTime}, err
153180
}
154181

155-
_, err = r.kubeClient.CoreV1().Secrets(cluster.Namespace).Apply(ctx, desiredOAuthSecret(cluster, r), metav1.ApplyOptions{FieldManager: controllerName, Force: true})
182+
_, err = r.kubeClient.CoreV1().Secrets(cluster.Namespace).Apply(ctx, desiredOAuthSecret(cluster, r.CookieSalt), metav1.ApplyOptions{FieldManager: controllerName, Force: true})
156183
if err != nil {
157184
logger.Error(err, "Failed to create OAuth Secret")
158185
return ctrl.Result{RequeueAfter: requeueTime}, err
@@ -244,28 +271,29 @@ func getIngressHost(cfg *config.KubeRayConfiguration, cluster *rayv1.RayCluster,
244271
}
245272

246273
func isRayDashboardOAuthEnabled(cfg *config.KubeRayConfiguration) bool {
247-
if cfg != nil && cfg.RayDashboardOAuthEnabled != nil {
248-
return *cfg.RayDashboardOAuthEnabled
249-
}
250-
return true
274+
return cfg == nil || ptr.Deref(cfg.RayDashboardOAuthEnabled, true)
275+
}
276+
277+
func isMTLSEnabled(cfg *config.KubeRayConfiguration) bool {
278+
return cfg == nil || ptr.Deref(cfg.MTLSEnabled, true)
251279
}
252280

253281
// crbNameFromCluster builds the ClusterRoleBinding name for the given cluster
// as "<name>-<namespace>-auth".
func crbNameFromCluster(cluster *rayv1.RayCluster) string {
	// NOTE: potential naming conflicts ie {name: foo, ns: bar-baz} and {name: foo-bar, ns: baz}
	name, namespace := cluster.Name, cluster.Namespace
	return name + "-" + namespace + "-auth"
}
256284

257-
func desiredOAuthClusterRoleBinding(cluster *rayv1.RayCluster) *rbacapply.ClusterRoleBindingApplyConfiguration {
258-
return rbacapply.ClusterRoleBinding(
285+
func desiredOAuthClusterRoleBinding(cluster *rayv1.RayCluster) *rbacv1ac.ClusterRoleBindingApplyConfiguration {
286+
return rbacv1ac.ClusterRoleBinding(
259287
crbNameFromCluster(cluster)).
260288
WithLabels(map[string]string{"ray.io/cluster-name": cluster.Name}).
261289
WithSubjects(
262-
rbacapply.Subject().
290+
rbacv1ac.Subject().
263291
WithKind("ServiceAccount").
264292
WithName(oauthServiceAccountNameFromCluster(cluster)).
265293
WithNamespace(cluster.Namespace),
266294
).
267295
WithRoleRef(
268-
rbacapply.RoleRef().
296+
rbacv1ac.RoleRef().
269297
WithAPIGroup("rbac.authorization.k8s.io").
270298
WithKind("ClusterRole").
271299
WithName("system:auth-delegator"),
@@ -276,16 +304,16 @@ func oauthServiceAccountNameFromCluster(cluster *rayv1.RayCluster) string {
276304
return cluster.Name + "-oauth-proxy"
277305
}
278306

279-
func desiredServiceAccount(cluster *rayv1.RayCluster) *coreapply.ServiceAccountApplyConfiguration {
280-
return coreapply.ServiceAccount(oauthServiceAccountNameFromCluster(cluster), cluster.Namespace).
307+
func desiredServiceAccount(cluster *rayv1.RayCluster) *corev1ac.ServiceAccountApplyConfiguration {
308+
return corev1ac.ServiceAccount(oauthServiceAccountNameFromCluster(cluster), cluster.Namespace).
281309
WithLabels(map[string]string{"ray.io/cluster-name": cluster.Name}).
282310
WithAnnotations(map[string]string{
283311
"serviceaccounts.openshift.io/oauth-redirectreference.first": "" +
284312
`{"kind":"OAuthRedirectReference","apiVersion":"v1",` +
285313
`"reference":{"kind":"Route","name":"` + dashboardNameFromCluster(cluster) + `"}}`,
286314
}).
287315
WithOwnerReferences(
288-
v1.OwnerReference().WithUID(cluster.UID).WithName(cluster.Name).WithKind(cluster.Kind).WithAPIVersion(cluster.APIVersion),
316+
metav1ac.OwnerReference().WithUID(cluster.UID).WithName(cluster.Name).WithKind(cluster.Kind).WithAPIVersion(cluster.APIVersion),
289317
)
290318
}
291319

@@ -297,19 +325,19 @@ func rayClientNameFromCluster(cluster *rayv1.RayCluster) string {
297325
return "rayclient-" + cluster.Name
298326
}
299327

300-
func desiredClusterRoute(cluster *rayv1.RayCluster) *routeapply.RouteApplyConfiguration {
301-
return routeapply.Route(dashboardNameFromCluster(cluster), cluster.Namespace).
328+
func desiredClusterRoute(cluster *rayv1.RayCluster) *routev1ac.RouteApplyConfiguration {
329+
return routev1ac.Route(dashboardNameFromCluster(cluster), cluster.Namespace).
302330
WithLabels(map[string]string{"ray.io/cluster-name": cluster.Name}).
303-
WithSpec(routeapply.RouteSpec().
304-
WithTo(routeapply.RouteTargetReference().WithKind("Service").WithName(oauthServiceNameFromCluster(cluster))).
305-
WithPort(routeapply.RoutePort().WithTargetPort(intstr.FromString((oAuthServicePortName)))).
306-
WithTLS(routeapply.TLSConfig().
331+
WithSpec(routev1ac.RouteSpec().
332+
WithTo(routev1ac.RouteTargetReference().WithKind("Service").WithName(oauthServiceNameFromCluster(cluster))).
333+
WithPort(routev1ac.RoutePort().WithTargetPort(intstr.FromString((oAuthServicePortName)))).
334+
WithTLS(routev1ac.TLSConfig().
307335
WithInsecureEdgeTerminationPolicy(routev1.InsecureEdgeTerminationPolicyRedirect).
308336
WithTermination(routev1.TLSTerminationReencrypt),
309337
),
310338
).
311339
WithOwnerReferences(
312-
v1.OwnerReference().WithUID(cluster.UID).WithName(cluster.Name).WithKind(cluster.Kind).WithAPIVersion(cluster.APIVersion),
340+
metav1ac.OwnerReference().WithUID(cluster.UID).WithName(cluster.Name).WithKind(cluster.Kind).WithAPIVersion(cluster.APIVersion),
313341
)
314342
}
315343

@@ -321,14 +349,14 @@ func oauthServiceTLSSecretName(cluster *rayv1.RayCluster) string {
321349
return cluster.Name + "-proxy-tls-secret"
322350
}
323351

324-
func desiredOAuthService(cluster *rayv1.RayCluster) *coreapply.ServiceApplyConfiguration {
325-
return coreapply.Service(oauthServiceNameFromCluster(cluster), cluster.Namespace).
352+
func desiredOAuthService(cluster *rayv1.RayCluster) *corev1ac.ServiceApplyConfiguration {
353+
return corev1ac.Service(oauthServiceNameFromCluster(cluster), cluster.Namespace).
326354
WithLabels(map[string]string{"ray.io/cluster-name": cluster.Name}).
327355
WithAnnotations(map[string]string{"service.beta.openshift.io/serving-cert-secret-name": oauthServiceTLSSecretName(cluster)}).
328356
WithSpec(
329-
coreapply.ServiceSpec().
357+
corev1ac.ServiceSpec().
330358
WithPorts(
331-
coreapply.ServicePort().
359+
corev1ac.ServicePort().
332360
WithName(oAuthServicePortName).
333361
WithPort(oAuthServicePort).
334362
WithTargetPort(intstr.FromString(oAuthServicePortName)).
@@ -337,7 +365,7 @@ func desiredOAuthService(cluster *rayv1.RayCluster) *coreapply.ServiceApplyConfi
337365
WithSelector(map[string]string{"ray.io/cluster": cluster.Name, "ray.io/node-type": "head"}),
338366
).
339367
WithOwnerReferences(
340-
v1.OwnerReference().WithUID(cluster.UID).WithName(cluster.Name).WithKind(cluster.Kind).WithAPIVersion(cluster.APIVersion),
368+
metav1ac.OwnerReference().WithUID(cluster.UID).WithName(cluster.Name).WithKind(cluster.Kind).WithAPIVersion(cluster.APIVersion),
341369
)
342370
}
343371

@@ -346,68 +374,121 @@ func oauthSecretNameFromCluster(cluster *rayv1.RayCluster) string {
346374
}
347375

348376
// desiredOAuthSecret defines the desired OAuth secret object
349-
func desiredOAuthSecret(cluster *rayv1.RayCluster, r *RayClusterReconciler) *coreapply.SecretApplyConfiguration {
377+
func desiredOAuthSecret(cluster *rayv1.RayCluster, cookieSalt string) *corev1ac.SecretApplyConfiguration {
350378
// Generate the cookie secret for the OAuth proxy
351379
hasher := sha1.New() // REVIEW is SHA1 okay here?
352-
hasher.Write([]byte(cluster.Name + r.CookieSalt))
380+
hasher.Write([]byte(cluster.Name + cookieSalt))
353381
cookieSecret := base64.StdEncoding.EncodeToString(hasher.Sum(nil))
354382

355-
return coreapply.Secret(oauthSecretNameFromCluster(cluster), cluster.Namespace).
383+
return corev1ac.Secret(oauthSecretNameFromCluster(cluster), cluster.Namespace).
356384
WithLabels(map[string]string{"ray.io/cluster-name": cluster.Name}).
357385
WithStringData(map[string]string{"cookie_secret": cookieSecret}).
358386
WithOwnerReferences(
359-
v1.OwnerReference().WithUID(cluster.UID).WithName(cluster.Name).WithKind(cluster.Kind).WithAPIVersion(cluster.APIVersion),
387+
metav1ac.OwnerReference().WithUID(cluster.UID).WithName(cluster.Name).WithKind(cluster.Kind).WithAPIVersion(cluster.APIVersion),
360388
)
361-
// Create a Kubernetes secret to store the cookie secret
362389
}
363390

364-
func desiredNetworkPolicy(cluster *rayv1.RayCluster, kubeRayNamespaces []string) *networkingapply.NetworkPolicyApplyConfiguration {
365-
return networkingapply.NetworkPolicy(cluster.Name, cluster.Namespace).
391+
func caSecretNameFromCluster(cluster *rayv1.RayCluster) string {
392+
return "ca-secret-" + cluster.Name
393+
}
394+
395+
func desiredCASecret(cluster *rayv1.RayCluster, key, cert []byte) *corev1ac.SecretApplyConfiguration {
396+
return corev1ac.Secret(caSecretNameFromCluster(cluster), cluster.Namespace).
397+
WithLabels(map[string]string{"ray.io/cluster-name": cluster.Name}).
398+
WithData(map[string][]byte{
399+
corev1.TLSPrivateKeyKey: key,
400+
corev1.TLSCertKey: cert,
401+
}).
402+
WithOwnerReferences(metav1ac.OwnerReference().
403+
WithUID(cluster.UID).
404+
WithName(cluster.Name).
405+
WithKind(cluster.Kind).
406+
WithAPIVersion(cluster.APIVersion))
407+
}
408+
409+
// generateCACertificate creates a self-signed CA certificate backed by a
// freshly generated 2048-bit RSA key.
//
// It returns, in order, the PEM-encoded PKCS#1 private key (an
// "RSA PRIVATE KEY" block) and the PEM-encoded certificate (a "CERTIFICATE"
// block). The certificate is valid for one year starting at the time of the
// call. If key generation or certificate signing fails, both byte slices are
// nil and the error is returned.
func generateCACertificate() ([]byte, []byte, error) {
	// Take a single timestamp so NotBefore and NotAfter are exactly one year apart.
	now := time.Now()

	// NOTE(review): the serial number is drawn from math/rand, which is not a
	// cryptographically secure source; consider crypto/rand if serial
	// unpredictability matters for this CA.
	template := &x509.Certificate{
		SerialNumber: big.NewInt(rand2.Int63()),
		Subject: pkix.Name{
			Organization: []string{"OpenShift AI"},
		},
		NotBefore:             now,
		NotAfter:              now.AddDate(1, 0, 0),
		IsCA:                  true,
		ExtKeyUsage:           []x509.ExtKeyUsage{x509.ExtKeyUsageClientAuth, x509.ExtKeyUsageServerAuth},
		KeyUsage:              x509.KeyUsageDigitalSignature | x509.KeyUsageCertSign,
		BasicConstraintsValid: true,
	}

	privateKey, err := rsa.GenerateKey(rand.Reader, 2048)
	if err != nil {
		return nil, nil, err
	}

	// The template is used as both subject and issuer: the certificate is self-signed.
	certDER, err := x509.CreateCertificate(rand.Reader, template, template, &privateKey.PublicKey, privateKey)
	if err != nil {
		// Previously this error was dropped, yielding an empty PEM block and a nil error.
		return nil, nil, err
	}

	privateKeyPEM := pem.EncodeToMemory(&pem.Block{
		Type:  "RSA PRIVATE KEY",
		Bytes: x509.MarshalPKCS1PrivateKey(privateKey),
	})
	certPEM := pem.EncodeToMemory(&pem.Block{
		Type:  "CERTIFICATE",
		Bytes: certDER,
	})

	return privateKeyPEM, certPEM, nil
}
444+
445+
func desiredNetworkPolicy(cluster *rayv1.RayCluster, kubeRayNamespaces []string) *networkingv1ac.NetworkPolicyApplyConfiguration {
446+
return networkingv1ac.NetworkPolicy(cluster.Name, cluster.Namespace).
366447
WithLabels(map[string]string{"ray.io/cluster-name": cluster.Name}).
367-
WithSpec(networkingapply.NetworkPolicySpec().
368-
WithPodSelector(metav1apply.LabelSelector().WithMatchLabels(map[string]string{"ray.io/cluster": cluster.Name, "ray.io/node-type": "head"})).
448+
WithSpec(networkingv1ac.NetworkPolicySpec().
449+
WithPodSelector(metav1ac.LabelSelector().WithMatchLabels(map[string]string{"ray.io/cluster": cluster.Name, "ray.io/node-type": "head"})).
369450
WithIngress(
370-
networkingapply.NetworkPolicyIngressRule().
451+
networkingv1ac.NetworkPolicyIngressRule().
371452
WithPorts(
372-
networkingapply.NetworkPolicyPort().WithProtocol(corev1.ProtocolTCP).WithPort(intstr.FromInt(6379)),
373-
networkingapply.NetworkPolicyPort().WithProtocol(corev1.ProtocolTCP).WithPort(intstr.FromInt(10001)),
374-
networkingapply.NetworkPolicyPort().WithProtocol(corev1.ProtocolTCP).WithPort(intstr.FromInt(8080)),
375-
networkingapply.NetworkPolicyPort().WithProtocol(corev1.ProtocolTCP).WithPort(intstr.FromInt(8265)),
453+
networkingv1ac.NetworkPolicyPort().WithProtocol(corev1.ProtocolTCP).WithPort(intstr.FromInt(6379)),
454+
networkingv1ac.NetworkPolicyPort().WithProtocol(corev1.ProtocolTCP).WithPort(intstr.FromInt(10001)),
455+
networkingv1ac.NetworkPolicyPort().WithProtocol(corev1.ProtocolTCP).WithPort(intstr.FromInt(8080)),
456+
networkingv1ac.NetworkPolicyPort().WithProtocol(corev1.ProtocolTCP).WithPort(intstr.FromInt(8265)),
376457
).WithFrom(
377-
networkingapply.NetworkPolicyPeer().WithPodSelector(metav1apply.LabelSelector()),
458+
networkingv1ac.NetworkPolicyPeer().WithPodSelector(metav1ac.LabelSelector()),
378459
),
379-
networkingapply.NetworkPolicyIngressRule().
460+
networkingv1ac.NetworkPolicyIngressRule().
380461
WithFrom(
381-
networkingapply.NetworkPolicyPeer().WithPodSelector(metav1apply.LabelSelector().
462+
networkingv1ac.NetworkPolicyPeer().WithPodSelector(metav1ac.LabelSelector().
382463
WithMatchLabels(map[string]string{"app.kubernetes.io/component": "kuberay-operator"})).
383-
WithNamespaceSelector(metav1apply.LabelSelector().
384-
WithMatchExpressions(metav1apply.LabelSelectorRequirement().
464+
WithNamespaceSelector(metav1ac.LabelSelector().
465+
WithMatchExpressions(metav1ac.LabelSelectorRequirement().
385466
WithKey(corev1.LabelMetadataName).
386467
WithOperator(metav1.LabelSelectorOpIn).
387468
WithValues(kubeRayNamespaces...)))).
388469
WithPorts(
389-
networkingapply.NetworkPolicyPort().WithProtocol(corev1.ProtocolTCP).WithPort(intstr.FromInt(8265)),
390-
networkingapply.NetworkPolicyPort().WithProtocol(corev1.ProtocolTCP).WithPort(intstr.FromInt(10001)),
470+
networkingv1ac.NetworkPolicyPort().WithProtocol(corev1.ProtocolTCP).WithPort(intstr.FromInt(8265)),
471+
networkingv1ac.NetworkPolicyPort().WithProtocol(corev1.ProtocolTCP).WithPort(intstr.FromInt(10001)),
391472
),
392-
networkingapply.NetworkPolicyIngressRule().
473+
networkingv1ac.NetworkPolicyIngressRule().
393474
WithPorts(
394-
networkingapply.NetworkPolicyPort().WithProtocol(corev1.ProtocolTCP).WithPort(intstr.FromInt(8080)),
475+
networkingv1ac.NetworkPolicyPort().WithProtocol(corev1.ProtocolTCP).WithPort(intstr.FromInt(8080)),
395476
).
396477
WithFrom(
397-
networkingapply.NetworkPolicyPeer().WithNamespaceSelector(metav1apply.LabelSelector().
398-
WithMatchExpressions(metav1apply.LabelSelectorRequirement().
478+
networkingv1ac.NetworkPolicyPeer().WithNamespaceSelector(metav1ac.LabelSelector().
479+
WithMatchExpressions(metav1ac.LabelSelectorRequirement().
399480
WithKey(corev1.LabelMetadataName).
400481
WithOperator(metav1.LabelSelectorOpIn).
401482
WithValues("openshift-monitoring"))),
402483
),
403-
networkingapply.NetworkPolicyIngressRule().
484+
networkingv1ac.NetworkPolicyIngressRule().
404485
WithPorts(
405-
networkingapply.NetworkPolicyPort().WithProtocol(corev1.ProtocolTCP).WithPort(intstr.FromInt(8443)),
486+
networkingv1ac.NetworkPolicyPort().WithProtocol(corev1.ProtocolTCP).WithPort(intstr.FromInt(8443)),
406487
),
407488
),
408489
).
409490
WithOwnerReferences(
410-
v1.OwnerReference().WithUID(cluster.UID).WithName(cluster.Name).WithKind(cluster.Kind).WithAPIVersion(cluster.APIVersion),
491+
metav1ac.OwnerReference().WithUID(cluster.UID).WithName(cluster.Name).WithKind(cluster.Kind).WithAPIVersion(cluster.APIVersion),
411492
)
412493
}
413494

0 commit comments

Comments
 (0)