From e3a8750edfd3b2b30125bf6289c9361bebc0fdec Mon Sep 17 00:00:00 2001 From: David Grove Date: Mon, 24 Jun 2024 12:13:53 -0400 Subject: [PATCH] bug fix: ensure ComponentStatus is initialized in ExpectedPodCount --- .../appwrapper/appwrapper_controller.go | 9 ++-- .../appwrapper/appwrapper_controller_test.go | 20 ++++--- internal/controller/awstatus/status_utils.go | 54 ------------------- .../workload/workload_controller.go | 4 +- pkg/utils/utils.go | 36 ++++++++++++- test/e2e/util_test.go | 12 +++-- 6 files changed, 63 insertions(+), 72 deletions(-) delete mode 100644 internal/controller/awstatus/status_utils.go diff --git a/internal/controller/appwrapper/appwrapper_controller.go b/internal/controller/appwrapper/appwrapper_controller.go index 5ddf66b..7484749 100644 --- a/internal/controller/appwrapper/appwrapper_controller.go +++ b/internal/controller/appwrapper/appwrapper_controller.go @@ -39,7 +39,6 @@ import ( "sigs.k8s.io/controller-runtime/pkg/reconcile" workloadv1beta2 "github.com/project-codeflare/appwrapper/api/v1beta2" - "github.com/project-codeflare/appwrapper/internal/controller/awstatus" "github.com/project-codeflare/appwrapper/pkg/config" "github.com/project-codeflare/appwrapper/pkg/utils" ) @@ -155,7 +154,7 @@ func (r *AppWrapperReconciler) Reconcile(ctx context.Context, req ctrl.Request) } } - if err := awstatus.EnsureComponentStatusInitialized(ctx, aw); err != nil { + if err := utils.EnsureComponentStatusInitialized(aw); err != nil { return ctrl.Result{}, err } @@ -491,7 +490,11 @@ func (r *AppWrapperReconciler) getPodStatus(ctx context.Context, aw *workloadv1b client.MatchingLabels{AppWrapperLabel: aw.Name}); err != nil { return nil, err } - summary := &podStatusSummary{expected: utils.ExpectedPodCount(aw)} + pc, err := utils.ExpectedPodCount(aw) + if err != nil { + return nil, err + } + summary := &podStatusSummary{expected: pc} for _, pod := range pods.Items { switch pod.Status.Phase { diff --git a/internal/controller/appwrapper/appwrapper_controller_test.go b/internal/controller/appwrapper/appwrapper_controller_test.go index 7755cb3..5c1227a 100644 --- a/internal/controller/appwrapper/appwrapper_controller_test.go +++ b/internal/controller/appwrapper/appwrapper_controller_test.go @@ -106,7 +106,7 @@ var _ = Describe("AppWrapper Controller", func() { Expect((*workload.AppWrapper)(aw).IsSuspended()).Should(BeFalse()) podStatus, err := awReconciler.getPodStatus(ctx, aw) Expect(err).NotTo(HaveOccurred()) - Expect(podStatus.pending).Should(Equal(utils.ExpectedPodCount(aw))) + Expect(utils.ExpectedPodCount(aw)).Should(Equal(podStatus.pending)) By("Simulating first Pod Running") Expect(setPodStatus(aw, v1.PodRunning, 1)).To(Succeed()) @@ -123,15 +123,18 @@ var _ = Describe("AppWrapper Controller", func() { Expect((*workload.AppWrapper)(aw).IsSuspended()).Should(BeFalse()) podStatus, err = awReconciler.getPodStatus(ctx, aw) Expect(err).NotTo(HaveOccurred()) - Expect(podStatus.pending).Should(Equal(utils.ExpectedPodCount(aw) - 1)) + Expect(podStatus.running).Should(Equal(int32(1))) + Expect(utils.ExpectedPodCount(aw)).Should(Equal(podStatus.pending + podStatus.running)) } fullyRunning := func() { aw := getAppWrapper(awName) By("Simulating all Pods Running") - Expect(setPodStatus(aw, v1.PodRunning, utils.ExpectedPodCount(aw))).To(Succeed()) + pc, err := utils.ExpectedPodCount(aw) + Expect(err).NotTo(HaveOccurred()) + Expect(setPodStatus(aw, v1.PodRunning, pc)).To(Succeed()) By("Reconciling: Running -> Running") - _, err := awReconciler.Reconcile(ctx, reconcile.Request{NamespacedName: awName}) + _, err = awReconciler.Reconcile(ctx, reconcile.Request{NamespacedName: awName}) Expect(err).NotTo(HaveOccurred()) aw = getAppWrapper(awName) @@ -144,7 +147,7 @@ var _ = Describe("AppWrapper Controller", func() { Expect((*workload.AppWrapper)(aw).PodsReady()).Should(BeTrue()) podStatus, err := awReconciler.getPodStatus(ctx, aw) Expect(err).NotTo(HaveOccurred()) - Expect(podStatus.running).Should(Equal(utils.ExpectedPodCount(aw))) + Expect(podStatus.running).Should(Equal(pc)) _, finished := (*workload.AppWrapper)(aw).Finished() Expect(finished).Should(BeFalse()) } @@ -184,13 +187,16 @@ var _ = Describe("AppWrapper Controller", func() { Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(workloadv1beta2.QuotaReserved))).Should(BeTrue()) Expect((*workload.AppWrapper)(aw).IsActive()).Should(BeTrue()) Expect((*workload.AppWrapper)(aw).IsSuspended()).Should(BeFalse()) + pc, err := utils.ExpectedPodCount(aw) + Expect(err).NotTo(HaveOccurred()) + Expect(pc).Should(Equal(int32(2))) podStatus, err := awReconciler.getPodStatus(ctx, aw) Expect(err).NotTo(HaveOccurred()) - Expect(podStatus.running).Should(Equal(utils.ExpectedPodCount(aw) - 1)) + Expect(podStatus.running).Should(Equal(int32(1))) Expect(podStatus.succeeded).Should(Equal(int32(1))) By("Simulating all Pods Completing") - Expect(setPodStatus(aw, v1.PodSucceeded, utils.ExpectedPodCount(aw))).To(Succeed()) + Expect(setPodStatus(aw, v1.PodSucceeded, 2)).To(Succeed()) By("Reconciling: Running -> Succeeded") _, err = awReconciler.Reconcile(ctx, reconcile.Request{NamespacedName: awName}) Expect(err).NotTo(HaveOccurred()) diff --git a/internal/controller/awstatus/status_utils.go b/internal/controller/awstatus/status_utils.go deleted file mode 100644 index b17df17..0000000 --- a/internal/controller/awstatus/status_utils.go +++ /dev/null @@ -1,54 +0,0 @@ -/* -Copyright 2024 IBM Corporation. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package awstatus - -import ( - "context" - - workloadv1beta2 "github.com/project-codeflare/appwrapper/api/v1beta2" - "github.com/project-codeflare/appwrapper/pkg/utils" - "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" -) - -// EnsureComponentStatusInitialized initializes aw.Status.ComponenetStatus, including performing PodSet inference for known GVKs -func EnsureComponentStatusInitialized(ctx context.Context, aw *workloadv1beta2.AppWrapper) error { - if len(aw.Status.ComponentStatus) == len(aw.Spec.Components) { - return nil - } - - // Construct definitive PodSets from the Spec + InferPodSets and cache in the Status (to avoid clashing with user updates to the Spec via apply) - compStatus := make([]workloadv1beta2.AppWrapperComponentStatus, len(aw.Spec.Components)) - for idx := range aw.Spec.Components { - if len(aw.Spec.Components[idx].DeclaredPodSets) > 0 { - compStatus[idx].PodSets = aw.Spec.Components[idx].DeclaredPodSets - } else { - obj := &unstructured.Unstructured{} - if _, _, err := unstructured.UnstructuredJSONScheme.Decode(aw.Spec.Components[idx].Template.Raw, nil, obj); err != nil { - // Transient error; Template.Raw was validated by our AdmissionController - return err - } - podSets, err := utils.InferPodSets(obj) - if err != nil { - // Transient error; InferPodSets was validated by our AdmissionController - return err - } - compStatus[idx].PodSets = podSets - } - } - aw.Status.ComponentStatus = compStatus - return nil -} diff --git a/internal/controller/workload/workload_controller.go b/internal/controller/workload/workload_controller.go index 04fd8c5..17a36e6 100644 --- a/internal/controller/workload/workload_controller.go +++ b/internal/controller/workload/workload_controller.go @@ -17,7 +17,6 @@ limitations under the License. package workload import ( - "context" "fmt" "k8s.io/apimachinery/pkg/api/meta" @@ -33,7 +32,6 @@ import ( "sigs.k8s.io/kueue/pkg/podset" workloadv1beta2 "github.com/project-codeflare/appwrapper/api/v1beta2" - "github.com/project-codeflare/appwrapper/internal/controller/awstatus" "github.com/project-codeflare/appwrapper/pkg/utils" ) @@ -79,7 +77,7 @@ func (aw *AppWrapper) GVK() schema.GroupVersionKind { func (aw *AppWrapper) PodSets() []kueue.PodSet { podSets := []kueue.PodSet{} - if err := awstatus.EnsureComponentStatusInitialized(context.Background(), (*workloadv1beta2.AppWrapper)(aw)); err != nil { + if err := utils.EnsureComponentStatusInitialized((*workloadv1beta2.AppWrapper)(aw)); err != nil { // Kueue will raise an error on zero length PodSet. Unfortunately, the Kueue API prevents propagating the actual error return podSets } diff --git a/pkg/utils/utils.go b/pkg/utils/utils.go index 9700b76..b0b4bc2 100644 --- a/pkg/utils/utils.go +++ b/pkg/utils/utils.go @@ -285,14 +285,46 @@ func Replicas(ps workloadv1beta2.AppWrapperPodSet) int32 { } } -func ExpectedPodCount(aw *workloadv1beta2.AppWrapper) int32 { +func ExpectedPodCount(aw *workloadv1beta2.AppWrapper) (int32, error) { + if err := EnsureComponentStatusInitialized(aw); err != nil { + return 0, err + } var expected int32 for _, c := range aw.Status.ComponentStatus { for _, s := range c.PodSets { expected += Replicas(s) } } - return expected + return expected, nil +} + +// EnsureComponentStatusInitialized initializes aw.Status.ComponenetStatus, including performing PodSet inference for known GVKs +func EnsureComponentStatusInitialized(aw *workloadv1beta2.AppWrapper) error { + if len(aw.Status.ComponentStatus) == len(aw.Spec.Components) { + return nil + } + + // Construct definitive PodSets from the Spec + InferPodSets and cache in the Status (to avoid clashing with user updates to the Spec via apply) + compStatus := make([]workloadv1beta2.AppWrapperComponentStatus, len(aw.Spec.Components)) + for idx := range aw.Spec.Components { + if len(aw.Spec.Components[idx].DeclaredPodSets) > 0 { + compStatus[idx].PodSets = aw.Spec.Components[idx].DeclaredPodSets + } else { + obj := &unstructured.Unstructured{} + if _, _, err := unstructured.UnstructuredJSONScheme.Decode(aw.Spec.Components[idx].Template.Raw, nil, obj); err != nil { + // Transient error; Template.Raw was validated by our AdmissionController + return err + } + podSets, err := InferPodSets(obj) + if err != nil { + // Transient error; InferPodSets was validated by our AdmissionController + return err + } + compStatus[idx].PodSets = podSets + } + } + aw.Status.ComponentStatus = compStatus + return nil } // inferReplicas parses the value at the given path within obj as an int or return 1 or error diff --git a/test/e2e/util_test.go b/test/e2e/util_test.go index befa809..343cde2 100644 --- a/test/e2e/util_test.go +++ b/test/e2e/util_test.go @@ -253,15 +253,21 @@ func waitAWPodsDeleted(ctx context.Context, awNamespace string, awName string) e } func waitAWPodsReady(ctx context.Context, aw *workloadv1beta2.AppWrapper) error { - numExpected := utils.ExpectedPodCount(aw) + numExpected, err := utils.ExpectedPodCount(aw) + if err != nil { + return err + } phases := []v1.PodPhase{v1.PodRunning, v1.PodSucceeded} return wait.PollUntilContextTimeout(ctx, 100*time.Millisecond, 120*time.Second, true, podsInPhase(aw.Namespace, aw.Name, phases, numExpected)) } func checkAllAWPodsReady(ctx context.Context, aw *workloadv1beta2.AppWrapper) bool { - numExpected := utils.ExpectedPodCount(aw) + numExpected, err := utils.ExpectedPodCount(aw) + if err != nil { + return false + } phases := []v1.PodPhase{v1.PodRunning, v1.PodSucceeded} - err := wait.PollUntilContextTimeout(ctx, 100*time.Millisecond, 100*time.Millisecond, true, podsInPhase(aw.Namespace, aw.Name, phases, numExpected)) + err = wait.PollUntilContextTimeout(ctx, 100*time.Millisecond, 100*time.Millisecond, true, podsInPhase(aw.Namespace, aw.Name, phases, numExpected)) return err == nil }