@@ -511,12 +511,32 @@ func (r *AppWrapperReconciler) getPodStatus(ctx context.Context, aw *workloadv1b
511
511
return summary , nil
512
512
}
513
513
514
+ //gocyclo:ignore
514
515
func (r * AppWrapperReconciler ) getComponentStatus (ctx context.Context , aw * workloadv1beta2.AppWrapper ) (* componentStatusSummary , error ) {
515
516
summary := & componentStatusSummary {expected : int32 (len (aw .Status .ComponentStatus ))}
516
517
517
518
for componentIdx := range aw .Status .ComponentStatus {
518
519
cs := & aw .Status .ComponentStatus [componentIdx ]
519
520
switch cs .APIVersion + ":" + cs .Kind {
521
+
522
+ case "batch/v1:Job" :
523
+ obj := & batchv1.Job {}
524
+ if err := r .Get (ctx , types.NamespacedName {Name : cs .Name , Namespace : aw .Namespace }, obj ); err == nil {
525
+ if obj .GetDeletionTimestamp ().IsZero () {
526
+ summary .deployed += 1
527
+
528
+ // batch/v1 Jobs are failed when status.Conditions contains an entry with type "Failed" and status "True"
529
+ for _ , jc := range obj .Status .Conditions {
530
+ if jc .Type == batchv1 .JobFailed && jc .Status == v1 .ConditionTrue {
531
+ summary .failed += 1
532
+ }
533
+ }
534
+ }
535
+
536
+ } else if ! apierrors .IsNotFound (err ) {
537
+ return nil , err
538
+ }
539
+
520
540
case "kubeflow.org/v1:PyTorchJob" :
521
541
obj := & unstructured.Unstructured {}
522
542
obj .SetAPIVersion (cs .APIVersion )
@@ -552,20 +572,52 @@ func (r *AppWrapperReconciler) getComponentStatus(ctx context.Context, aw *workl
552
572
return nil , err
553
573
}
554
574
555
- case "batch/v1:Job" :
556
- obj := & batchv1.Job {}
575
+ case "ray.io/v1:RayCluster" :
576
+ obj := & unstructured.Unstructured {}
577
+ obj .SetAPIVersion (cs .APIVersion )
578
+ obj .SetKind (cs .Kind )
557
579
if err := r .Get (ctx , types.NamespacedName {Name : cs .Name , Namespace : aw .Namespace }, obj ); err == nil {
558
580
if obj .GetDeletionTimestamp ().IsZero () {
559
581
summary .deployed += 1
560
582
561
- // batch/v1 Jobs are failed when status.Conditions contains an entry with type "Failed" and status "True"
562
- for _ , jc := range obj .Status .Conditions {
563
- if jc .Type == batchv1 .JobFailed && jc .Status == v1 .ConditionTrue {
564
- summary .failed += 1
565
- }
583
+ // RayCluster is failed if status.state is "failed"
584
+ status , ok := obj .UnstructuredContent ()["status" ]
585
+ if ! ok {
586
+ continue
587
+ }
588
+ state , ok := status .(map [string ]interface {})["state" ]
589
+ if ! ok {
590
+ continue
591
+ }
592
+ if state .(string ) == "failed" {
593
+ summary .failed += 1
566
594
}
567
595
}
596
+ } else if ! apierrors .IsNotFound (err ) {
597
+ return nil , err
598
+ }
599
+
600
+ case "ray.io/v1:RayJob" :
601
+ obj := & unstructured.Unstructured {}
602
+ obj .SetAPIVersion (cs .APIVersion )
603
+ obj .SetKind (cs .Kind )
604
+ if err := r .Get (ctx , types.NamespacedName {Name : cs .Name , Namespace : aw .Namespace }, obj ); err == nil {
605
+ if obj .GetDeletionTimestamp ().IsZero () {
606
+ summary .deployed += 1
568
607
608
+ // RayJob is failed if status.jobStatus is "FAILED"
609
+ status , ok := obj .UnstructuredContent ()["status" ]
610
+ if ! ok {
611
+ continue
612
+ }
613
+ jobStatus , ok := status .(map [string ]interface {})["jobStatus" ]
614
+ if ! ok {
615
+ continue
616
+ }
617
+ if jobStatus .(string ) == "FAILED" {
618
+ summary .failed += 1
619
+ }
620
+ }
569
621
} else if ! apierrors .IsNotFound (err ) {
570
622
return nil , err
571
623
}
0 commit comments