Skip to content

Commit d587352

Browse files
authored
Avoid preemption of failed co-scheduled pods (#317)
1 parent a83bcf7 commit d587352

File tree

1 file changed

+36
-32
lines changed
  • pkg/controller/queuejobresources

1 file changed

+36
-32
lines changed

pkg/controller/queuejobresources/utils.go

Lines changed: 36 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,10 @@ func PendingPodsFailedSchd(pods []*v1.Pod) map[string][]v1.PodCondition {
6161
if strings.Contains(cond.Message, "pgName") && strings.Contains(cond.Message, "last") && strings.Contains(cond.Message, "failed") && strings.Contains(cond.Message, "deny") {
6262
//ignore co-scheduled pending pods for coscheduler version:0.22.6
6363
continue
64+
} else if strings.Contains(cond.Message, "optimistic") && strings.Contains(cond.Message, "rejection") && strings.Contains(cond.Message, "PostFilter") ||
65+
strings.Contains(cond.Message, "cannot") && strings.Contains(cond.Message, "find") && strings.Contains(cond.Message, "enough") && strings.Contains(cond.Message, "sibling") {
66+
//ignore co-scheduled pending pods for coscheduler version:0.23.10
67+
continue
6468
} else {
6569
podName := string(pods[i].Name)
6670
podCondition[podName] = append(podCondition[podName], *cond.DeepCopy())
@@ -74,41 +78,41 @@ func PendingPodsFailedSchd(pods []*v1.Pod) map[string][]v1.PodCondition {
7478

7579
// GetPodResourcesByPhase returns the summed container resource requests of the pods that are in the given phase.
7680
func GetPodResourcesByPhase(phase v1.PodPhase, pods []*v1.Pod) *clusterstateapi.Resource {
77-
req := clusterstateapi.EmptyResource()
78-
for i := range pods {
79-
if pods[i].Status.Phase == phase {
80-
for _, c := range pods[i].Spec.Containers {
81-
req.Add(clusterstateapi.NewResource(c.Resources.Requests))
82-
}
83-
}
84-
}
85-
return req
81+
req := clusterstateapi.EmptyResource()
82+
for i := range pods {
83+
if pods[i].Status.Phase == phase {
84+
for _, c := range pods[i].Spec.Containers {
85+
req.Add(clusterstateapi.NewResource(c.Resources.Requests))
86+
}
87+
}
88+
}
89+
return req
8690
}
8791

8892
func GetPodResources(template *v1.PodTemplateSpec) *clusterstateapi.Resource {
89-
total := clusterstateapi.EmptyResource()
90-
req := clusterstateapi.EmptyResource()
91-
limit := clusterstateapi.EmptyResource()
92-
spec := template.Spec
93+
total := clusterstateapi.EmptyResource()
94+
req := clusterstateapi.EmptyResource()
95+
limit := clusterstateapi.EmptyResource()
96+
spec := template.Spec
9397

94-
if &spec == nil {
95-
klog.Errorf("Pod Spec not found in Pod Template: %+v. Aggregated resources set to 0.", template)
96-
return total
97-
}
98+
if &spec == nil {
99+
klog.Errorf("Pod Spec not found in Pod Template: %+v. Aggregated resources set to 0.", template)
100+
return total
101+
}
98102

99-
for _, c := range template.Spec.Containers {
100-
req.Add(clusterstateapi.NewResource(c.Resources.Requests))
101-
limit.Add(clusterstateapi.NewResource(c.Resources.Limits))
102-
}
103-
if req.MilliCPU < limit.MilliCPU {
104-
req.MilliCPU = limit.MilliCPU
105-
}
106-
if req.Memory < limit.Memory {
107-
req.Memory = limit.Memory
108-
}
109-
if req.GPU < limit.GPU {
110-
req.GPU = limit.GPU
111-
}
112-
total = total.Add(req)
113-
return total
103+
for _, c := range template.Spec.Containers {
104+
req.Add(clusterstateapi.NewResource(c.Resources.Requests))
105+
limit.Add(clusterstateapi.NewResource(c.Resources.Limits))
106+
}
107+
if req.MilliCPU < limit.MilliCPU {
108+
req.MilliCPU = limit.MilliCPU
109+
}
110+
if req.Memory < limit.Memory {
111+
req.Memory = limit.Memory
112+
}
113+
if req.GPU < limit.GPU {
114+
req.GPU = limit.GPU
115+
}
116+
total = total.Add(req)
117+
return total
114118
}

0 commit comments

Comments
 (0)