@@ -31,6 +31,7 @@ limitations under the License.
31
31
package queuejob
32
32
33
33
import (
34
+ "errors"
34
35
"fmt"
35
36
"math"
36
37
"math/rand"
@@ -69,7 +70,7 @@ import (
69
70
70
71
"k8s.io/apimachinery/pkg/runtime"
71
72
"k8s.io/apimachinery/pkg/runtime/schema"
72
- "k8s.io/apimachinery/pkg/runtime/serializer/json"
73
+ runtimeJson "k8s.io/apimachinery/pkg/runtime/serializer/json"
73
74
74
75
"github.com/project-codeflare/multi-cluster-app-dispatcher/pkg/controller/queuejobresources"
75
76
resconfigmap "github.com/project-codeflare/multi-cluster-app-dispatcher/pkg/controller/queuejobresources/configmap" // ConfigMap
@@ -675,7 +676,7 @@ func GetPodTemplate(qjobRes *arbv1.AppWrapperResource) (*v1.PodTemplateSpec, err
675
676
rtScheme := runtime .NewScheme ()
676
677
v1 .AddToScheme (rtScheme )
677
678
678
- jsonSerializer := json .NewYAMLSerializer (json .DefaultMetaFactory , rtScheme , rtScheme )
679
+ jsonSerializer := runtimeJson .NewYAMLSerializer (runtimeJson .DefaultMetaFactory , rtScheme , rtScheme )
679
680
680
681
podGVK := schema.GroupVersion {Group : v1 .GroupName , Version : "v1" }.WithKind ("PodTemplate" )
681
682
@@ -1903,20 +1904,19 @@ func (cc *XController) worker() {
1903
1904
1904
1905
// sync AppWrapper
1905
1906
if err := cc .syncQueueJob (queuejob ); err != nil {
1906
- klog .Errorf ("[worker] Failed to sync AppWrapper '%s/%s', err %#v" , queuejob .Namespace , queuejob .Name , err )
1907
1907
// If any error, requeue it.
1908
1908
return err
1909
1909
}
1910
1910
1911
1911
klog .V (10 ).Infof ("[worker] Ending %s Delay=%.6f seconds &newQJ=%p Version=%s Status=%+v" , queuejob .Name , time .Now ().Sub (queuejob .Status .ControllerFirstTimestamp .Time ).Seconds (), queuejob , queuejob .ResourceVersion , queuejob .Status )
1912
1912
return nil
1913
1913
})
1914
- if err != nil {
1914
+ if err != nil && ! CanIgnoreAPIError ( err ) && ! IsJsonSyntaxError ( err ) {
1915
1915
klog .Warningf ("[worker] Fail to process item from eventQueue, err %v. Attempting to re-enqueque..." , err )
1916
1916
if err00 := cc .enqueueIfNotPresent (item ); err00 != nil {
1917
- klog .Errorf ("[worker] Fatal error railed to re-enqueue item, err %v" , err00 )
1917
+ klog .Errorf ("[worker] Fatal error trying to re-enqueue item, err = %v" , err00 )
1918
1918
} else {
1919
- klog .Warning ("[worker] Item re-enqueued" )
1919
+ klog .Warning ("[worker] Item re-enqueued. " )
1920
1920
}
1921
1921
return
1922
1922
}
@@ -2140,7 +2140,6 @@ func (cc *XController) manageQueueJob(qj *arbv1.AppWrapper, podPhaseChanges bool
2140
2140
klog .Errorf ("[manageQueueJob] Error dispatching generic item for app wrapper='%s/%s' type=%v err=%v" , qj .Namespace , qj .Name , err00 )
2141
2141
}
2142
2142
dispatchFailureMessage = fmt .Sprintf ("%s/%s creation failure: %+v" , qj .Namespace , qj .Name , err00 )
2143
- klog .Errorf ("[manageQueueJob] Error dispatching job=%s Status=%+v err=%+v" , qj .Name , qj .Status , err00 )
2144
2143
dispatched = false
2145
2144
}
2146
2145
}
@@ -2337,7 +2336,7 @@ func (cc *XController) Cleanup(appwrapper *arbv1.AppWrapper) error {
2337
2336
// we call clean-up for each controller
2338
2337
for _ , ar := range appwrapper .Spec .AggrResources .Items {
2339
2338
err00 := cc .qjobResControls [ar .Type ].Cleanup (appwrapper , & ar )
2340
- if err00 != nil && ! apierrors . IsNotFound (err00 ) {
2339
+ if err00 != nil && ! CanIgnoreAPIError ( err00 ) && ! IsJsonSyntaxError (err00 ) {
2341
2340
klog .Errorf ("[Cleanup] Error deleting item %s from app wrapper='%s/%s' err=%v." ,
2342
2341
ar .Type , appwrapper .Namespace , appwrapper .Name , err00 )
2343
2342
err = multierror .Append (err , err00 )
@@ -2350,14 +2349,19 @@ func (cc *XController) Cleanup(appwrapper *arbv1.AppWrapper) error {
2350
2349
if appwrapper .Spec .AggrResources .GenericItems != nil {
2351
2350
for _ , ar := range appwrapper .Spec .AggrResources .GenericItems {
2352
2351
genericResourceName , gvk , err00 := cc .genericresources .Cleanup (appwrapper , & ar )
2353
- if err00 != nil && ! apierrors . IsNotFound (err00 ) {
2352
+ if err00 != nil && ! CanIgnoreAPIError ( err00 ) && ! IsJsonSyntaxError (err00 ) {
2354
2353
klog .Errorf ("[Cleanup] Error deleting generic item %s, from app wrapper='%s/%s' err=%v." ,
2355
2354
genericResourceName , appwrapper .Namespace , appwrapper .Name , err00 )
2356
2355
err = multierror .Append (err , err00 )
2357
2356
continue
2358
2357
}
2359
- klog .V (3 ).Infof ("[Cleanup] Deleted generic item %s, GVK=%s.%s.%s from app wrapper='%s/%s'" ,
2360
- genericResourceName , gvk .Group , gvk .Version , gvk .Kind , appwrapper .Namespace , appwrapper .Name )
2358
+ if gvk != nil {
2359
+ klog .V (3 ).Infof ("[Cleanup] Deleted generic item '%s', GVK=%s.%s.%s from app wrapper='%s/%s'" ,
2360
+ genericResourceName , gvk .Group , gvk .Version , gvk .Kind , appwrapper .Namespace , appwrapper .Name )
2361
+ } else {
2362
+ klog .V (3 ).Infof ("[Cleanup] Deleted generic item '%s' from app wrapper='%s/%s'" ,
2363
+ genericResourceName , appwrapper .Namespace , appwrapper .Name )
2364
+ }
2361
2365
}
2362
2366
}
2363
2367
@@ -2443,3 +2447,21 @@ func (qjm *XController) schedulingAWAtomicSet(qj *arbv1.AppWrapper) {
2443
2447
qjm .schedulingAW = qj
2444
2448
qjm .schedulingMutex .Unlock ()
2445
2449
}
2450
+
2451
// IsJsonSyntaxError reports whether err stems from a malformed resource
// definition rather than from a transient failure.
//
// Despite its name it recognises three kinds of error:
//   - a *jsons.SyntaxError anywhere in err's chain (matched via errors.As);
//   - an error whose message is exactly
//     "Job resource template item not define as a PodTemplate";
//   - an error whose message is exactly "name is required".
//
// The two message checks are applied to err itself and to every error
// reachable through errors.Unwrap, so a cause wrapped with
// fmt.Errorf("...: %w", cause) is still recognised, consistent with how the
// syntax-error check already sees through wrapping. Messages that merely
// contain one of the texts as a substring do not match.
//
// A nil err yields false.
func IsJsonSyntaxError(err error) bool {
	if err == nil {
		return false
	}

	var syntaxErr *jsons.SyntaxError
	if errors.As(err, &syntaxErr) {
		return true
	}

	// The producers of these two errors are matched by message text; walk the
	// wrap chain so added context does not hide them.
	for cause := err; cause != nil; cause = errors.Unwrap(cause) {
		switch cause.Error() {
		case "Job resource template item not define as a PodTemplate",
			"name is required":
			return true
		}
	}
	return false
}
2465
+ func CanIgnoreAPIError (err error ) bool {
2466
+ return err == nil || apierrors .IsNotFound (err ) || apierrors .IsInvalid (err )
2467
+ }
0 commit comments