7
7
absolute_import )
8
8
from builtins import range , object , open
9
9
10
+ import sys
10
11
from copy import deepcopy
11
12
from glob import glob
12
13
import os
13
14
import shutil
14
- import sys
15
15
from time import sleep , time
16
- from traceback import format_exc
16
+ from traceback import format_exception
17
17
18
18
import numpy as np
19
19
import scipy .sparse as ssp
@@ -159,7 +159,7 @@ def run(self, graph, config, updatehash=False):
159
159
graph ,
160
160
result = {
161
161
'result' : None ,
162
- 'traceback' : format_exc ( )
162
+ 'traceback' : ' \n ' . join ( format_exception ( * sys . exc_info ()) )
163
163
}))
164
164
else :
165
165
if result :
@@ -244,7 +244,7 @@ def _submit_mapnode(self, jobid):
244
244
mapnodesubids = self .procs [jobid ].get_subnodes ()
245
245
numnodes = len (mapnodesubids )
246
246
logger .debug ('Adding %d jobs for mapnode %s' , numnodes ,
247
- self .procs [jobid ]. _id )
247
+ self .procs [jobid ])
248
248
for i in range (numnodes ):
249
249
self .mapnodesubids [self .depidx .shape [0 ] + i ] = jobid
250
250
self .procs .extend (mapnodesubids )
@@ -274,7 +274,7 @@ def _send_procs_to_workers(self, updatehash=False, graph=None):
274
274
slots = None
275
275
else :
276
276
slots = max (0 , self .max_jobs - num_jobs )
277
- logger .debug ('Slots available: %s' % slots )
277
+ logger .debug ('Slots available: %s' , slots )
278
278
if (num_jobs >= self .max_jobs ) or (slots == 0 ):
279
279
break
280
280
@@ -303,14 +303,14 @@ def _send_procs_to_workers(self, updatehash=False, graph=None):
303
303
self .proc_done [jobid ] = True
304
304
self .proc_pending [jobid ] = True
305
305
# Send job to task manager and add to pending tasks
306
- logger .info ('Submitting: %s ID: %d' %
307
- ( self .procs [jobid ]. _id , jobid ) )
306
+ logger .info ('Submitting: %s ID: %d' ,
307
+ self .procs [jobid ], jobid )
308
308
if self ._status_callback :
309
309
self ._status_callback (self .procs [jobid ], 'start' )
310
310
311
311
if not self ._local_hash_check (jobid , graph ):
312
312
if self .procs [jobid ].run_without_submitting :
313
- logger .debug ('Running node %s on master thread' %
313
+ logger .debug ('Running node %s on master thread' ,
314
314
self .procs [jobid ])
315
315
try :
316
316
self .procs [jobid ].run ()
@@ -327,8 +327,8 @@ def _send_procs_to_workers(self, updatehash=False, graph=None):
327
327
self .proc_pending [jobid ] = False
328
328
else :
329
329
self .pending_tasks .insert (0 , (tid , jobid ))
330
- logger .info ('Finished submitting: %s ID: %d' %
331
- ( self .procs [jobid ]. _id , jobid ) )
330
+ logger .info ('Finished submitting: %s ID: %d' ,
331
+ self .procs [jobid ], jobid )
332
332
else :
333
333
break
334
334
@@ -337,22 +337,38 @@ def _local_hash_check(self, jobid, graph):
337
337
self .procs [jobid ].config ['execution' ]['local_hash_check' ]):
338
338
return False
339
339
340
- cached , updated = self .procs [jobid ].is_cached ()
340
+ try :
341
+ cached , updated = self .procs [jobid ].is_cached ()
342
+ except Exception :
343
+ logger .warning (
344
+ 'Error while checking node hash, forcing re-run. '
345
+ 'Although this error may not prevent the workflow from running, '
346
+ 'it could indicate a major problem. Please report a new issue '
347
+ 'at https://github.com/nipy/nipype/issues adding the following '
348
+ 'information:\n \n \t Node: %s\n \t Interface: %s.%s\n \t Traceback:\n %s' ,
349
+ self .procs [jobid ],
350
+ self .procs [jobid ].interface .__module__ ,
351
+ self .procs [jobid ].interface .__class__ .__name__ ,
352
+ '\n ' .join (format_exception (* sys .exc_info ()))
353
+ )
354
+ return False
355
+
341
356
logger .debug ('Checking hash "%s" locally: cached=%s, updated=%s.' ,
342
- self .procs [jobid ]. fullname , cached , updated )
357
+ self .procs [jobid ], cached , updated )
343
358
overwrite = self .procs [jobid ].overwrite
344
- always_run = self .procs [jobid ]._interface .always_run
359
+ always_run = self .procs [jobid ].interface .always_run
345
360
346
361
if cached and updated and (overwrite is False or
347
362
overwrite is None and not always_run ):
348
363
logger .debug ('Skipping cached node %s with ID %s.' ,
349
- self .procs [jobid ]. _id , jobid )
364
+ self .procs [jobid ], jobid )
350
365
try :
351
366
self ._task_finished_cb (jobid , cached = True )
352
367
self ._remove_node_dirs ()
353
368
except Exception :
354
- logger .debug ('Error skipping cached node %s (%s).' ,
355
- self .procs [jobid ]._id , jobid )
369
+ logger .debug ('Error skipping cached node %s (%s).\n \n %s' ,
370
+ self .procs [jobid ], jobid ,
371
+ '\n ' .join (format_exception (* sys .exc_info ())))
356
372
self ._clean_queue (jobid , graph )
357
373
self .proc_pending [jobid ] = False
358
374
return True
@@ -364,7 +380,7 @@ def _task_finished_cb(self, jobid, cached=False):
364
380
This is called when a job is completed.
365
381
"""
366
382
logger .info ('[Job %d] %s (%s).' , jobid , 'Cached'
367
- if cached else 'Completed' , self .procs [jobid ]. fullname )
383
+ if cached else 'Completed' , self .procs [jobid ])
368
384
if self ._status_callback :
369
385
self ._status_callback (self .procs [jobid ], 'end' )
370
386
# Update job and worker queues
@@ -481,7 +497,7 @@ def _get_result(self, taskid):
481
497
taskid , timeout , node_dir ))
482
498
raise IOError (error_message )
483
499
except IOError as e :
484
- result_data ['traceback' ] = format_exc ( )
500
+ result_data ['traceback' ] = ' \n ' . join ( format_exception ( * sys . exc_info ()) )
485
501
else :
486
502
results_file = glob (os .path .join (node_dir , 'result_*.pklz' ))[0 ]
487
503
result_data = loadpkl (results_file )
0 commit comments