From 686aef9f6beac904b260347f92bfbfb5ee3e8ea6 Mon Sep 17 00:00:00 2001 From: oesteban Date: Tue, 10 Sep 2019 22:40:46 -0700 Subject: [PATCH 1/3] ENH: Lightweight node cache checking Generating the hashvalue when outputs are not ready at cache check stage when the node's directory does not exist (or no results file is in there) leads to #3014. This PR preempts those problems by delaying the hashval calculation. --- nipype/pipeline/engine/nodes.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/nipype/pipeline/engine/nodes.py b/nipype/pipeline/engine/nodes.py index 57c566f890..ce89026ad0 100644 --- a/nipype/pipeline/engine/nodes.py +++ b/nipype/pipeline/engine/nodes.py @@ -293,27 +293,29 @@ def is_cached(self, rm_outdated=False): """ outdir = self.output_dir() - # Update hash - hashed_inputs, hashvalue = self._get_hashval() - # The output folder does not exist: not cached - if not op.exists(outdir): - logger.debug('[Node] Directory not found "%s".', outdir) + if not op.exists(outdir) or \ + not op.exists(op.join(outdir, 'result_%s.pklz' % self.name)): + logger.debug('[Node] Not cached "%s".', outdir) return False, False - hashfile = op.join(outdir, '_0x%s.json' % hashvalue) - cached = op.exists(hashfile) - - # Check if updated + # Check if there are hashfiles globhashes = glob(op.join(outdir, '_0x*.json')) unfinished = [ path for path in globhashes if path.endswith('_unfinished.json') ] hashfiles = list(set(globhashes) - set(unfinished)) + + # Update hash + hashed_inputs, hashvalue = self._get_hashval() + + hashfile = op.join(outdir, '_0x%s.json' % hashvalue) logger.debug('[Node] Hashes: %s, %s, %s, %s', hashed_inputs, hashvalue, hashfile, hashfiles) + cached = op.exists(hashfile) + # No previous hashfiles found, we're all set. 
if cached and len(hashfiles) == 1: assert(hashfile == hashfiles[0]) @@ -441,6 +443,7 @@ def run(self, updatehash=False): for outdatedhash in glob(op.join(self.output_dir(), '_0x*.json')): os.remove(outdatedhash) + self._get_hashval() # Hashfile while running hashfile_unfinished = op.join( outdir, '_0x%s_unfinished.json' % self._hashvalue) From 57a8db2cd99f2c58138d4a29e7ed646349757677 Mon Sep 17 00:00:00 2001 From: oesteban Date: Wed, 11 Sep 2019 09:16:57 -0700 Subject: [PATCH 2/3] maint: add relevant code comment + minimal stylistic changes [skip ci] --- nipype/pipeline/engine/nodes.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/nipype/pipeline/engine/nodes.py b/nipype/pipeline/engine/nodes.py index ce89026ad0..8fda6c775e 100644 --- a/nipype/pipeline/engine/nodes.py +++ b/nipype/pipeline/engine/nodes.py @@ -389,17 +389,17 @@ def hash_exists(self, updatehash=False): return cached, self._hashvalue, hashfile, self._hashed_inputs def run(self, updatehash=False): - """Execute the node in its directory. + """ + Execute the node in its directory. Parameters ---------- - updatehash: boolean When the hash stored in the output directory as a result of a previous run does not match that calculated for this execution, updatehash=True only updates the hash without re-running. - """ + """ if self.config is None: self.config = {} self.config = merge_dict(deepcopy(config._sections), self.config) @@ -443,6 +443,10 @@ def run(self, updatehash=False): for outdatedhash in glob(op.join(self.output_dir(), '_0x*.json')): os.remove(outdatedhash) + # _get_hashval needs to be called before running. When there is a valid (or seemingly + # valid) cache, the is_cached() member updates the hashval via _get_hashval. + # However, if this node's folder doesn't exist or the result file is not found, then + # the hashval needs to be generated here. See #3026 for a larger context. 
self._get_hashval() # Hashfile while running hashfile_unfinished = op.join( From e7a62006fe4f39c44eba02c73b21514bc070d151 Mon Sep 17 00:00:00 2001 From: Oscar Esteban Date: Wed, 11 Sep 2019 11:12:32 -0700 Subject: [PATCH 3/3] Update nipype/pipeline/engine/nodes.py Co-Authored-By: Chris Markiewicz --- nipype/pipeline/engine/nodes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nipype/pipeline/engine/nodes.py b/nipype/pipeline/engine/nodes.py index 8fda6c775e..0e3b61ffd2 100644 --- a/nipype/pipeline/engine/nodes.py +++ b/nipype/pipeline/engine/nodes.py @@ -314,7 +314,7 @@ def is_cached(self, rm_outdated=False): logger.debug('[Node] Hashes: %s, %s, %s, %s', hashed_inputs, hashvalue, hashfile, hashfiles) - cached = op.exists(hashfile) + cached = hashfile in hashfiles # No previous hashfiles found, we're all set. if cached and len(hashfiles) == 1: