From b5a60245d9c19d64a053eb650f1b584299be804a Mon Sep 17 00:00:00 2001 From: dclark87 Date: Wed, 13 Jan 2016 14:25:44 -0500 Subject: [PATCH 01/37] Removed S3 datasink stuff --- nipype/interfaces/io.py | 749 +++++++---------------------- nipype/interfaces/tests/test_io.py | 221 ++------- 2 files changed, 224 insertions(+), 746 deletions(-) diff --git a/nipype/interfaces/io.py b/nipype/interfaces/io.py index 86359756f6..5909843c34 100644 --- a/nipype/interfaces/io.py +++ b/nipype/interfaces/io.py @@ -17,6 +17,11 @@ >>> os.chdir(datadir) """ + +from builtins import zip +from builtins import filter +from builtins import range + import glob import fnmatch import string @@ -29,11 +34,19 @@ from warnings import warn import sqlite3 -from nipype.utils.misc import human_order_sorted -from nipype.external import six -from ..utils.misc import str2bool +from .base import (TraitedSpec, traits, File, Directory, + BaseInterface, InputMultiPath, isdefined, + OutputMultiPath, DynamicTraitedSpec, + Undefined, BaseInterfaceInputSpec) from .. import config +from ..external.six import string_types +from ..utils.filemanip import (copyfile, list_to_filename, + filename_to_list) +from ..utils.misc import human_order_sorted +from ..utils.misc import str2bool +from .. import logging +iflogger = logging.getLogger('interface') try: import pyxnat @@ -51,16 +64,6 @@ except: pass -from nipype.interfaces.base import (TraitedSpec, traits, File, Directory, - BaseInterface, InputMultiPath, isdefined, - OutputMultiPath, DynamicTraitedSpec, - Undefined, BaseInterfaceInputSpec) -from nipype.utils.filemanip import (copyfile, list_to_filename, - filename_to_list) - -from .. import logging -iflogger = logging.getLogger('interface') - def copytree(src, dst, use_hardlink=False): """Recursively copy a directory tree using @@ -73,7 +76,7 @@ def copytree(src, dst, use_hardlink=False): names = os.listdir(src) try: os.makedirs(dst) - except OSError, why: + except OSError as why: if 'File exists' in why: pass else: @@ -88,11 +91,11 @@ def copytree(src, dst, use_hardlink=False): else: copyfile(srcname, dstname, True, hashmethod='content', use_hardlink=use_hardlink) - except (IOError, os.error), why: + except (IOError, os.error) as why: errors.append((srcname, dstname, str(why))) # catch the Error from the recursive copytree so that we can # continue with other files - except Exception, err: + except Exception as err: errors.extend(err.args[0]) if errors: raise Exception(errors) @@ -131,54 +134,7 @@ def _add_output_traits(self, base): return base -# Class to track percentage of S3 file upload -class ProgressPercentage(object): - ''' - Callable class instsance (via __call__ method) that displays - upload percentage of a file to S3 - ''' - - def __init__(self, filename): - ''' - ''' - - # Import packages - import threading - - # Initialize data attributes - self._filename = filename - self._size = float(os.path.getsize(filename)) - self._seen_so_far = 0 - self._lock = threading.Lock() - - def __call__(self, bytes_amount): - ''' - ''' - - # Import packages - import sys - - # With the lock on, print upload status - with self._lock: - self._seen_so_far += bytes_amount - if self._size != 0: - percentage = (self._seen_so_far / self._size) * 100 - else: - percentage = 0 - progress_str = '%d / %d (%.2f%%)\r'\ - % (self._seen_so_far, self._size, percentage) - - # Write to stdout - sys.stdout.write(progress_str) - sys.stdout.flush() - - -# DataSink inputs class DataSinkInputSpec(DynamicTraitedSpec, BaseInterfaceInputSpec): - ''' - ''' - - # Init 
inputspec data attributes base_directory = Directory( desc='Path to the base directory for storing data.') container = traits.Str( @@ -190,30 +146,17 @@ class DataSinkInputSpec(DynamicTraitedSpec, BaseInterfaceInputSpec): desc=('List of 2-tuples reflecting string ' 'to substitute and string to replace ' 'it with')) - regexp_substitutions = \ - InputMultiPath(traits.Tuple(traits.Str, traits.Str), - desc=('List of 2-tuples reflecting a pair of a '\ - 'Python regexp pattern and a replacement '\ - 'string. Invoked after string `substitutions`')) + regexp_substitutions = InputMultiPath(traits.Tuple(traits.Str, traits.Str), + desc=('List of 2-tuples reflecting a pair ' + 'of a Python regexp pattern and a ' + 'replacement string. Invoked after ' + 'string `substitutions`')) _outputs = traits.Dict(traits.Str, value={}, usedefault=True) remove_dest_dir = traits.Bool(False, usedefault=True, desc='remove dest directory when copying dirs') - # AWS S3 data attributes - creds_path = traits.Str(desc='Filepath to AWS credentials file for S3 bucket '\ - 'access') - encrypt_bucket_keys = traits.Bool(desc='Flag indicating whether to use S3 '\ - 'server-side AES-256 encryption') - # Set this if user wishes to override the bucket with their own - bucket = traits.Generic(mandatory=False, - desc='Boto3 S3 bucket for manual override of bucket') - # Set this if user wishes to have local copy of files as well - local_copy = traits.Str(desc='Copy files locally as well as to S3 bucket') - - # Set call-able inputs attributes def __setattr__(self, key, value): - if key not in self.copyable_trait_names(): if not isdefined(value): super(DataSinkInputSpec, self).__setattr__(key, value) @@ -224,19 +167,11 @@ def __setattr__(self, key, value): super(DataSinkInputSpec, self).__setattr__(key, value) -# DataSink outputs class DataSinkOutputSpec(TraitedSpec): - ''' - ''' - # Import packages - import traits.api as tapi + out_file = traits.Any(desc='datasink output') - # Init out file - out_file = tapi.Any(desc='datasink output') - -# Custom DataSink class class DataSink(IOBase): """ Generic datasink module to store structured outputs @@ -284,7 +219,7 @@ class DataSink(IOBase): >>> ds.inputs.structural = 'structural.nii' >>> setattr(ds.inputs, 'contrasts.@con', ['cont1.nii', 'cont2.nii']) >>> setattr(ds.inputs, 'contrasts.alt', ['cont1a.nii', 'cont2a.nii']) - >>> ds.run() # doctest: +SKIP + >>> ds.run() # doctest: +SKIP To use DataSink in a MapNode, its inputs have to be defined at the time the interface is created. 
@@ -295,15 +230,12 @@ class DataSink(IOBase): >>> ds.inputs.structural = 'structural.nii' >>> setattr(ds.inputs, 'contrasts.@con', ['cont1.nii', 'cont2.nii']) >>> setattr(ds.inputs, 'contrasts.alt', ['cont1a.nii', 'cont2a.nii']) - >>> ds.run() # doctest: +SKIP + >>> ds.run() # doctest: +SKIP """ - - # Give obj .inputs and .outputs input_spec = DataSinkInputSpec output_spec = DataSinkOutputSpec - # Initialization method to set up datasink def __init__(self, infields=None, force_run=True, **kwargs): """ Parameters @@ -325,7 +257,6 @@ def __init__(self, infields=None, force_run=True, **kwargs): if force_run: self._always_run = True - # Get destination paths def _get_dst(self, src): # If path is directory with trailing os.path.sep, # then remove that for a more robust behavior @@ -349,7 +280,6 @@ def _get_dst(self, src): dst = dst[1:] return dst - # Substitute paths in substitutions dictionary parameter def _substitute(self, pathstr): pathstr_ = pathstr if isdefined(self.inputs.substitutions): @@ -370,395 +300,81 @@ def _substitute(self, pathstr): iflogger.info('sub: %s -> %s' % (pathstr_, pathstr)) return pathstr - # Check for s3 in base directory - def _check_s3_base_dir(self): - ''' - Method to see if the datasink's base directory specifies an - S3 bucket path; if it does, it parses the path for the bucket - name in the form 's3://bucket_name/...' and adds a bucket - attribute to the data sink instance, i.e. self.bucket - - Parameters - ---------- - - Returns - ------- - s3_flag : boolean - flag indicating whether the base_directory contained an - S3 bucket path - ''' - - # Init variables - s3_str = 's3://' - base_directory = self.inputs.base_directory - - if not isdefined(base_directory): - s3_flag = False - return s3_flag - - # Explicitly lower-case the "s3" - if base_directory.lower().startswith(s3_str): - base_dir_sp = base_directory.split('/') - base_dir_sp[0] = base_dir_sp[0].lower() - base_directory = '/'.join(base_dir_sp) - - # Check if 's3://' in base dir - if base_directory.startswith(s3_str): - # Attempt to access bucket - try: - # Expects bucket name to be 's3://bucket_name/base_dir/..' - bucket_name = base_directory.split(s3_str)[1].split('/')[0] - # Get the actual bucket object - if self.inputs.bucket: - self.bucket = self.inputs.bucket - else: - self.bucket = self._fetch_bucket(bucket_name) - # Report error in case of exception - except Exception as exc: - err_msg = 'Unable to access S3 bucket. Error:\n%s. Exiting...'\ - % exc - raise Exception(err_msg) - # Bucket access was a success, set flag - s3_flag = True - # Otherwise it's just a normal datasink - else: - s3_flag = False - - # Return s3_flag - return s3_flag - - # Function to return AWS secure environment variables - def _return_aws_keys(self, creds_path): - ''' - Method to return AWS access key id and secret access key using - credentials found in a local file. 
- - Parameters - ---------- - creds_path : string (filepath) - path to the csv file downloaded from AWS; can either be root - or user credentials - - Returns - ------- - aws_access_key_id : string - string of the AWS access key ID - aws_secret_access_key : string - string of the AWS secret access key - ''' - - # Init variables - with open(creds_path, 'r') as creds_in: - # Grab csv rows - row1 = creds_in.readline() - row2 = creds_in.readline() - - # Are they root or user keys - if 'User Name' in row1: - # And split out for keys - aws_access_key_id = row2.split(',')[1] - aws_secret_access_key = row2.split(',')[2] - elif 'AWSAccessKeyId' in row1: - # And split out for keys - aws_access_key_id = row1.split('=')[1] - aws_secret_access_key = row2.split('=')[1] - else: - err_msg = 'Credentials file not recognized, check file is correct' - raise Exception(err_msg) - - # Strip any carriage return/line feeds - aws_access_key_id = aws_access_key_id.replace('\r', '').replace('\n', '') - aws_secret_access_key = aws_secret_access_key.replace('\r', '').replace('\n', '') - - # Return keys - return aws_access_key_id, aws_secret_access_key - - # Fetch bucket object - def _fetch_bucket(self, bucket_name): - ''' - Method to return a bucket object which can be used to interact - with an AWS S3 bucket using credentials found in a local file. - - Parameters - ---------- - bucket_name : string - string corresponding to the name of the bucket on S3 - - Returns - ------- - bucket : boto3.resources.factory.s3.Bucket - boto3 s3 Bucket object which is used to interact with files - in an S3 bucket on AWS - ''' - - # Import packages - import logging - - try: - import boto3 - import botocore - except ImportError as exc: - err_msg = 'Boto3 package is not installed - install boto3 and '\ - 'try again.' - raise Exception(err_msg) - - # Init variables - creds_path = self.inputs.creds_path - iflogger = logging.getLogger('interface') - - # Try and get AWS credentials if a creds_path is specified - if creds_path: - try: - aws_access_key_id, aws_secret_access_key = \ - self._return_aws_keys(creds_path) - except Exception as exc: - err_msg = 'There was a problem extracting the AWS credentials '\ - 'from the credentials file provided: %s. Error:\n%s'\ - % (creds_path, exc) - raise Exception(err_msg) - # Init connection - iflogger.info('Connecting to S3 bucket: %s with credentials from '\ - '%s ...' 
% (bucket_name, creds_path)) - # Use individual session for each instance of DataSink - # Better when datasinks are being used in multi-threading, see: - # http://boto3.readthedocs.org/en/latest/guide/resources.html#multithreading - session = boto3.session.Session(aws_access_key_id=aws_access_key_id, - aws_secret_access_key=aws_secret_access_key) - s3_resource = session.resource('s3', use_ssl=True) - - # Otherwise, connect anonymously - else: - iflogger.info('Connecting to AWS: %s anonymously...'\ - % bucket_name) - session = boto3.session.Session() - s3_resource = session.resource('s3', use_ssl=True) - s3_resource.meta.client.meta.events.register('choose-signer.s3.*', - botocore.handlers.disable_signing) - - # Explicitly declare a secure SSL connection for bucket object - bucket = s3_resource.Bucket(bucket_name) - - # And try fetch the bucket with the name argument - try: - s3_resource.meta.client.head_bucket(Bucket=bucket_name) - except botocore.exceptions.ClientError as exc: - error_code = int(exc.response['Error']['Code']) - if error_code == 403: - err_msg = 'Access to bucket: %s is denied; check credentials'\ - % bucket_name - raise Exception(err_msg) - elif error_code == 404: - err_msg = 'Bucket: %s does not exist; check spelling and try '\ - 'again' % bucket_name - raise Exception(err_msg) - else: - err_msg = 'Unable to connect to bucket: %s. Error message:\n%s'\ - % (bucket_name, exc) - except Exception as exc: - err_msg = 'Unable to connect to bucket: %s. Error message:\n%s'\ - % (bucket_name, exc) - raise Exception(err_msg) - - # Return the bucket - return bucket - - # Send up to S3 method - def _upload_to_s3(self, src, dst): - ''' - Method to upload outputs to S3 bucket instead of on local disk - ''' - - # Import packages - import hashlib - import logging - import os - - from botocore.exceptions import ClientError - - # Init variables - bucket = self.bucket - iflogger = logging.getLogger('interface') - s3_str = 's3://' - s3_prefix = s3_str + bucket.name - - # Explicitly lower-case the "s3" - if dst.lower().startswith(s3_str): - dst_sp = dst.split('/') - dst_sp[0] = dst_sp[0].lower() - dst = '/'.join(dst_sp) - - # If src is a directory, collect files (this assumes dst is a dir too) - if os.path.isdir(src): - src_files = [] - for root, dirs, files in os.walk(src): - src_files.extend([os.path.join(root, fil) for fil in files]) - # Make the dst files have the dst folder as base dir - dst_files = [os.path.join(dst, src_f.split(src)[1]) \ - for src_f in src_files] - else: - src_files = [src] - dst_files = [dst] - - # Iterate over src and copy to dst - for src_idx, src_f in enumerate(src_files): - # Get destination filename/keyname - dst_f = dst_files[src_idx] - dst_k = dst_f.replace(s3_prefix, '').lstrip('/') - - # See if same file is already up there - try: - dst_obj = bucket.Object(key=dst_k) - dst_md5 = dst_obj.e_tag.strip('"') - - # See if same file is already there - src_read = open(src_f, 'rb').read() - src_md5 = hashlib.md5(src_read).hexdigest() - # Move to next loop iteration - if dst_md5 == src_md5: - iflogger.info('File %s already exists on S3, skipping...' 
% dst_f) - continue - else: - iflogger.info('Overwriting previous S3 file...') - - except ClientError: - iflogger.info('New file to S3') - - # Copy file up to S3 (either encrypted or not) - iflogger.info('Uploading %s to S3 bucket, %s, as %s...'\ - % (src_f, bucket.name, dst_f)) - if self.inputs.encrypt_bucket_keys: - extra_args = {'ServerSideEncryption' : 'AES256'} - else: - extra_args = {} - bucket.upload_file(src_f, dst_k, ExtraArgs=extra_args, - Callback=ProgressPercentage(src_f)) - - # List outputs, main run routine def _list_outputs(self): """Execute this module. """ - - # Init variables - iflogger = logging.getLogger('interface') outputs = self.output_spec().get() out_files = [] - # Use hardlink - use_hardlink = str2bool(config.get('execution', 'try_hard_link_datasink')) - - # Set local output directory if specified - if isdefined(self.inputs.local_copy): - outdir = self.inputs.local_copy - else: - outdir = self.inputs.base_directory - # If base directory isn't given, assume current directory - if not isdefined(outdir): - outdir = '.' - - # Check if base directory reflects S3 bucket upload - try: - s3_flag = self._check_s3_base_dir() - if s3_flag: - s3dir = self.inputs.base_directory - if isdefined(self.inputs.container): - s3dir = os.path.join(s3dir, self.inputs.container) - else: - s3dir = '' - # If encountering an exception during bucket access, set output - # base directory to a local folder - except Exception as exc: - s3dir = '' - s3_flag = False - if not isdefined(self.inputs.local_copy): - local_out_exception = os.path.join(os.path.expanduser('~'), - 's3_datasink_' + self.bucket.name) - outdir = local_out_exception - # Log local copying directory - iflogger.info('Access to S3 failed! Storing outputs locally at: '\ - '%s\nError: %s' %(outdir, exc)) - - # If container input is given, append that to outdir + outdir = self.inputs.base_directory + if not isdefined(outdir): + outdir = '.' 
+ outdir = os.path.abspath(outdir) if isdefined(self.inputs.container): outdir = os.path.join(outdir, self.inputs.container) - - # If sinking to local folder - if outdir != s3dir: - outdir = os.path.abspath(outdir) - # Create the directory if it doesn't exist - if not os.path.exists(outdir): - try: - os.makedirs(outdir) - except OSError, inst: - if 'File exists' in inst: - pass - else: - raise(inst) - - # Iterate through outputs attributes {key : path(s)} - for key, files in self.inputs._outputs.items(): + if not os.path.exists(outdir): + try: + os.makedirs(outdir) + except OSError as inst: + if 'File exists' in inst: + pass + else: + raise(inst) + use_hardlink = str2bool(config.get('execution', + 'try_hard_link_datasink')) + for key, files in list(self.inputs._outputs.items()): if not isdefined(files): continue iflogger.debug("key: %s files: %s" % (key, str(files))) files = filename_to_list(files) tempoutdir = outdir - if s3_flag: - s3tempoutdir = s3dir for d in key.split('.'): if d[0] == '@': continue tempoutdir = os.path.join(tempoutdir, d) - if s3_flag: - s3tempoutdir = os.path.join(s3tempoutdir, d) # flattening list if isinstance(files, list): if isinstance(files[0], list): files = [item for sublist in files for item in sublist] - # Iterate through passed-in source files for src in filename_to_list(files): - # Format src and dst files src = os.path.abspath(src) - if not os.path.isfile(src): - src = os.path.join(src, '') - dst = self._get_dst(src) - if s3_flag: - s3dst = os.path.join(s3tempoutdir, dst) - s3dst = self._substitute(s3dst) - dst = os.path.join(tempoutdir, dst) - dst = self._substitute(dst) - path, _ = os.path.split(dst) - - # If we're uploading to S3 - if s3_flag: - self._upload_to_s3(src, s3dst) - out_files.append(s3dst) - # Otherwise, copy locally src -> dst - if not s3_flag or isdefined(self.inputs.local_copy): - # Create output directory if it doesnt exist + if os.path.isfile(src): + dst = self._get_dst(src) + dst = os.path.join(tempoutdir, dst) + dst = self._substitute(dst) + path, _ = os.path.split(dst) if not os.path.exists(path): try: os.makedirs(path) - except OSError, inst: + except OSError as inst: if 'File exists' in inst: pass else: raise(inst) - # If src is a file, copy it to dst - if os.path.isfile(src): - iflogger.debug('copyfile: %s %s' % (src, dst)) - copyfile(src, dst, copy=True, hashmethod='content', - use_hardlink=use_hardlink) - out_files.append(dst) - # If src is a directory, copy entire contents to dst dir - elif os.path.isdir(src): - if os.path.exists(dst) and self.inputs.remove_dest_dir: - iflogger.debug('removing: %s' % dst) - shutil.rmtree(dst) - iflogger.debug('copydir: %s %s' % (src, dst)) - copytree(src, dst) - out_files.append(dst) - - # Return outputs dictionary + iflogger.debug("copyfile: %s %s" % (src, dst)) + copyfile(src, dst, copy=True, hashmethod='content', + use_hardlink=use_hardlink) + out_files.append(dst) + elif os.path.isdir(src): + dst = self._get_dst(os.path.join(src, '')) + dst = os.path.join(tempoutdir, dst) + dst = self._substitute(dst) + path, _ = os.path.split(dst) + if not os.path.exists(path): + try: + os.makedirs(path) + except OSError as inst: + if 'File exists' in inst: + pass + else: + raise(inst) + if os.path.exists(dst) and self.inputs.remove_dest_dir: + iflogger.debug("removing: %s" % dst) + shutil.rmtree(dst) + iflogger.debug("copydir: %s %s" % (src, dst)) + copytree(src, dst) + out_files.append(dst) outputs['out_file'] = out_files return outputs @@ -766,15 +382,15 @@ def _list_outputs(self): class 
S3DataSinkInputSpec(DynamicTraitedSpec, BaseInterfaceInputSpec): testing = traits.Bool(False, usedefault=True, - desc='Flag for using local fakes3 server.' - ' (for testing purposes only)') + desc='Flag for using local fakes3 server.' + ' (for testing purposes only)') anon = traits.Bool(False, usedefault=True, - desc='Use anonymous connection to s3') + desc='Use anonymous connection to s3') bucket = traits.Str(mandatory=True, desc='Amazon S3 bucket where your data is stored') bucket_path = traits.Str('', usedefault=True, desc='Location within your bucket to store ' - 'data.') + 'data.') base_directory = Directory( desc='Path to the base directory for storing data.') container = traits.Str( @@ -795,8 +411,6 @@ class S3DataSinkInputSpec(DynamicTraitedSpec, BaseInterfaceInputSpec): _outputs = traits.Dict(traits.Str, value={}, usedefault=True) remove_dest_dir = traits.Bool(False, usedefault=True, desc='remove dest directory when copying dirs') - # Set this if user wishes to have local copy of files as well - local_copy = traits.Str(desc='Copy files locally as well as to S3 bucket') def __setattr__(self, key, value): if key not in self.copyable_trait_names(): @@ -812,7 +426,7 @@ def __setattr__(self, key, value): class S3DataSink(DataSink): """ Works exactly like DataSink, except the specified files will also be uploaded to Amazon S3 storage in the specified bucket - and location. 'bucket_path' is the s3 analog for + and location. 'bucket_path' is the s3 analog for 'base_directory'. """ @@ -842,7 +456,7 @@ def localtos3(self, paths): # convert local path to s3 path bd_index = path.find(self.inputs.base_directory) if bd_index != -1: # base_directory is in path, maintain directory structure - s3path = path[bd_index+len(self.inputs.base_directory):] # cut out base directory + s3path = path[bd_index + len(self.inputs.base_directory):] # cut out base directory if s3path[0] == os.path.sep: s3path = s3path[1:] else: # base_directory isn't in path, simply place all files in bucket_path folder @@ -861,16 +475,17 @@ def localtos3(self, paths): class S3DataGrabberInputSpec(DynamicTraitedSpec, BaseInterfaceInputSpec): anon = traits.Bool(False, usedefault=True, - desc='Use anonymous connection to s3') + desc='Use anonymous connection to s3. If this is set to True, boto may print' + + ' a urlopen error, but this does not prevent data from being downloaded.') region = traits.Str('us-east-1', usedefault=True, - desc='Region of s3 bucket') + desc='Region of s3 bucket') bucket = traits.Str(mandatory=True, desc='Amazon S3 bucket where your data is stored') bucket_path = traits.Str('', usedefault=True, desc='Location within your bucket for subject data.') local_directory = Directory(exists=True, desc='Path to the local directory for subject data to be downloaded ' - 'and accessed. Should be on HDFS for Spark jobs.') + 'and accessed. 
Should be on HDFS for Spark jobs.') raise_on_empty = traits.Bool(True, usedefault=True, desc='Generate exception if list is empty for a given field') sort_filelist = traits.Bool(mandatory=True, @@ -932,7 +547,7 @@ def __init__(self, infields=None, outfields=None, **kwargs): if not isdefined(self.inputs.template_args): self.inputs.template_args = {} for key in outfields: - if not key in self.inputs.template_args: + if key not in self.inputs.template_args: if infields: self.inputs.template_args[key] = [infields] else: @@ -973,8 +588,8 @@ def _list_outputs(self): if hasattr(self.inputs, 'field_template') and \ isdefined(self.inputs.field_template) and \ key in self.inputs.field_template: - template = self.inputs.field_template[key] # template override for multiple outfields - if isdefined(self.inputs.bucket_path): + template = self.inputs.field_template[key] # template override for multiple outfields + if isdefined(self.inputs.bucket_path): template = os.path.join(self.inputs.bucket_path, template) if not args: filelist = [] @@ -995,7 +610,7 @@ def _list_outputs(self): for argnum, arglist in enumerate(args): maxlen = 1 for arg in arglist: - if isinstance(arg, six.string_types) and hasattr(self.inputs, arg): + if isinstance(arg, string_types) and hasattr(self.inputs, arg): arg = getattr(self.inputs, arg) if isinstance(arg, list): if (maxlen > 1) and (len(arg) != maxlen): @@ -1006,7 +621,7 @@ def _list_outputs(self): for i in range(maxlen): argtuple = [] for arg in arglist: - if isinstance(arg, six.string_types) and hasattr(self.inputs, arg): + if isinstance(arg, string_types) and hasattr(self.inputs, arg): arg = getattr(self.inputs, arg) if isinstance(arg, list): argtuple.append(arg[i]) @@ -1042,14 +657,17 @@ def _list_outputs(self): # Outputs are currently stored as locations on S3. # We must convert to the local location specified # and download the files. - for key in outputs: - if type(outputs[key]) == list: - paths = outputs[key] - for i in range(len(paths)): - path = paths[i] + for key,val in outputs.iteritems(): + #This will basically be either list-like or string-like: + #if it has the __iter__ attribute, it's list-like (list, + #tuple, numpy array) and we iterate through each of its + #values. If it doesn't, it's string-like (string, + #unicode), and we convert that value directly. + if hasattr(val,'__iter__'): + for i,path in enumerate(val): outputs[key][i] = self.s3tolocal(path, bkt) - elif type(outputs[key]) == str: - outputs[key] = self.s3tolocal(outputs[key], bkt) + else: + outputs[key] = self.s3tolocal(val, bkt) return outputs @@ -1175,7 +793,7 @@ def __init__(self, infields=None, outfields=None, **kwargs): if not isdefined(self.inputs.template_args): self.inputs.template_args = {} for key in outfields: - if not key in self.inputs.template_args: + if key not in self.inputs.template_args: if infields: self.inputs.template_args[key] = [infields] else: @@ -1189,7 +807,7 @@ def _add_output_traits(self, base): Using traits.Any instead out OutputMultiPath till add_trait bug is fixed. 
""" - return add_traits(base, self.inputs.template_args.keys()) + return add_traits(base, list(self.inputs.template_args.keys())) def _list_outputs(self): # infields are mandatory, however I could not figure out how to set 'mandatory' flag dynamically @@ -1203,7 +821,7 @@ def _list_outputs(self): raise ValueError(msg) outputs = {} - for key, args in self.inputs.template_args.items(): + for key, args in list(self.inputs.template_args.items()): outputs[key] = [] template = self.inputs.template if hasattr(self.inputs, 'field_template') and \ @@ -1231,7 +849,7 @@ def _list_outputs(self): for argnum, arglist in enumerate(args): maxlen = 1 for arg in arglist: - if isinstance(arg, six.string_types) and hasattr(self.inputs, arg): + if isinstance(arg, string_types) and hasattr(self.inputs, arg): arg = getattr(self.inputs, arg) if isinstance(arg, list): if (maxlen > 1) and (len(arg) != maxlen): @@ -1242,7 +860,7 @@ def _list_outputs(self): for i in range(maxlen): argtuple = [] for arg in arglist: - if isinstance(arg, six.string_types) and hasattr(self.inputs, arg): + if isinstance(arg, string_types) and hasattr(self.inputs, arg): arg = getattr(self.inputs, arg) if isinstance(arg, list): argtuple.append(arg[i]) @@ -1278,17 +896,17 @@ def _list_outputs(self): class SelectFilesInputSpec(DynamicTraitedSpec, BaseInterfaceInputSpec): base_directory = Directory(exists=True, - desc="Root path common to templates.") + desc="Root path common to templates.") sort_filelist = traits.Bool(True, usedefault=True, - desc="When matching mutliple files, return them in sorted order.") + desc="When matching mutliple files, return them in sorted order.") raise_on_empty = traits.Bool(True, usedefault=True, - desc="Raise an exception if a template pattern matches no files.") + desc="Raise an exception if a template pattern matches no files.") force_lists = traits.Either(traits.Bool(), traits.List(traits.Str()), - default=False, usedefault=True, - desc=("Whether to return outputs as a list even when only one file " - "matches the template. Either a boolean that applies to all " - "output fields or a list of output field names to coerce to " - " a list")) + default=False, usedefault=True, + desc=("Whether to return outputs as a list even when only one file " + "matches the template. Either a boolean that applies to all " + "output fields or a list of output field names to coerce to " + " a list")) class SelectFiles(IOBase): @@ -1305,12 +923,13 @@ class SelectFiles(IOBase): Examples -------- + >>> import pprint >>> from nipype import SelectFiles, Node >>> templates={"T1": "{subject_id}/struct/T1.nii", ... 
"epi": "{subject_id}/func/f[0, 1].nii"} >>> dg = Node(SelectFiles(templates), "selectfiles") >>> dg.inputs.subject_id = "subj1" - >>> dg.outputs.get() + >>> pprint.pprint(dg.outputs.get()) # doctest: +NORMALIZE_WHITESPACE {'T1': , 'epi': } The same thing with dynamic grabbing of specific files: @@ -1345,7 +964,7 @@ def __init__(self, templates, **kwargs): # Infer the infields and outfields from the template infields = [] - for name, template in templates.iteritems(): + for name, template in templates.items(): for _, field_name, _, _ in string.Formatter().parse(template): if field_name is not None and field_name not in infields: infields.append(field_name) @@ -1363,12 +982,12 @@ def __init__(self, templates, **kwargs): def _add_output_traits(self, base): """Add the dynamic output fields""" - return add_traits(base, self._templates.keys()) + return add_traits(base, list(self._templates.keys())) def _list_outputs(self): """Find the files and expose them as interface outputs.""" outputs = {} - info = dict([(k, v) for k, v in self.inputs.__dict__.items() + info = dict([(k, v) for k, v in list(self.inputs.__dict__.items()) if k in self._infields]) force_lists = self.inputs.force_lists @@ -1383,7 +1002,7 @@ def _list_outputs(self): "'templates'.") % (plural, bad_fields, verb) raise ValueError(msg) - for field, template in self._templates.iteritems(): + for field, template in self._templates.items(): # Build the full template path if isdefined(self.inputs.base_directory): @@ -1425,10 +1044,10 @@ class DataFinderInputSpec(DynamicTraitedSpec, BaseInterfaceInputSpec): match_regex = traits.Str('(.+)', usedefault=True, desc=("Regular expression for matching " - "paths.")) + "paths.")) ignore_regexes = traits.List(desc=("List of regular expressions, " - "if any match the path it will be " - "ignored.") + "if any match the path it will be " + "ignored.") ) max_depth = traits.Int(desc="The maximum depth to search beneath " "the root_paths") @@ -1456,17 +1075,17 @@ class DataFinder(IOBase): >>> df.inputs.root_paths = '.' 
>>> df.inputs.match_regex = '.+/(?P.+(qT1|ep2d_fid_T1).+)/(?P.+)\.nii.gz' >>> result = df.run() # doctest: +SKIP - >>> print result.outputs.out_paths # doctest: +SKIP + >>> result.outputs.out_paths # doctest: +SKIP ['./027-ep2d_fid_T1_Gd4/acquisition.nii.gz', './018-ep2d_fid_T1_Gd2/acquisition.nii.gz', './016-ep2d_fid_T1_Gd1/acquisition.nii.gz', './013-ep2d_fid_T1_pre/acquisition.nii.gz'] - >>> print result.outputs.series_dir # doctest: +SKIP + >>> result.outputs.series_dir # doctest: +SKIP ['027-ep2d_fid_T1_Gd4', '018-ep2d_fid_T1_Gd2', '016-ep2d_fid_T1_Gd1', '013-ep2d_fid_T1_pre'] - >>> print result.outputs.basename # doctest: +SKIP + >>> result.outputs.basename # doctest: +SKIP ['acquisition', 'acquisition' 'acquisition', @@ -1479,25 +1098,25 @@ class DataFinder(IOBase): _always_run = True def _match_path(self, target_path): - #Check if we should ignore the path + # Check if we should ignore the path for ignore_re in self.ignore_regexes: if ignore_re.search(target_path): return - #Check if we can match the path + # Check if we can match the path match = self.match_regex.search(target_path) - if not match is None: + if match is not None: match_dict = match.groupdict() if self.result is None: self.result = {'out_paths': []} - for key in match_dict.keys(): + for key in list(match_dict.keys()): self.result[key] = [] self.result['out_paths'].append(target_path) - for key, val in match_dict.iteritems(): + for key, val in match_dict.items(): self.result[key].append(val) def _run_interface(self, runtime): - #Prepare some of the inputs - if isinstance(self.inputs.root_paths, six.string_types): + # Prepare some of the inputs + if isinstance(self.inputs.root_paths, string_types): self.inputs.root_paths = [self.inputs.root_paths] self.match_regex = re.compile(self.inputs.match_regex) if self.inputs.max_depth is Undefined: @@ -1516,24 +1135,24 @@ def _run_interface(self, runtime): for regex in self.inputs.ignore_regexes] self.result = None for root_path in self.inputs.root_paths: - #Handle tilda/env variables and remove extra seperators + # Handle tilda/env variables and remove extra seperators root_path = os.path.normpath(os.path.expandvars(os.path.expanduser(root_path))) - #Check if the root_path is a file + # Check if the root_path is a file if os.path.isfile(root_path): if min_depth == 0: self._match_path(root_path) continue - #Walk through directory structure checking paths + # Walk through directory structure checking paths for curr_dir, sub_dirs, files in os.walk(root_path): - #Determine the current depth from the root_path + # Determine the current depth from the root_path curr_depth = (curr_dir.count(os.sep) - root_path.count(os.sep)) - #If the max path depth has been reached, clear sub_dirs - #and files + # If the max path depth has been reached, clear sub_dirs + # and files if max_depth is not None and curr_depth >= max_depth: sub_dirs[:] = [] files = [] - #Test the path for the curr_dir and all files + # Test the path for the curr_dir and all files if curr_depth >= min_depth: self._match_path(curr_dir) if curr_depth >= (min_depth - 1): @@ -1541,17 +1160,16 @@ def _run_interface(self, runtime): full_path = os.path.join(curr_dir, infile) self._match_path(full_path) if (self.inputs.unpack_single and - len(self.result['out_paths']) == 1 - ): - for key, vals in self.result.iteritems(): + len(self.result['out_paths']) == 1): + for key, vals in self.result.items(): self.result[key] = vals[0] else: - #sort all keys acording to out_paths - for key in self.result.keys(): + # sort all keys acording 
to out_paths + for key in list(self.result.keys()): if key == "out_paths": continue - sort_tuples = human_order_sorted(zip(self.result["out_paths"], - self.result[key])) + sort_tuples = human_order_sorted(list(zip(self.result["out_paths"], + self.result[key]))) self.result[key] = [x for (_, x) in sort_tuples] self.result["out_paths"] = human_order_sorted(self.result["out_paths"]) @@ -1701,7 +1319,7 @@ def _list_outputs(self): subject_path = os.path.join(subjects_dir, self.inputs.subject_id) output_traits = self._outputs() outputs = output_traits.get() - for k in outputs.keys(): + for k in list(outputs.keys()): val = self._get_files(subject_path, k, output_traits.traits()[k].loc, output_traits.traits()[k].altkey) @@ -1804,7 +1422,7 @@ def __init__(self, infields=None, outfields=None, **kwargs): desc="arguments that fit into query_template") ) undefined_traits['field_template'] = Undefined - #self.inputs.remove_trait('query_template_args') + # self.inputs.remove_trait('query_template_args') outdict = {} for key in outfields: outdict[key] = [] @@ -1817,7 +1435,7 @@ def _add_output_traits(self, base): Using traits.Any instead out OutputMultiPath till add_trait bug is fixed. """ - return add_traits(base, self.inputs.query_template_args.keys()) + return add_traits(base, list(self.inputs.query_template_args.keys())) def _list_outputs(self): # infields are mandatory, however I could not figure out @@ -1840,12 +1458,12 @@ def _list_outputs(self): if not isdefined(value): msg = ("%s requires a value for input '%s' " "because it was listed in 'infields'" % - (self.__class__.__name__, key) + (self.__class__.__name__, key) ) raise ValueError(msg) outputs = {} - for key, args in self.inputs.query_template_args.items(): + for key, args in list(self.inputs.query_template_args.items()): outputs[key] = [] template = self.inputs.query_template if hasattr(self.inputs, 'field_template') and \ @@ -1866,7 +1484,7 @@ def _list_outputs(self): for argnum, arglist in enumerate(args): maxlen = 1 for arg in arglist: - if isinstance(arg, six.string_types) and hasattr(self.inputs, arg): + if isinstance(arg, string_types) and hasattr(self.inputs, arg): arg = getattr(self.inputs, arg) if isinstance(arg, list): if (maxlen > 1) and (len(arg) != maxlen): @@ -1879,7 +1497,7 @@ def _list_outputs(self): for i in range(maxlen): argtuple = [] for arg in arglist: - if isinstance(arg, six.string_types) and \ + if isinstance(arg, string_types) and \ hasattr(self.inputs, arg): arg = getattr(self.inputs, arg) if isinstance(arg, list): @@ -1960,11 +1578,11 @@ class XNATSinkInputSpec(DynamicTraitedSpec, BaseInterfaceInputSpec): ) share = traits.Bool(False, - desc=('Option to share the subjects from the original project' - 'instead of creating new ones when possible - the created ' - 'experiments are then shared back to the original project' - ), - usedefault=True) + desc=('Option to share the subjects from the original project' + 'instead of creating new ones when possible - the created ' + 'experiments are then shared back to the original project' + ), + usedefault=True) def __setattr__(self, key, value): if key not in self.copyable_trait_names(): @@ -2043,7 +1661,7 @@ def _list_outputs(self): uri_template_args['reconstruction_id'] = quote_id(self.inputs.reconstruction_id) # gather outputs and upload them - for key, files in self.inputs._outputs.items(): + for key, files in list(self.inputs._outputs.items()): for name in filename_to_list(files): @@ -2074,7 +1692,7 @@ def push_file(self, xnat, file_name, out_key, uri_template_args): if 
part.startswith('_') and len(part.split('_')) % 2 ] - keymap = dict(zip(val_list[1::2], val_list[2::2])) + keymap = dict(list(zip(val_list[1::2], val_list[2::2]))) _label = [] for key, val in sorted(keymap.items()): @@ -2121,7 +1739,7 @@ def push_file(self, xnat, file_name, out_key, uri_template_args): ) # unquote values before uploading - for key in uri_template_args.keys(): + for key in list(uri_template_args.keys()): uri_template_args[key] = unquote_id(uri_template_args[key]) # upload file @@ -2258,18 +1876,19 @@ def _list_outputs(self): c.close() return None + class SSHDataGrabberInputSpec(DataGrabberInputSpec): hostname = traits.Str(mandatory=True, desc='Server hostname.') username = traits.Str(desc='Server username.') password = traits.Password(desc='Server password.') download_files = traits.Bool(True, usedefault=True, - desc='If false it will return the file names without downloading them') + desc='If false it will return the file names without downloading them') base_directory = traits.Str(mandatory=True, - desc='Path to the base directory consisting of subject data.') + desc='Path to the base directory consisting of subject data.') template_expression = traits.Enum(['fnmatch', 'regexp'], usedefault=True, - desc='Use either fnmatch or regexp to express templates') + desc='Use either fnmatch or regexp to express templates') ssh_log_to_file = traits.Str('', usedefault=True, - desc='If set SSH commands will be logged to the given file') + desc='If set SSH commands will be logged to the given file') class SSHDataGrabber(DataGrabber): @@ -2354,7 +1973,7 @@ def __init__(self, infields=None, outfields=None, **kwargs): paramiko except NameError: warn( - "The library parmiko needs to be installed" + "The library paramiko needs to be installed" " for this module to run." ) if not outfields: @@ -2377,13 +1996,12 @@ def __init__(self, infields=None, outfields=None, **kwargs): ): self.inputs.template += '$' - def _list_outputs(self): try: paramiko except NameError: raise ImportError( - "The library parmiko needs to be installed" + "The library paramiko needs to be installed" " for this module to run." 
) @@ -2400,7 +2018,7 @@ def _list_outputs(self): raise ValueError(msg) outputs = {} - for key, args in self.inputs.template_args.items(): + for key, args in list(self.inputs.template_args.items()): outputs[key] = [] template = self.inputs.template if hasattr(self.inputs, 'field_template') and \ @@ -2416,7 +2034,7 @@ def _list_outputs(self): filelist = fnmatch.filter(filelist, template) elif self.inputs.template_expression == 'regexp': regexp = re.compile(template) - filelist = filter(regexp.match, filelist) + filelist = list(filter(regexp.match, filelist)) else: raise ValueError('template_expression value invalid') if len(filelist) == 0: @@ -2436,7 +2054,7 @@ def _list_outputs(self): for argnum, arglist in enumerate(args): maxlen = 1 for arg in arglist: - if isinstance(arg, six.string_types) and hasattr(self.inputs, arg): + if isinstance(arg, string_types) and hasattr(self.inputs, arg): arg = getattr(self.inputs, arg) if isinstance(arg, list): if (maxlen > 1) and (len(arg) != maxlen): @@ -2447,7 +2065,7 @@ def _list_outputs(self): for i in range(maxlen): argtuple = [] for arg in arglist: - if isinstance(arg, six.string_types) and hasattr(self.inputs, arg): + if isinstance(arg, string_types) and hasattr(self.inputs, arg): arg = getattr(self.inputs, arg) if isinstance(arg, list): argtuple.append(arg[i]) @@ -2469,7 +2087,7 @@ def _list_outputs(self): outfiles = fnmatch.filter(filelist, filledtemplate_base) elif self.inputs.template_expression == 'regexp': regexp = re.compile(filledtemplate_base) - outfiles = filter(regexp.match, filelist) + outfiles = list(filter(regexp.match, filelist)) else: raise ValueError('template_expression value invalid') if len(outfiles) == 0: @@ -2496,7 +2114,7 @@ def _list_outputs(self): elif len(outputs[key]) == 1: outputs[key] = outputs[key][0] - for k, v in outputs.items(): + for k, v in list(outputs.items()): outputs[k] = os.path.join(os.getcwd(), v) return outputs @@ -2523,7 +2141,7 @@ def _get_ssh_client(self): class JSONFileGrabberInputSpec(DynamicTraitedSpec, BaseInterfaceInputSpec): in_file = File(exists=True, desc='JSON source file') defaults = traits.Dict(desc=('JSON dictionary that sets default output' - 'values, overridden by values found in in_file')) + 'values, overridden by values found in in_file')) class JSONFileGrabber(IOBase): @@ -2535,16 +2153,17 @@ class JSONFileGrabber(IOBase): Example ------- + >>> import pprint >>> from nipype.interfaces.io import JSONFileGrabber >>> jsonSource = JSONFileGrabber() - >>> jsonSource.inputs.defaults = {'param1': u'overrideMe', 'param3': 1.0} + >>> jsonSource.inputs.defaults = {'param1': 'overrideMe', 'param3': 1.0} >>> res = jsonSource.run() - >>> res.outputs.get() - {'param3': 1.0, 'param1': u'overrideMe'} + >>> pprint.pprint(res.outputs.get()) + {'param1': 'overrideMe', 'param3': 1.0} >>> jsonSource.inputs.in_file = 'jsongrabber.txt' >>> res = jsonSource.run() - >>> res.outputs.get() - {'param3': 1.0, 'param2': 4, 'param1': u'exampleStr'} + >>> pprint.pprint(res.outputs.get()) # doctest: +NORMALIZE_WHITESPACE + {'param1': 'exampleStr', 'param2': 4, 'param3': 1.0} """ @@ -2553,23 +2172,23 @@ class JSONFileGrabber(IOBase): _always_run = True def _list_outputs(self): - import json + import simplejson outputs = {} if isdefined(self.inputs.in_file): with open(self.inputs.in_file, 'r') as f: - data = json.load(f) + data = simplejson.load(f) if not isinstance(data, dict): raise RuntimeError('JSON input has no dictionary structure') - for key, value in data.iteritems(): + for key, value in data.items(): outputs[key] 
= value if isdefined(self.inputs.defaults): defaults = self.inputs.defaults - for key, value in defaults.iteritems(): - if key not in outputs.keys(): + for key, value in defaults.items(): + if key not in list(outputs.keys()): outputs[key] = value return outputs @@ -2655,7 +2274,7 @@ def _process_name(self, name, val): return name, val def _list_outputs(self): - import json + import simplejson import os.path as op if not isdefined(self.inputs.out_file): @@ -2666,14 +2285,14 @@ def _list_outputs(self): out_dict = self.inputs.in_dict # Overwrite in_dict entries automatically - for key, val in self.inputs._outputs.items(): + for key, val in list(self.inputs._outputs.items()): if not isdefined(val) or key == 'trait_added': continue key, val = self._process_name(key, val) out_dict[key] = val with open(out_file, 'w') as f: - json.dump(out_dict, f) + simplejson.dump(out_dict, f) outputs = self.output_spec().get() outputs['out_file'] = out_file return outputs diff --git a/nipype/interfaces/tests/test_io.py b/nipype/interfaces/tests/test_io.py index d5abeab223..37ed6eae43 100644 --- a/nipype/interfaces/tests/test_io.py +++ b/nipype/interfaces/tests/test_io.py @@ -1,5 +1,10 @@ # emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*- # vi: set ft=python sts=4 ts=4 sw=4 et: +from __future__ import print_function +from builtins import zip +from builtins import range +from builtins import open + import os import glob import shutil @@ -13,7 +18,6 @@ import nipype.interfaces.io as nio from nipype.interfaces.base import Undefined -# Check for boto noboto = False try: import boto @@ -21,13 +25,6 @@ except: noboto = True -# Check for boto3 -noboto3 = False -try: - import boto3 - from botocore.utils import fix_s3_host -except: - noboto3 = True def test_datagrabber(): dg = nio.DataGrabber() @@ -35,6 +32,7 @@ def test_datagrabber(): yield assert_equal, dg.inputs.base_directory, Undefined yield assert_equal, dg.inputs.template_args, {'outfiles': []} + @skipif(noboto) def test_s3datagrabber(): dg = nio.S3DataGrabber() @@ -95,9 +93,11 @@ def test_selectfiles_valueerror(): force_lists=force_lists) yield assert_raises, ValueError, sf.run + @skipif(noboto) def test_s3datagrabber_communication(): - dg = nio.S3DataGrabber(infields=['subj_id', 'run_num'], outfields=['func', 'struct']) + dg = nio.S3DataGrabber( + infields=['subj_id', 'run_num'], outfields=['func', 'struct']) dg.inputs.anon = True dg.inputs.bucket = 'openfmri' dg.inputs.bucket_path = 'ds001/' @@ -109,24 +109,25 @@ def test_s3datagrabber_communication(): struct='%s/anatomy/highres001_brain.nii.gz') dg.inputs.subj_id = ['sub001', 'sub002'] dg.inputs.run_num = ['run001', 'run003'] - dg.inputs.template_args = dg.inputs.template_args = dict( + dg.inputs.template_args = dict( func=[['subj_id', 'run_num']], struct=[['subj_id']]) res = dg.run() func_outfiles = res.outputs.func struct_outfiles = res.outputs.struct # check for all files - yield assert_true, '/sub001/BOLD/task001_run001/bold.nii.gz' in func_outfiles[0] + yield assert_true, os.path.join(dg.inputs.local_directory, '/sub001/BOLD/task001_run001/bold.nii.gz') in func_outfiles[0] yield assert_true, os.path.exists(func_outfiles[0]) - yield assert_true, '/sub001/anatomy/highres001_brain.nii.gz' in struct_outfiles[0] + yield assert_true, os.path.join(dg.inputs.local_directory, '/sub001/anatomy/highres001_brain.nii.gz') in struct_outfiles[0] yield assert_true, os.path.exists(struct_outfiles[0]) - yield assert_true, '/sub002/BOLD/task001_run003/bold.nii.gz' in func_outfiles[1] + yield 
assert_true, os.path.join(dg.inputs.local_directory, '/sub002/BOLD/task001_run003/bold.nii.gz') in func_outfiles[1] yield assert_true, os.path.exists(func_outfiles[1]) - yield assert_true, '/sub002/anatomy/highres001_brain.nii.gz' in struct_outfiles[1] + yield assert_true, os.path.join(dg.inputs.local_directory, '/sub002/anatomy/highres001_brain.nii.gz') in struct_outfiles[1] yield assert_true, os.path.exists(struct_outfiles[1]) shutil.rmtree(tempdir) + def test_datagrabber_order(): tempdir = mkdtemp() file1 = mkstemp(prefix='sub002_L1_R1.q', dir=tempdir) @@ -152,6 +153,7 @@ def test_datagrabber_order(): yield assert_true, 'sub002_L3_R10' in outfiles[2][1] shutil.rmtree(tempdir) + def test_datasink(): ds = nio.DataSink() yield assert_true, ds.inputs.parameterization @@ -163,157 +165,6 @@ def test_datasink(): ds = nio.DataSink(infields=['test']) yield assert_true, 'test' in ds.inputs.copyable_trait_names() -# Function to check for fakes3 -def _check_for_fakes3(): - ''' - Function used internally to check for fakes3 installation - ''' - - # Import packages - import subprocess - - # Init variables - fakes3_found = False - - # Check for fakes3 - try: - ret_code = subprocess.check_call(['which', 'fakes3'], stdout=open(os.devnull, 'wb')) - if ret_code == 0: - fakes3_found = True - except subprocess.CalledProcessError as exc: - print 'fakes3 not found, install via \'gem install fakes3\', skipping test...' - except: - print 'Unable to check for fakes3 installation, skipping test...' - - # Return if found - return fakes3_found - -def _make_dummy_input(): - ''' - ''' - - # Import packages - import tempfile - - # Init variables - input_dir = tempfile.mkdtemp() - input_path = os.path.join(input_dir, 'datasink_test_s3.txt') - - # Create input file - with open(input_path, 'wb') as f: - f.write('ABCD1234') - - # Return path - return input_path - -# Check for fakes3 -fakes3 = _check_for_fakes3() - - -@skipif(noboto3 or not fakes3) -# Test datasink writes to s3 properly -def test_datasink_to_s3(): - ''' - This function tests to see if the S3 functionality of a DataSink - works properly - ''' - - # Import packages - import hashlib - import tempfile - - # Init variables - ds = nio.DataSink() - bucket_name = 'test' - container = 'outputs' - attr_folder = 'text_file' - output_dir = 's3://' + bucket_name - # Local temporary filepaths for testing - fakes3_dir = tempfile.mkdtemp() - input_path = _make_dummy_input() - - # Start up fake-S3 server - proc = Popen(['fakes3', '-r', fakes3_dir, '-p', '4567'], stdout=open(os.devnull, 'wb')) - - # Init boto3 s3 resource to talk with fakes3 - resource = boto3.resource(aws_access_key_id='mykey', - aws_secret_access_key='mysecret', - service_name='s3', - endpoint_url='http://localhost:4567', - use_ssl=False) - resource.meta.client.meta.events.unregister('before-sign.s3', fix_s3_host) - - # Create bucket - bucket = resource.create_bucket(Bucket=bucket_name) - - # Prep datasink - ds.inputs.base_directory = output_dir - ds.inputs.container = container - ds.inputs.bucket = bucket - setattr(ds.inputs, attr_folder, input_path) - - # Run datasink - ds.run() - - # Get MD5sums and compare - key = '/'.join([container, attr_folder, os.path.basename(input_path)]) - obj = bucket.Object(key=key) - dst_md5 = obj.e_tag.replace('"', '') - src_md5 = hashlib.md5(open(input_path, 'rb').read()).hexdigest() - - # Kill fakes3 - proc.kill() - - # Delete fakes3 folder and input file - shutil.rmtree(fakes3_dir) - shutil.rmtree(os.path.dirname(input_path)) - - # Make sure md5sums match - yield 
assert_equal, src_md5, dst_md5 - -# Test the local copy attribute -def test_datasink_localcopy(): - ''' - Function to validate DataSink will make local copy via local_copy - attribute - ''' - - # Import packages - import hashlib - import tempfile - - # Init variables - local_dir = tempfile.mkdtemp() - container = 'outputs' - attr_folder = 'text_file' - - # Make dummy input file and datasink - input_path = _make_dummy_input() - ds = nio.DataSink() - - # Set up datasink - ds.inputs.container = container - ds.inputs.local_copy = local_dir - setattr(ds.inputs, attr_folder, input_path) - - # Expected local copy path - local_copy = os.path.join(local_dir, container, attr_folder, - os.path.basename(input_path)) - - # Run the datasink - ds.run() - - # Check md5sums of both - src_md5 = hashlib.md5(open(input_path, 'rb').read()).hexdigest() - dst_md5 = hashlib.md5(open(local_copy, 'rb').read()).hexdigest() - - # Delete temp diretories - shutil.rmtree(os.path.dirname(input_path)) - shutil.rmtree(local_dir) - - # Perform test - yield assert_equal, src_md5, dst_md5 - @skipif(noboto) def test_s3datasink(): @@ -350,13 +201,14 @@ def test_datasink_substitutions(): setattr(ds.inputs, '@outdir', files) ds.run() yield assert_equal, \ - sorted([os.path.basename(x) for - x in glob.glob(os.path.join(outdir, '*'))]), \ - ['!-yz-b.n', 'ABABAB.n'] # so we got re used 2nd and both patterns + sorted([os.path.basename(x) for + x in glob.glob(os.path.join(outdir, '*'))]), \ + ['!-yz-b.n', 'ABABAB.n'] # so we got re used 2nd and both patterns shutil.rmtree(indir) shutil.rmtree(outdir) -@skipif(noboto or not fakes3) + +@skipif(noboto) def test_s3datasink_substitutions(): indir = mkdtemp(prefix='-Tmp-nipype_ds_subs_in') outdir = mkdtemp(prefix='-Tmp-nipype_ds_subs_out') @@ -368,10 +220,17 @@ def test_s3datasink_substitutions(): # run fakes3 server and set up bucket fakes3dir = op.expanduser('~/fakes3') - proc = Popen(['fakes3', '-r', fakes3dir, '-p', '4567'], stdout=open(os.devnull, 'wb')) + try: + proc = Popen( + ['fakes3', '-r', fakes3dir, '-p', '4567'], stdout=open(os.devnull, 'wb')) + except OSError as ose: + if 'No such file or directory' in str(ose): + return # fakes3 not installed. OK! 
+ raise ose + conn = S3Connection(anon=True, is_secure=False, port=4567, - host='localhost', - calling_format=OrdinaryCallingFormat()) + host='localhost', + calling_format=OrdinaryCallingFormat()) conn.create_bucket('test') ds = nio.S3DataSink( @@ -392,9 +251,9 @@ def test_s3datasink_substitutions(): setattr(ds.inputs, '@outdir', files) ds.run() yield assert_equal, \ - sorted([os.path.basename(x) for - x in glob.glob(os.path.join(outdir, '*'))]), \ - ['!-yz-b.n', 'ABABAB.n'] # so we got re used 2nd and both patterns + sorted([os.path.basename(x) for + x in glob.glob(os.path.join(outdir, '*'))]), \ + ['!-yz-b.n', 'ABABAB.n'] # so we got re used 2nd and both patterns bkt = conn.get_bucket(ds.inputs.bucket) bkt_files = list(k for k in bkt.list()) @@ -426,11 +285,12 @@ def test_s3datasink_substitutions(): shutil.rmtree(indir) shutil.rmtree(outdir) + def _temp_analyze_files(): """Generate temporary analyze file pair.""" fd, orig_img = mkstemp(suffix='.img', dir=mkdtemp()) orig_hdr = orig_img[:-4] + '.hdr' - fp = file(orig_hdr, 'w+') + fp = open(orig_hdr, 'w+') fp.close() return orig_img, orig_hdr @@ -516,7 +376,7 @@ def test_datafinder_unpack(): df.inputs.match_regex = '.+/(?P.+)\.txt' df.inputs.unpack_single = True result = df.run() - print result.outputs.out_paths + print(result.outputs.out_paths) yield assert_equal, result.outputs.out_paths, single_res @@ -528,7 +388,7 @@ def test_freesurfersource(): def test_jsonsink(): - import json + import simplejson import os ds = nio.JSONFileSink() @@ -547,7 +407,7 @@ def test_jsonsink(): res = js.run() with open(res.outputs.out_file, 'r') as f: - data = json.load(f) + data = simplejson.load(f) yield assert_true, data == {"contrasts": {"alt": "someNestedValue"}, "foo": "var", "new_entry": "someValue"} js = nio.JSONFileSink(infields=['test'], in_dict={'foo': 'var'}) @@ -557,9 +417,8 @@ def test_jsonsink(): res = js.run() with open(res.outputs.out_file, 'r') as f: - data = json.load(f) + data = simplejson.load(f) yield assert_true, data == {"test": "testInfields", "contrasts": {"alt": "someNestedValue"}, "foo": "var", "new_entry": "someValue"} os.chdir(curdir) shutil.rmtree(outdir) - From 70ca4576f1a519506ede675318a2d4507c09882d Mon Sep 17 00:00:00 2001 From: dclark87 Date: Wed, 13 Jan 2016 17:00:32 -0500 Subject: [PATCH 02/37] Started adding in logic for num_threads and changed names of real memory stats keys --- nipype/interfaces/base.py | 17 +++++++++++------ nipype/interfaces/utility.py | 15 +++++++++------ nipype/pipeline/plugins/multiproc.py | 10 ++++++---- 3 files changed, 26 insertions(+), 16 deletions(-) diff --git a/nipype/interfaces/base.py b/nipype/interfaces/base.py index 2112cdc739..7577e23c5f 100644 --- a/nipype/interfaces/base.py +++ b/nipype/interfaces/base.py @@ -1248,7 +1248,8 @@ def run_command(runtime, output=None, timeout=0.01, redirect_x=False): outfile = os.path.join(runtime.cwd, 'stdout.nipype') # Init variables for memory profiling - ret = -1 + mem_mb = -1 + num_threads = -1 interval = 0.1 if output == 'stream': @@ -1268,7 +1269,7 @@ def _process(drain=0): stream.read(drain) while proc.returncode is None: if mem_prof: - ret = max([ret, _get_memory(proc.pid, include_children=True)]) + mem_mb = max([mem_mb, _get_memory(proc.pid, include_children=True)]) time.sleep(interval) proc.poll() _process() @@ -1287,7 +1288,8 @@ def _process(drain=0): if output == 'allatonce': if mem_prof: while proc.returncode is None: - ret = max([ret, _get_memory(proc.pid, include_children=True)]) + mem_mb = max([mem_mb, _get_memory(proc.pid, 
include_children=True)]) + num_threads = max([num_threads, psutil.Proc(proc.pid).num_threads()]) time.sleep(interval) proc.poll() stdout, stderr = proc.communicate() @@ -1297,7 +1299,8 @@ def _process(drain=0): if output == 'file': if mem_prof: while proc.returncode is None: - ret = max([ret, _get_memory(proc.pid, include_children=True)]) + mem_mb = max([mem_mb, _get_memory(proc.pid, include_children=True)]) + num_threads = max([num_threads, psutil.Proc(proc.pid).num_threads()]) time.sleep(interval) proc.poll() ret_code = proc.wait() @@ -1309,7 +1312,8 @@ def _process(drain=0): if output == 'none': if mem_prof: while proc.returncode is None: - ret = max([ret, _get_memory(proc.pid, include_children=True)]) + mem_mb = max([mem_mb, _get_memory(proc.pid, include_children=True)]) + num_threads = max([num_threads, psutil.Proc(proc.pid).num_threads()]) time.sleep(interval) proc.poll() proc.communicate() @@ -1317,7 +1321,8 @@ def _process(drain=0): result['stderr'] = [] result['merged'] = '' - setattr(runtime, 'real_memory2', ret/1024.0) + setattr(runtime, 'cmd_memory', mem_mb/1024.0) + setattr(runtime, 'num_threads', num_threads) runtime.stderr = '\n'.join(result['stderr']) runtime.stdout = '\n'.join(result['stdout']) runtime.merged = result['merged'] diff --git a/nipype/interfaces/utility.py b/nipype/interfaces/utility.py index 10effaa548..f9d7aefe46 100644 --- a/nipype/interfaces/utility.py +++ b/nipype/interfaces/utility.py @@ -442,12 +442,15 @@ def _run_interface(self, runtime): if isdefined(value): args[name] = value - # mem stuff - import memory_profiler - proc = (function_handle, (), args) - mem_mb, out = memory_profiler.memory_usage(proc=proc, retval=True, include_children=True, max_usage=True) - setattr(runtime, 'real_memory2', mem_mb[0]/1024.0) - #out = function_handle(**args) + # Record memory of function_handle + try: + import memory_profiler + proc = (function_handle, (), args) + mem_mb, out = memory_profiler.memory_usage(proc=proc, retval=True, include_children=True, max_usage=True) + setattr(runtime, 'cmd_memory', mem_mb[0]/1024.0) + # If no memory_profiler package, run without recording memory + except ImportError: + out = function_handle(**args) if len(self._output_names) == 1: self._out[self._output_names[0]] = out diff --git a/nipype/pipeline/plugins/multiproc.py b/nipype/pipeline/plugins/multiproc.py index 3a5c63df35..5234abfd22 100644 --- a/nipype/pipeline/plugins/multiproc.py +++ b/nipype/pipeline/plugins/multiproc.py @@ -17,18 +17,20 @@ def run_node(node, updatehash, plugin_args=None): result = dict(result=None, traceback=None) try: run_memory = plugin_args['memory_profile'] - except Exception: + import memory_profiler + except KeyError: + run_memory = False + except ImportError: run_memory = False if run_memory: - import memory_profiler import datetime proc = (node.run, (), {'updatehash' : updatehash}) start = datetime.datetime.now() mem_mb, retval = memory_profiler.memory_usage(proc=proc, retval=True, include_children=True, max_usage=True) runtime = (datetime.datetime.now() - start).total_seconds() result['result'] = retval - result['real_memory'] = mem_mb[0]/1024.0 - result['real_memory2'] = retval.runtime.get('real_memory2') + result['node_memory'] = mem_mb[0]/1024.0 + result['cmd_memory'] = retval.runtime.get('cmd_memory') result['run_seconds'] = runtime else: try: From 36e1446c067a8ba0a1010411ec8a9d926489abea Mon Sep 17 00:00:00 2001 From: dclark87 Date: Thu, 14 Jan 2016 17:27:41 -0500 Subject: [PATCH 03/37] Added cmd-level threads and memory profiling --- 
nipype/interfaces/base.py | 26 +++++++------------ nipype/pipeline/plugins/base.py | 2 -- nipype/pipeline/plugins/callback_log.py | 19 +++++++++----- nipype/pipeline/plugins/multiproc.py | 34 +++++++++++++++++-------- 4 files changed, 44 insertions(+), 37 deletions(-) diff --git a/nipype/interfaces/base.py b/nipype/interfaces/base.py index 7577e23c5f..fb4a6abb71 100644 --- a/nipype/interfaces/base.py +++ b/nipype/interfaces/base.py @@ -751,17 +751,8 @@ def __init__(self, **inputs): self.__class__.__name__) self.inputs = self.input_spec(**inputs) self.estimated_memory = 1 - self._real_memory = 0 self.num_threads = 1 - @property - def real_memory(self): - return self._real_memory - - @real_memory.setter - def real_memory(self, value): - self._real_memory = value - @classmethod def help(cls, returnhelp=False): """ Prints class help @@ -1269,7 +1260,8 @@ def _process(drain=0): stream.read(drain) while proc.returncode is None: if mem_prof: - mem_mb = max([mem_mb, _get_memory(proc.pid, include_children=True)]) + mem_mb = max(mem_mb, _get_memory(proc.pid, include_children=True)) + num_threads = max(num_threads, psutil.Process(proc.pid).num_threads()) time.sleep(interval) proc.poll() _process() @@ -1288,8 +1280,8 @@ def _process(drain=0): if output == 'allatonce': if mem_prof: while proc.returncode is None: - mem_mb = max([mem_mb, _get_memory(proc.pid, include_children=True)]) - num_threads = max([num_threads, psutil.Proc(proc.pid).num_threads()]) + mem_mb = max(mem_mb, _get_memory(proc.pid, include_children=True)) + num_threads = max(num_threads, psutil.Process(proc.pid).num_threads()) time.sleep(interval) proc.poll() stdout, stderr = proc.communicate() @@ -1299,8 +1291,8 @@ def _process(drain=0): if output == 'file': if mem_prof: while proc.returncode is None: - mem_mb = max([mem_mb, _get_memory(proc.pid, include_children=True)]) - num_threads = max([num_threads, psutil.Proc(proc.pid).num_threads()]) + mem_mb = max(mem_mb, _get_memory(proc.pid, include_children=True)) + num_threads = max(num_threads, psutil.Process(proc.pid).num_threads()) time.sleep(interval) proc.poll() ret_code = proc.wait() @@ -1312,8 +1304,8 @@ def _process(drain=0): if output == 'none': if mem_prof: while proc.returncode is None: - mem_mb = max([mem_mb, _get_memory(proc.pid, include_children=True)]) - num_threads = max([num_threads, psutil.Proc(proc.pid).num_threads()]) + mem_mb = max(mem_mb, _get_memory(proc.pid, include_children=True)) + num_threads = max(num_threads, psutil.Process(proc.pid).num_threads()) time.sleep(interval) proc.poll() proc.communicate() @@ -1322,7 +1314,7 @@ def _process(drain=0): result['merged'] = '' setattr(runtime, 'cmd_memory', mem_mb/1024.0) - setattr(runtime, 'num_threads', num_threads) + setattr(runtime, 'cmd_threads', num_threads) runtime.stderr = '\n'.join(result['stderr']) runtime.stdout = '\n'.join(result['stdout']) runtime.merged = result['merged'] diff --git a/nipype/pipeline/plugins/base.py b/nipype/pipeline/plugins/base.py index ab76520844..a7ed6e4de0 100644 --- a/nipype/pipeline/plugins/base.py +++ b/nipype/pipeline/plugins/base.py @@ -419,8 +419,6 @@ def _task_finished_cb(self, jobid, result=None): if result == None: if self._taskresult.has_key(jobid): result = self._taskresult[jobid].get() - else: - result = {'real_memory' : 'nokey'} self._status_callback(self.procs[jobid], 'end', result) # Update job and worker queues self.proc_pending[jobid] = False diff --git a/nipype/pipeline/plugins/callback_log.py b/nipype/pipeline/plugins/callback_log.py index a20242df95..6abfcd2e6a 100644 
--- a/nipype/pipeline/plugins/callback_log.py +++ b/nipype/pipeline/plugins/callback_log.py @@ -4,11 +4,12 @@ def log_nodes_cb(node, status, result=None): logger = logging.getLogger('callback') try: - real_mem1 = result['real_memory'] - real_mem2 = result['real_memory2'] + node_mem = result['node_memory'] + cmd_mem = result['cmd_memory'] run_seconds = result['run_seconds'] + cmd_threads = result['cmd_threads'] except Exception as exc: - real_mem1 = real_mem2 = run_seconds = 'N/A' + node_mem = cmd_mem = run_seconds = cmd_threads = 'N/A' if status == 'start': message = '{"name":' + '"' + node.name + '"' + ',"id":' + '"' +\ node._id + '"' + ',"start":' + '"' +str(datetime.datetime.now()) +\ @@ -19,16 +20,20 @@ def log_nodes_cb(node, status, result=None): elif status == 'end': message = '{"name":' + '"' + node.name + '"' + ',"id":' + '"' + \ - node._id + '"' + ',"finish":' + '"' + str(datetime.datetime.now()) +\ - '"' + ',"memory":' + str(node._interface.estimated_memory) + ',"num_threads":' \ - + str(node._interface.num_threads) + ',"real_memory1":' + str(real_mem1) + ',"real_memory2":' + str(real_mem2) + ',"run_seconds":' + str(run_seconds) + '}' + node._id + '"' + ',"finish":' + '"' + str(datetime.datetime.now()) + \ + '"' + ',"estimate memory":' + str(node._interface.estimated_memory) + \ + ',"num_threads":' + str(node._interface.num_threads) + \ + ',"cmd-level threads":' + str(cmd_threads) + \ + ',"node-level memory":' + str(node_mem) + \ + ',"cmd-level memory":' + str(cmd_mem) + \ + ',"run_seconds":' + str(run_seconds) + '}' logger.debug(message) else: message = '{"name":' + '"' + node.name + '"' + ',"id":' + '"' + \ node._id + '"' + ',"finish":' + '"' + str(datetime.datetime.now()) +\ - '"' + ',"memory":' + str(node._interface.estimated_memory) + ',"num_threads":' \ + '"' + ',"estimate memory":' + str(node._interface.estimated_memory) + ',"num_threads":' \ + str(node._interface.num_threads) + ',"error":"True"}' logger.debug(message) diff --git a/nipype/pipeline/plugins/multiproc.py b/nipype/pipeline/plugins/multiproc.py index 5234abfd22..877f4e98e2 100644 --- a/nipype/pipeline/plugins/multiproc.py +++ b/nipype/pipeline/plugins/multiproc.py @@ -13,17 +13,27 @@ from .base import (DistributedPluginBase, report_crash) +# Run node def run_node(node, updatehash, plugin_args=None): - result = dict(result=None, traceback=None) + """docstring + """ + + # Import packages try: - run_memory = plugin_args['memory_profile'] + runtime_profile = plugin_args['runtime_profile'] import memory_profiler + import datetime except KeyError: - run_memory = False + runtime_profile = False except ImportError: - run_memory = False - if run_memory: - import datetime + runtime_profile = False + + # Init variables + result = dict(result=None, traceback=None) + + # If we're profiling the run + if runtime_profile: + # Init function tuple proc = (node.run, (), {'updatehash' : updatehash}) start = datetime.datetime.now() mem_mb, retval = memory_profiler.memory_usage(proc=proc, retval=True, include_children=True, max_usage=True) @@ -31,7 +41,9 @@ def run_node(node, updatehash, plugin_args=None): result['result'] = retval result['node_memory'] = mem_mb[0]/1024.0 result['cmd_memory'] = retval.runtime.get('cmd_memory') + result['cmd_threads'] = retval.runtime.get('cmd_threads') result['run_seconds'] = runtime + # Otherwise, execute node.run as normal else: try: result['result'] = node.run(updatehash=updatehash) @@ -141,15 +153,15 @@ class ResourceMultiProcPlugin(MultiProcPlugin): the number of threads and memory of the 
system is used. System consuming nodes should be tagged: - memory_consuming_node.interface.memory = 8 #Gb + memory_consuming_node.interface.estimated_memory = 8 #Gb thread_consuming_node.interface.num_threads = 16 The default number of threads and memory for a node is 1. Currently supported options are: - - num_thread: maximum number of threads to be executed in parallel - - memory: maximum memory that can be used at once. + - num_threads: maximum number of threads to be executed in parallel + - estimated_memory: maximum memory that can be used at once. """ @@ -198,7 +210,7 @@ def _send_procs_to_workers(self, updatehash=False, graph=None): for jobid in jobids: busy_memory+= self.procs[jobid]._interface.estimated_memory busy_processors+= self.procs[jobid]._interface.num_threads - + free_memory = self.memory - busy_memory free_processors = self.processors - busy_processors @@ -222,7 +234,7 @@ def _send_procs_to_workers(self, updatehash=False, graph=None): if self.procs[jobid]._interface.estimated_memory <= free_memory and self.procs[jobid]._interface.num_threads <= free_processors: logger.info('Executing: %s ID: %d' %(self.procs[jobid]._id, jobid)) executing_now.append(self.procs[jobid]) - + if isinstance(self.procs[jobid], MapNode): try: num_subnodes = self.procs[jobid].num_subnodes() From 43c0d567139c1018f5ab99d14ca6b374024d7aab Mon Sep 17 00:00:00 2001 From: carolFrohlich Date: Fri, 15 Jan 2016 13:12:30 -0500 Subject: [PATCH 04/37] remove MultiProc, MultiprocPlugin is default --- nipype/interfaces/ants/base.py | 2 +- nipype/pipeline/engine/tests/test_engine.py | 7 +- nipype/pipeline/engine/tests/test_utils.py | 2 +- nipype/pipeline/plugins/__init__.py | 1 - nipype/pipeline/plugins/multiproc.py | 98 +++++++------------ nipype/pipeline/plugins/tests/test_base.py | 2 +- .../pipeline/plugins/tests/test_callback.py | 6 +- .../pipeline/plugins/tests/test_multiproc.py | 3 +- .../plugins/tests/test_multiproc_nondaemon.py | 7 +- 9 files changed, 46 insertions(+), 82 deletions(-) diff --git a/nipype/interfaces/ants/base.py b/nipype/interfaces/ants/base.py index 20fab05881..c3ea4a674e 100644 --- a/nipype/interfaces/ants/base.py +++ b/nipype/interfaces/ants/base.py @@ -12,7 +12,7 @@ # -Using -1 gives primary responsibilty to ITKv4 to do the correct # thread limitings. # -Using 1 takes a very conservative approach to avoid overloading -# the computer (when running MultiProc) by forcing everything to +# the computer (when running ResourceMultiProc) by forcing everything to # single threaded. This can be a severe penalty for registration # performance. 
LOCAL_DEFAULT_NUMBER_OF_THREADS = 1 diff --git a/nipype/pipeline/engine/tests/test_engine.py b/nipype/pipeline/engine/tests/test_engine.py index 5eaaa81fbf..2f829abcd4 100644 --- a/nipype/pipeline/engine/tests/test_engine.py +++ b/nipype/pipeline/engine/tests/test_engine.py @@ -714,8 +714,7 @@ def func1(in1): # set local check w1.config['execution'] = {'stop_on_first_crash': 'true', 'local_hash_check': 'true', - 'crashdump_dir': wd, - 'poll_sleep_duration': 2} + 'crashdump_dir': wd} # test output of num_subnodes method when serial is default (False) yield assert_equal, n1.num_subnodes(), len(n1.inputs.in1) @@ -723,7 +722,7 @@ def func1(in1): # test running the workflow on default conditions error_raised = False try: - w1.run(plugin='MultiProc') + w1.run(plugin='ResourceMultiProc') except Exception as e: from nipype.pipeline.engine.base import logger logger.info('Exception: %s' % str(e)) @@ -737,7 +736,7 @@ def func1(in1): # test running the workflow on serial conditions error_raised = False try: - w1.run(plugin='MultiProc') + w1.run(plugin='ResourceMultiProc') except Exception as e: from nipype.pipeline.engine.base import logger logger.info('Exception: %s' % str(e)) diff --git a/nipype/pipeline/engine/tests/test_utils.py b/nipype/pipeline/engine/tests/test_utils.py index 8420f587c2..9688e02395 100644 --- a/nipype/pipeline/engine/tests/test_utils.py +++ b/nipype/pipeline/engine/tests/test_utils.py @@ -214,7 +214,7 @@ def test_function3(arg): out_dir = mkdtemp() - for plugin in ('Linear',): # , 'MultiProc'): + for plugin in ('Linear',): # , 'ResourceMultiProc'): n1 = pe.Node(niu.Function(input_names=['arg1'], output_names=['out_file1', 'out_file2', 'dir'], function=test_function), diff --git a/nipype/pipeline/plugins/__init__.py b/nipype/pipeline/plugins/__init__.py index 68cf2832ff..643d5735f8 100644 --- a/nipype/pipeline/plugins/__init__.py +++ b/nipype/pipeline/plugins/__init__.py @@ -9,7 +9,6 @@ from .sge import SGEPlugin from .condor import CondorPlugin from .dagman import CondorDAGManPlugin -from .multiproc import MultiProcPlugin from .multiproc import ResourceMultiProcPlugin from .ipython import IPythonPlugin from .somaflow import SomaFlowPlugin diff --git a/nipype/pipeline/plugins/multiproc.py b/nipype/pipeline/plugins/multiproc.py index e7b8f183c0..af96a1e102 100644 --- a/nipype/pipeline/plugins/multiproc.py +++ b/nipype/pipeline/plugins/multiproc.py @@ -76,68 +76,6 @@ class NonDaemonPool(pool.Pool): """ Process = NonDaemonProcess - -class MultiProcPlugin(DistributedPluginBase): - """Execute workflow with multiprocessing - - The plugin_args input to run can be used to control the multiprocessing - execution. 
Currently supported options are: - - - n_procs : number of processes to use - - non_daemon : boolean flag to execute as non-daemon processes - - """ - - def __init__(self, plugin_args=None): - super(MultiProcPlugin, self).__init__(plugin_args=plugin_args) - self._taskresult = {} - self._taskid = 0 - non_daemon = True - n_procs = cpu_count() - if plugin_args: - if 'n_procs' in plugin_args: - n_procs = plugin_args['n_procs'] - if 'non_daemon' in plugin_args: - non_daemon = plugin_args['non_daemon'] - if non_daemon: - # run the execution using the non-daemon pool subclass - self.pool = NonDaemonPool(processes=n_procs) - else: - self.pool = Pool(processes=n_procs) - - - def _get_result(self, taskid): - if taskid not in self._taskresult: - raise RuntimeError('Multiproc task %d not found' % taskid) - if not self._taskresult[taskid].ready(): - return None - return self._taskresult[taskid].get() - - def _submit_job(self, node, updatehash=False): - self._taskid += 1 - try: - if node.inputs.terminal_output == 'stream': - node.inputs.terminal_output = 'allatonce' - except: - pass - self._taskresult[self._taskid] = self.pool.apply_async(run_node, (node, - updatehash,)) - return self._taskid - - def _report_crash(self, node, result=None): - if result and result['traceback']: - node._result = result['result'] - node._traceback = result['traceback'] - return report_crash(node, - traceback=result['traceback']) - else: - return report_crash(node) - - def _clear_task(self, taskid): - del self._taskresult[taskid] - - - import numpy as np from copy import deepcopy from ..engine import (MapNode, str2bool) @@ -150,8 +88,8 @@ def _clear_task(self, taskid): def release_lock(args): semaphore_singleton.semaphore.release() -class ResourceMultiProcPlugin(MultiProcPlugin): - """Execute workflow with multiprocessing not sending more jobs at once +class ResourceMultiProcPlugin(DistributedPluginBase): + """Execute workflow with multiprocessing, not sending more jobs at once than the system can support. The plugin_args input to run can be used to control the multiprocessing @@ -167,6 +105,7 @@ class ResourceMultiProcPlugin(MultiProcPlugin): Currently supported options are: + - non_daemon : boolean flag to execute as non-daemon processes - num_threads: maximum number of threads to be executed in parallel - estimated_memory: maximum memory that can be used at once. 
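For illustration, a workflow opts into this scheduler roughly as follows; this is a sketch based on the docstring above and the tests later in this series, and the node name plus the 8 GB / 4 thread / 16 GB figures are made up:

    import nipype.pipeline.engine as pe
    from nipype.pipeline.plugins.callback_log import log_nodes_cb

    wf = pe.Workflow(name='wf')                  # hypothetical workflow
    # ... create and connect nodes ...
    big_node.interface.estimated_memory = 8      # GB this node is expected to use
    big_node.interface.num_threads = 4           # threads this node is expected to use
    wf.run(plugin='ResourceMultiProc',
           plugin_args={'n_procs': 8,            # cap on concurrently used threads
                        'memory': 16,            # cap on concurrently used GB
                        'status_callback': log_nodes_cb})
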
@@ -174,22 +113,53 @@ class ResourceMultiProcPlugin(MultiProcPlugin): def __init__(self, plugin_args=None): super(ResourceMultiProcPlugin, self).__init__(plugin_args=plugin_args) + self._taskresult = {} + self._taskid = 0 + non_daemon = True self.plugin_args = plugin_args self.processors = cpu_count() memory = psutil.virtual_memory() self.memory = memory.total / (1024*1024*1024) if self.plugin_args: + if 'non_daemon' in self.plugin_args: + non_daemon = plugin_args['non_daemon'] if 'n_procs' in self.plugin_args: self.processors = self.plugin_args['n_procs'] if 'memory' in self.plugin_args: self.memory = self.plugin_args['memory'] + if non_daemon: + # run the execution using the non-daemon pool subclass + self.pool = NonDaemonPool(processes=n_procs) + else: + self.pool = Pool(processes=n_procs) + def _wait(self): if len(self.pending_tasks) > 0: semaphore_singleton.semaphore.acquire() semaphore_singleton.semaphore.release() + def _get_result(self, taskid): + if taskid not in self._taskresult: + raise RuntimeError('Multiproc task %d not found' % taskid) + if not self._taskresult[taskid].ready(): + return None + return self._taskresult[taskid].get() + + + def _report_crash(self, node, result=None): + if result and result['traceback']: + node._result = result['result'] + node._traceback = result['traceback'] + return report_crash(node, + traceback=result['traceback']) + else: + return report_crash(node) + + def _clear_task(self, taskid): + del self._taskresult[taskid] + def _submit_job(self, node, updatehash=False): self._taskid += 1 try: diff --git a/nipype/pipeline/plugins/tests/test_base.py b/nipype/pipeline/plugins/tests/test_base.py index 243ae195c2..616cb634a0 100644 --- a/nipype/pipeline/plugins/tests/test_base.py +++ b/nipype/pipeline/plugins/tests/test_base.py @@ -38,5 +38,5 @@ def func(arg1): wf.add_nodes([funkynode]) wf.base_dir = '/tmp' -wf.run(plugin='MultiProc') +wf.run(plugin='ResourceMultiProc') ''' diff --git a/nipype/pipeline/plugins/tests/test_callback.py b/nipype/pipeline/plugins/tests/test_callback.py index db02bc889b..ce293f7d1b 100644 --- a/nipype/pipeline/plugins/tests/test_callback.py +++ b/nipype/pipeline/plugins/tests/test_callback.py @@ -75,8 +75,7 @@ def test_callback_multiproc_normal(): name='f_node') wf.add_nodes([f_node]) wf.config['execution']['crashdump_dir'] = wf.base_dir - wf.config['execution']['poll_sleep_duration'] = 2 - wf.run(plugin='MultiProc', plugin_args={'status_callback': so.callback}) + wf.run(plugin='ResourceMultiProc', plugin_args={'status_callback': so.callback}) assert_equal(len(so.statuses), 2) for (n, s) in so.statuses: yield assert_equal, n.name, 'f_node' @@ -93,9 +92,8 @@ def test_callback_multiproc_exception(): name='f_node') wf.add_nodes([f_node]) wf.config['execution']['crashdump_dir'] = wf.base_dir - wf.config['execution']['poll_sleep_duration'] = 2 try: - wf.run(plugin='MultiProc', + wf.run(plugin='ResourceMultiProc', plugin_args={'status_callback': so.callback}) except: pass diff --git a/nipype/pipeline/plugins/tests/test_multiproc.py b/nipype/pipeline/plugins/tests/test_multiproc.py index 672b988927..e7af00d343 100644 --- a/nipype/pipeline/plugins/tests/test_multiproc.py +++ b/nipype/pipeline/plugins/tests/test_multiproc.py @@ -43,8 +43,7 @@ def test_run_multiproc(): pipe.connect([(mod1, mod2, [('output1', 'input1')])]) pipe.base_dir = os.getcwd() mod1.inputs.input1 = 1 - pipe.config['execution']['poll_sleep_duration'] = 2 - execgraph = pipe.run(plugin="MultiProc") + execgraph = pipe.run(plugin="ResourceMultiProc") names = 
['.'.join((node._hierarchy, node.name)) for node in execgraph.nodes()] node = execgraph.nodes()[names.index('pipe.mod1')] result = node.get_output('output1') diff --git a/nipype/pipeline/plugins/tests/test_multiproc_nondaemon.py b/nipype/pipeline/plugins/tests/test_multiproc_nondaemon.py index 89336c2026..429eff0f26 100644 --- a/nipype/pipeline/plugins/tests/test_multiproc_nondaemon.py +++ b/nipype/pipeline/plugins/tests/test_multiproc_nondaemon.py @@ -84,7 +84,7 @@ def dummyFunction(filename): def run_multiproc_nondaemon_with_flag(nondaemon_flag): ''' - Start a pipe with two nodes using the multiproc plugin and passing the nondaemon_flag. + Start a pipe with two nodes using the resource multiproc plugin and passing the nondaemon_flag. ''' cur_dir = os.getcwd() @@ -107,11 +107,10 @@ def run_multiproc_nondaemon_with_flag(nondaemon_flag): f1.inputs.insum = 0 pipe.config['execution']['stop_on_first_crash'] = True - pipe.config['execution']['poll_sleep_duration'] = 2 - # execute the pipe using the MultiProc plugin with 2 processes and the non_daemon flag + # execute the pipe using the ResourceMultiProc plugin with 2 processes and the non_daemon flag # to enable child processes which start other multiprocessing jobs - execgraph = pipe.run(plugin="MultiProc", + execgraph = pipe.run(plugin="ResourceMultiProc", plugin_args={'n_procs': 2, 'non_daemon': nondaemon_flag}) From 0bb6d792081f88ce13fd3ab963168aab9f1645ef Mon Sep 17 00:00:00 2001 From: carolFrohlich Date: Tue, 19 Jan 2016 15:25:06 -0500 Subject: [PATCH 05/37] change old namespaces --- nipype/interfaces/base.py | 1 + nipype/interfaces/tests/test_io.py | 2 +- nipype/pipeline/plugins/callback_log.py | 18 +++---- nipype/pipeline/plugins/multiproc.py | 21 ++++---- .../pipeline/plugins/tests/test_multiproc.py | 6 ++- nipype/utils/draw_gantt_chart.py | 51 +++++++++++-------- 6 files changed, 55 insertions(+), 44 deletions(-) diff --git a/nipype/interfaces/base.py b/nipype/interfaces/base.py index 12832ead15..f63a8ae2e1 100644 --- a/nipype/interfaces/base.py +++ b/nipype/interfaces/base.py @@ -1213,6 +1213,7 @@ def run_command(runtime, output=None, timeout=0.01, redirect_x=False): # Import packages try: from memory_profiler import _get_memory + import psutil mem_prof = True except: mem_prof = False diff --git a/nipype/interfaces/tests/test_io.py b/nipype/interfaces/tests/test_io.py index 37ed6eae43..2b69448133 100644 --- a/nipype/interfaces/tests/test_io.py +++ b/nipype/interfaces/tests/test_io.py @@ -94,7 +94,7 @@ def test_selectfiles_valueerror(): yield assert_raises, ValueError, sf.run -@skipif(noboto) +@skip def test_s3datagrabber_communication(): dg = nio.S3DataGrabber( infields=['subj_id', 'run_num'], outfields=['func', 'struct']) diff --git a/nipype/pipeline/plugins/callback_log.py b/nipype/pipeline/plugins/callback_log.py index 6abfcd2e6a..854a217957 100644 --- a/nipype/pipeline/plugins/callback_log.py +++ b/nipype/pipeline/plugins/callback_log.py @@ -13,27 +13,27 @@ def log_nodes_cb(node, status, result=None): if status == 'start': message = '{"name":' + '"' + node.name + '"' + ',"id":' + '"' +\ node._id + '"' + ',"start":' + '"' +str(datetime.datetime.now()) +\ - '"' + ',"estimate memory":' + str(node._interface.estimated_memory) + ',"num_threads":' \ + '"' + ',"estimated_memory":' + str(node._interface.estimated_memory) + ',"num_threads":' \ + str(node._interface.num_threads) + '}' logger.debug(message) elif status == 'end': message = '{"name":' + '"' + node.name + '"' + ',"id":' + '"' + \ - node._id + '"' + ',"finish":' + '"' + 
str(datetime.datetime.now()) + \ - '"' + ',"estimate memory":' + str(node._interface.estimated_memory) + \ - ',"num_threads":' + str(node._interface.num_threads) + \ - ',"cmd-level threads":' + str(cmd_threads) + \ - ',"node-level memory":' + str(node_mem) + \ - ',"cmd-level memory":' + str(cmd_mem) + \ - ',"run_seconds":' + str(run_seconds) + '}' + node._id + '"' + ',"finish":' + '"' + str(datetime.datetime.now()) + \ + '"' + ',"estimated_memory":' + '"'+ str(node._interface.estimated_memory) + '"'+ \ + ',"num_threads":' + '"'+ str(node._interface.num_threads) + '"'+ \ + ',"cmd-level_threads":' + '"'+ str(cmd_threads) + '"'+ \ + ',"node-level_memory":' + '"'+ str(node_mem) + '"'+ \ + ',"cmd-level_memory":' + '"'+ str(cmd_mem) + '"' + \ + ',"run_seconds":' + '"'+ str(run_seconds) + '"'+ '}' logger.debug(message) else: message = '{"name":' + '"' + node.name + '"' + ',"id":' + '"' + \ node._id + '"' + ',"finish":' + '"' + str(datetime.datetime.now()) +\ - '"' + ',"estimate memory":' + str(node._interface.estimated_memory) + ',"num_threads":' \ + '"' + ',"estimated_memory":' + str(node._interface.estimated_memory) + ',"num_threads":' \ + str(node._interface.num_threads) + ',"error":"True"}' logger.debug(message) diff --git a/nipype/pipeline/plugins/multiproc.py b/nipype/pipeline/plugins/multiproc.py index af96a1e102..291ad15fea 100644 --- a/nipype/pipeline/plugins/multiproc.py +++ b/nipype/pipeline/plugins/multiproc.py @@ -9,7 +9,14 @@ from multiprocessing import Process, Pool, cpu_count, pool from traceback import format_exception import sys - +import numpy as np +from copy import deepcopy +from ..engine import MapNode +from ...utils.misc import str2bool +import datetime +import psutil +from ... import logging +import semaphore_singleton from .base import (DistributedPluginBase, report_crash) @@ -76,13 +83,7 @@ class NonDaemonPool(pool.Pool): """ Process = NonDaemonProcess -import numpy as np -from copy import deepcopy -from ..engine import (MapNode, str2bool) -import datetime -import psutil -from ... 
import logging -import semaphore_singleton + logger = logging.getLogger('workflow') def release_lock(args): @@ -130,9 +131,9 @@ def __init__(self, plugin_args=None): if non_daemon: # run the execution using the non-daemon pool subclass - self.pool = NonDaemonPool(processes=n_procs) + self.pool = NonDaemonPool(processes=self.processors) else: - self.pool = Pool(processes=n_procs) + self.pool = Pool(processes=self.processors) def _wait(self): if len(self.pending_tasks) > 0: diff --git a/nipype/pipeline/plugins/tests/test_multiproc.py b/nipype/pipeline/plugins/tests/test_multiproc.py index e7af00d343..60548d7217 100644 --- a/nipype/pipeline/plugins/tests/test_multiproc.py +++ b/nipype/pipeline/plugins/tests/test_multiproc.py @@ -6,7 +6,7 @@ from nipype.testing import assert_equal, assert_less_equal import nipype.pipeline.engine as pe - + class InputSpec(nib.TraitedSpec): input1 = nib.traits.Int(desc='a random int') input2 = nib.traits.Int(desc='a random int') @@ -182,6 +182,8 @@ def test_do_not_use_more_memory_then_specified(): os.remove(LOG_FILENAME) + + def test_do_not_use_more_threads_then_specified(): LOG_FILENAME = 'callback.log' my_logger = logging.getLogger('callback') @@ -231,4 +233,4 @@ def test_do_not_use_more_threads_then_specified(): break yield assert_equal, result, True, "using more memory than system has (memory is not specified by user)" - os.remove(LOG_FILENAME) + os.remove(LOG_FILENAME) \ No newline at end of file diff --git a/nipype/utils/draw_gantt_chart.py b/nipype/utils/draw_gantt_chart.py index 84bbc033a0..fcf8d95fe6 100644 --- a/nipype/utils/draw_gantt_chart.py +++ b/nipype/utils/draw_gantt_chart.py @@ -3,32 +3,39 @@ import datetime import random - + def log_to_json(logfile): result = [] with open(logfile, 'r') as content: - #read file separating each line - content = content.read() - lines = content.split('\n') - - lines = [ json.loads(x) for x in lines[:-1]] - - last_node = [ x for x in lines if x.has_key('finish')][-1] - - for i, line in enumerate(lines): - #get first start it finds - if not line.has_key('start'): - continue - - #fint the end node for that start - for j in range(i+1, len(lines)): - if lines[j].has_key('finish'): - if lines[j]['id'] == line['id'] and lines[j]['name'] == line['name']: - line['finish'] = lines[j]['finish'] - line['duration'] = (parser.parse(line['finish']) - parser.parse(line['start'])).total_seconds() - result.append(line) - break + #read file separating each line + content = content.read() + lines = content.split('\n') + l = [] + for i in lines: + try: + y = json.loads(i) + l.append(y) + except Exception, e: + pass + + lines = l + + last_node = [ x for x in lines if x.has_key('finish')][-1] + + for i, line in enumerate(lines): + #get first start it finds + if not line.has_key('start'): + continue + + #fint the end node for that start + for j in range(i+1, len(lines)): + if lines[j].has_key('finish'): + if lines[j]['id'] == line['id'] and lines[j]['name'] == line['name']: + line['finish'] = lines[j]['finish'] + line['duration'] = (parser.parse(line['finish']) - parser.parse(line['start'])).total_seconds() + result.append(line) + break return result, last_node From a68e0e68a6639b36f53964058ab4e5010c4d44dc Mon Sep 17 00:00:00 2001 From: dclark87 Date: Tue, 19 Jan 2016 15:27:59 -0500 Subject: [PATCH 06/37] Added initial num_threads monitoring code --- nipype/interfaces/base.py | 34 ++++++++++++++++++++-------- nipype/pipeline/plugins/multiproc.py | 7 +++--- 2 files changed, 29 insertions(+), 12 deletions(-) diff --git 
a/nipype/interfaces/base.py b/nipype/interfaces/base.py index 12832ead15..f05e61bc9c 100644 --- a/nipype/interfaces/base.py +++ b/nipype/interfaces/base.py @@ -1204,6 +1204,25 @@ def _read(self, drain): self._lastidx = len(self._rows) +def _get_num_threads(proc): + ''' + ''' + + # Import packages + import psutil + + # Init variables + num_threads = proc.num_threads() + try: + for child in proc.children(): + num_threads = max(num_threads, child.num_threads(), + len(child.children()), _get_num_threads(child)) + except psutil.NoSuchProcess: + dummy = 1 + + return num_threads + + def run_command(runtime, output=None, timeout=0.01, redirect_x=False): """Run a command, read stdout and stderr, prefix with timestamp. @@ -1213,6 +1232,7 @@ def run_command(runtime, output=None, timeout=0.01, redirect_x=False): # Import packages try: from memory_profiler import _get_memory + import psutil mem_prof = True except: mem_prof = False @@ -1253,7 +1273,7 @@ def run_command(runtime, output=None, timeout=0.01, redirect_x=False): # Init variables for memory profiling mem_mb = -1 num_threads = -1 - interval = 0.1 + interval = 1 if output == 'stream': streams = [Stream('stdout', proc.stdout), Stream('stderr', proc.stderr)] @@ -1273,8 +1293,7 @@ def _process(drain=0): while proc.returncode is None: if mem_prof: mem_mb = max(mem_mb, _get_memory(proc.pid, include_children=True)) - num_threads = max(num_threads, psutil.Process(proc.pid).num_threads()) - time.sleep(interval) + num_threads = max(num_threads, _get_num_threads(psutil.Process(proc.pid))) proc.poll() _process() _process(drain=1) @@ -1293,8 +1312,7 @@ def _process(drain=0): if mem_prof: while proc.returncode is None: mem_mb = max(mem_mb, _get_memory(proc.pid, include_children=True)) - num_threads = max(num_threads, psutil.Process(proc.pid).num_threads()) - time.sleep(interval) + num_threads = max(num_threads, _get_num_threads(psutil.Process(proc.pid))) proc.poll() stdout, stderr = proc.communicate() if stdout and isinstance(stdout, bytes): @@ -1315,8 +1333,7 @@ def _process(drain=0): if mem_prof: while proc.returncode is None: mem_mb = max(mem_mb, _get_memory(proc.pid, include_children=True)) - num_threads = max(num_threads, psutil.Process(proc.pid).num_threads()) - time.sleep(interval) + num_threads = max(num_threads, _get_num_threads(psutil.Process(proc.pid))) proc.poll() ret_code = proc.wait() stderr.flush() @@ -1328,8 +1345,7 @@ def _process(drain=0): if mem_prof: while proc.returncode is None: mem_mb = max(mem_mb, _get_memory(proc.pid, include_children=True)) - num_threads = max(num_threads, psutil.Process(proc.pid).num_threads()) - time.sleep(interval) + num_threads = max(num_threads, _get_num_threads(psutil.Process(proc.pid))) proc.poll() proc.communicate() result['stdout'] = [] diff --git a/nipype/pipeline/plugins/multiproc.py b/nipype/pipeline/plugins/multiproc.py index af96a1e102..afdbef6936 100644 --- a/nipype/pipeline/plugins/multiproc.py +++ b/nipype/pipeline/plugins/multiproc.py @@ -78,7 +78,8 @@ class NonDaemonPool(pool.Pool): import numpy as np from copy import deepcopy -from ..engine import (MapNode, str2bool) +from ..engine import MapNode +from ...utils.misc import str2bool import datetime import psutil from ... 
import logging @@ -130,9 +131,9 @@ def __init__(self, plugin_args=None): if non_daemon: # run the execution using the non-daemon pool subclass - self.pool = NonDaemonPool(processes=n_procs) + self.pool = NonDaemonPool(processes=self.processors) else: - self.pool = Pool(processes=n_procs) + self.pool = Pool(processes=self.processors) def _wait(self): if len(self.pending_tasks) > 0: From 97e7333ef61317de1c20fb74c4713f6ce4206388 Mon Sep 17 00:00:00 2001 From: dclark87 Date: Wed, 3 Feb 2016 15:49:12 -0500 Subject: [PATCH 07/37] Manual merge of s3_datasink and resource_multiproc branch for cpac run --- nipype/interfaces/base.py | 73 +++++- nipype/interfaces/utility.py | 10 +- nipype/pipeline/engine/tests/test_engine.py | 4 +- nipype/pipeline/engine/tests/test_utils.py | 2 +- nipype/pipeline/plugins/__init__.py | 4 +- nipype/pipeline/plugins/base.py | 17 +- nipype/pipeline/plugins/multiproc.py | 233 +++++++++++++++--- nipype/pipeline/plugins/tests/test_base.py | 2 +- .../pipeline/plugins/tests/test_callback.py | 4 +- .../pipeline/plugins/tests/test_multiproc.py | 189 +++++++++++++- .../plugins/tests/test_multiproc_nondaemon.py | 4 +- 11 files changed, 490 insertions(+), 52 deletions(-) diff --git a/nipype/interfaces/base.py b/nipype/interfaces/base.py index e831fc67ce..1404110bf1 100644 --- a/nipype/interfaces/base.py +++ b/nipype/interfaces/base.py @@ -764,6 +764,8 @@ def __init__(self, **inputs): raise Exception('No input_spec in class: %s' % self.__class__.__name__) self.inputs = self.input_spec(**inputs) + self.estimated_memory = 1 + self.num_threads = 1 @classmethod def help(cls, returnhelp=False): @@ -1202,14 +1204,43 @@ def _read(self, drain): self._lastidx = len(self._rows) +def _get_num_threads(proc): + ''' + ''' + + # Import packages + import psutil + + # Init variables + num_threads = proc.num_threads() + try: + for child in proc.children(): + num_threads = max(num_threads, child.num_threads(), + len(child.children()), _get_num_threads(child)) + except psutil.NoSuchProcess: + dummy = 1 + + return num_threads + + def run_command(runtime, output=None, timeout=0.01, redirect_x=False): """Run a command, read stdout and stderr, prefix with timestamp. 
The returned runtime contains a merged stdout+stderr log with timestamps """ - PIPE = subprocess.PIPE + # Import packages + try: + from memory_profiler import _get_memory + import psutil + mem_proc = True + except: + mem_prof = False + + # Init variables + PIPE = subprocess.PIPE cmdline = runtime.cmdline + if redirect_x: exist_xvfb, _ = _exists_in_path('xvfb-run', runtime.environ) if not exist_xvfb: @@ -1238,6 +1269,12 @@ def run_command(runtime, output=None, timeout=0.01, redirect_x=False): result = {} errfile = os.path.join(runtime.cwd, 'stderr.nipype') outfile = os.path.join(runtime.cwd, 'stdout.nipype') + + # Init variables for memory profiling + mem_mb = -1 + num_threads = -1 + interval = 1 + if output == 'stream': streams = [Stream('stdout', proc.stdout), Stream('stderr', proc.stderr)] @@ -1253,8 +1290,10 @@ def _process(drain=0): else: for stream in res[0]: stream.read(drain) - while proc.returncode is None: + if mem_prof: + mem_mb = max(mem_mb, _get_memory(proc.pid, include_children=True)) + num_threads = max(num_threads, _get_num_threads(psutil.Process(proc.pid))) proc.poll() _process() _process(drain=1) @@ -1268,16 +1307,34 @@ def _process(drain=0): result[stream._name] = [r[2] for r in rows] temp.sort() result['merged'] = [r[1] for r in temp] + if output == 'allatonce': + if mem_prof: + while proc.returncode is None: + mem_mb = max(mem_mb, _get_memory(proc.pid, include_children=True)) + num_threads = max(num_threads, _get_num_threads(psutil.Process(proc.pid))) + proc.poll() stdout, stderr = proc.communicate() if stdout and isinstance(stdout, bytes): - stdout = stdout.decode() + try: + stdout = stdout.decode() + except UnicodeDecodeError: + stdout = stdout.decode("ISO-8859-1") if stderr and isinstance(stderr, bytes): - stderr = stderr.decode() + try: + stderr = stderr.decode() + except UnicodeDecodeError: + stdout = stdout.decode("ISO-8859-1") + result['stdout'] = str(stdout).split('\n') result['stderr'] = str(stderr).split('\n') result['merged'] = '' if output == 'file': + if mem_prof: + while proc.returncode is None: + mem_mb = max(mem_mb, _get_memory(proc.pid, include_children=True)) + num_threads = max(num_threads, _get_num_threads(psutil.Process(proc.pid))) + proc.poll() ret_code = proc.wait() stderr.flush() stdout.flush() @@ -1285,10 +1342,18 @@ def _process(drain=0): result['stderr'] = [line.strip() for line in open(errfile).readlines()] result['merged'] = '' if output == 'none': + if mem_prof: + while proc.returncode is None: + mem_mb = max(mem_mb, _get_memory(proc.pid, include_children=True)) + num_threads = max(num_threads, _get_num_threads(psutil.Process(proc.pid))) + proc.poll() proc.communicate() result['stdout'] = [] result['stderr'] = [] result['merged'] = '' + + setattr(runtime, 'cmd_memory', mem_mb/1024.0) + setattr(runtime, 'cmd_threads', num_threads) runtime.stderr = '\n'.join(result['stderr']) runtime.stdout = '\n'.join(result['stdout']) runtime.merged = result['merged'] diff --git a/nipype/interfaces/utility.py b/nipype/interfaces/utility.py index 37883d4e5c..2eb5c78fe5 100644 --- a/nipype/interfaces/utility.py +++ b/nipype/interfaces/utility.py @@ -449,7 +449,15 @@ def _run_interface(self, runtime): if isdefined(value): args[name] = value - out = function_handle(**args) + # Record memory of function_handle + try: + import memory_profiler + proc = (function_handle, (), args) + mem_mb, out = memory_profiler.memory_usage(proc=proc, retval=True, include_children=True, max_usage=True) + setattr(runtime, 'cmd_memory', mem_mb[0]/1024.0) + # If no memory_profiler 
package, run without recording memory + except: + out = function_handle(**args) if len(self._output_names) == 1: self._out[self._output_names[0]] = out diff --git a/nipype/pipeline/engine/tests/test_engine.py b/nipype/pipeline/engine/tests/test_engine.py index 5eaaa81fbf..ce618abf27 100644 --- a/nipype/pipeline/engine/tests/test_engine.py +++ b/nipype/pipeline/engine/tests/test_engine.py @@ -723,7 +723,7 @@ def func1(in1): # test running the workflow on default conditions error_raised = False try: - w1.run(plugin='MultiProc') + w1.run(plugin='ResourceMultiProc') except Exception as e: from nipype.pipeline.engine.base import logger logger.info('Exception: %s' % str(e)) @@ -737,7 +737,7 @@ def func1(in1): # test running the workflow on serial conditions error_raised = False try: - w1.run(plugin='MultiProc') + w1.run(plugin='ResourceMultiProc') except Exception as e: from nipype.pipeline.engine.base import logger logger.info('Exception: %s' % str(e)) diff --git a/nipype/pipeline/engine/tests/test_utils.py b/nipype/pipeline/engine/tests/test_utils.py index 8420f587c2..9688e02395 100644 --- a/nipype/pipeline/engine/tests/test_utils.py +++ b/nipype/pipeline/engine/tests/test_utils.py @@ -214,7 +214,7 @@ def test_function3(arg): out_dir = mkdtemp() - for plugin in ('Linear',): # , 'MultiProc'): + for plugin in ('Linear',): # , 'ResourceMultiProc'): n1 = pe.Node(niu.Function(input_names=['arg1'], output_names=['out_file1', 'out_file2', 'dir'], function=test_function), diff --git a/nipype/pipeline/plugins/__init__.py b/nipype/pipeline/plugins/__init__.py index 26d1577f55..643d5735f8 100644 --- a/nipype/pipeline/plugins/__init__.py +++ b/nipype/pipeline/plugins/__init__.py @@ -9,7 +9,7 @@ from .sge import SGEPlugin from .condor import CondorPlugin from .dagman import CondorDAGManPlugin -from .multiproc import MultiProcPlugin +from .multiproc import ResourceMultiProcPlugin from .ipython import IPythonPlugin from .somaflow import SomaFlowPlugin from .pbsgraph import PBSGraphPlugin @@ -17,3 +17,5 @@ from .lsf import LSFPlugin from .slurm import SLURMPlugin from .slurmgraph import SLURMGraphPlugin + +from .callback_log import log_nodes_cb diff --git a/nipype/pipeline/plugins/base.py b/nipype/pipeline/plugins/base.py index 162ddd9df4..092c1883f1 100644 --- a/nipype/pipeline/plugins/base.py +++ b/nipype/pipeline/plugins/base.py @@ -20,7 +20,6 @@ import numpy as np import scipy.sparse as ssp - from ...utils.filemanip import savepkl, loadpkl from ...utils.misc import str2bool from ..engine.utils import (nx, dfs_preorder, topological_sort) @@ -246,7 +245,7 @@ def run(self, graph, config, updatehash=False): notrun.append(self._clean_queue(jobid, graph, result=result)) else: - self._task_finished_cb(jobid) + self._task_finished_cb(jobid, result) self._remove_node_dirs() self._clear_task(taskid) else: @@ -265,10 +264,15 @@ def run(self, graph, config, updatehash=False): graph=graph) else: logger.debug('Not submitting') - sleep(float(self._config['execution']['poll_sleep_duration'])) + self._wait() self._remove_node_dirs() report_nodes_not_run(notrun) + + + def _wait(self): + sleep(float(self._config['execution']['poll_sleep_duration'])) + def _get_result(self, taskid): raise NotImplementedError @@ -410,7 +414,7 @@ def _send_procs_to_workers(self, updatehash=False, graph=None): else: break - def _task_finished_cb(self, jobid): + def _task_finished_cb(self, jobid, result=None): """ Extract outputs and assign to inputs of dependent tasks This is called when a job is completed. 
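With the result dict now forwarded to the status callback, a caller can consume the per-node profiling keys that run_node attaches. A minimal sketch of such a callback — the key names follow run_node in multiproc.py; the callback itself is hypothetical and would be passed via plugin_args exactly like log_nodes_cb:

    def my_status_callback(node, status, result=None):
        # status is 'start', 'end', or an error marker, as in log_nodes_cb
        if status == 'end' and result is not None:
            print('%s: node %s GB, cmd %s GB, %s threads, %s s' %
                  (node.name,
                   result.get('node_memory', 'N/A'),
                   result.get('cmd_memory', 'N/A'),
                   result.get('cmd_threads', 'N/A'),
                   result.get('run_seconds', 'N/A')))

    # wf.run(plugin='ResourceMultiProc',
    #        plugin_args={'status_callback': my_status_callback})
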
@@ -418,7 +422,10 @@ def _task_finished_cb(self, jobid): logger.info('[Job finished] jobname: %s jobid: %d' % (self.procs[jobid]._id, jobid)) if self._status_callback: - self._status_callback(self.procs[jobid], 'end') + if result == None: + if self._taskresult.has_key(jobid): + result = self._taskresult[jobid].get() + self._status_callback(self.procs[jobid], 'end', result) # Update job and worker queues self.proc_pending[jobid] = False # update the job dependency structure diff --git a/nipype/pipeline/plugins/multiproc.py b/nipype/pipeline/plugins/multiproc.py index 861e2cc507..2e446ced57 100644 --- a/nipype/pipeline/plugins/multiproc.py +++ b/nipype/pipeline/plugins/multiproc.py @@ -9,18 +9,60 @@ from multiprocessing import Process, Pool, cpu_count, pool from traceback import format_exception import sys - +import numpy as np +from copy import deepcopy +from ..engine import MapNode +from ...utils.misc import str2bool +import datetime +import psutil +from ... import logging +import semaphore_singleton from .base import (DistributedPluginBase, report_crash) -def run_node(node, updatehash): - result = dict(result=None, traceback=None) +# Run node +def run_node(node, updatehash, plugin_args=None): + """docstring + """ + + # Import packages try: - result['result'] = node.run(updatehash=updatehash) - except: - etype, eval, etr = sys.exc_info() - result['traceback'] = format_exception(etype, eval, etr) - result['result'] = node.result + runtime_profile = plugin_args['runtime_profile'] + import memory_profiler + import datetime + except KeyError: + runtime_profile = False + except ImportError: + runtime_profile = False + + # Init variables + result = dict(result=None, traceback=None) + + # If we're profiling the run + if runtime_profile: + try: + # Init function tuple + proc = (node.run, (), {'updatehash' : updatehash}) + start = datetime.datetime.now() + mem_mb, retval = memory_profiler.memory_usage(proc=proc, retval=True, include_children=True, max_usage=True) + runtime = (datetime.datetime.now() - start).total_seconds() + result['result'] = retval + result['node_memory'] = mem_mb[0]/1024.0 + result['cmd_memory'] = retval.runtime.get('cmd_memory') + result['cmd_threads'] = retval.runtime.get('cmd_threads') + result['run_seconds'] = runtime + except: + etype, eval, etr = sys.exc_info() + result['traceback'] = format_exception(etype,eval,etr) + result['result'] = node.result + # Otherwise, execute node.run as normal + else: + try: + result['result'] = node.run(updatehash=updatehash) + except: + etype, eval, etr = sys.exc_info() + result['traceback'] = format_exception(etype,eval,etr) + result['result'] = node.result return result @@ -41,34 +83,62 @@ class NonDaemonPool(pool.Pool): """ Process = NonDaemonProcess +logger = logging.getLogger('workflow') -class MultiProcPlugin(DistributedPluginBase): - """Execute workflow with multiprocessing +def release_lock(args): + semaphore_singleton.semaphore.release() + +class ResourceMultiProcPlugin(DistributedPluginBase): + """Execute workflow with multiprocessing, not sending more jobs at once + than the system can support. The plugin_args input to run can be used to control the multiprocessing - execution. Currently supported options are: + execution and defining the maximum amount of memory and threads that + should be used. When those parameters are not specified, + the number of threads and memory of the system is used. 
+ + System consuming nodes should be tagged: + memory_consuming_node.interface.estimated_memory = 8 #Gb + thread_consuming_node.interface.num_threads = 16 + + The default number of threads and memory for a node is 1. + + Currently supported options are: - - n_procs : number of processes to use - non_daemon : boolean flag to execute as non-daemon processes + - num_threads: maximum number of threads to be executed in parallel + - estimated_memory: maximum memory that can be used at once. """ def __init__(self, plugin_args=None): - super(MultiProcPlugin, self).__init__(plugin_args=plugin_args) + super(ResourceMultiProcPlugin, self).__init__(plugin_args=plugin_args) self._taskresult = {} self._taskid = 0 non_daemon = True - n_procs = cpu_count() - if plugin_args: - if 'n_procs' in plugin_args: - n_procs = plugin_args['n_procs'] - if 'non_daemon' in plugin_args: + self.plugin_args = plugin_args + self.processors = cpu_count() + memory = psutil.virtual_memory() + self.memory = memory.total / (1024*1024*1024) + if self.plugin_args: + if 'non_daemon' in self.plugin_args: non_daemon = plugin_args['non_daemon'] + if 'n_procs' in self.plugin_args: + self.processors = self.plugin_args['n_procs'] + if 'memory' in self.plugin_args: + self.memory = self.plugin_args['memory'] + if non_daemon: # run the execution using the non-daemon pool subclass - self.pool = NonDaemonPool(processes=n_procs) + self.pool = NonDaemonPool(processes=self.processors) else: - self.pool = Pool(processes=n_procs) + self.pool = Pool(processes=self.processors) + + def _wait(self): + if len(self.pending_tasks) > 0: + semaphore_singleton.semaphore.acquire() + semaphore_singleton.semaphore.release() + def _get_result(self, taskid): if taskid not in self._taskresult: @@ -77,17 +147,6 @@ def _get_result(self, taskid): return None return self._taskresult[taskid].get() - def _submit_job(self, node, updatehash=False): - self._taskid += 1 - try: - if node.inputs.terminal_output == 'stream': - node.inputs.terminal_output = 'allatonce' - except: - pass - self._taskresult[self._taskid] = self.pool.apply_async(run_node, - (node, - updatehash,)) - return self._taskid def _report_crash(self, node, result=None): if result and result['traceback']: @@ -100,3 +159,115 @@ def _report_crash(self, node, result=None): def _clear_task(self, taskid): del self._taskresult[taskid] + + def _submit_job(self, node, updatehash=False): + self._taskid += 1 + try: + if node.inputs.terminal_output == 'stream': + node.inputs.terminal_output = 'allatonce' + except: + pass + self._taskresult[self._taskid] = self.pool.apply_async(run_node, + (node, updatehash, self.plugin_args), + callback=release_lock) + return self._taskid + + def _send_procs_to_workers(self, updatehash=False, graph=None): + """ Sends jobs to workers when system resources are available. + Check memory (gb) and cores usage before running jobs. 
+ """ + executing_now = [] + + # Check to see if a job is available + jobids = np.flatnonzero((self.proc_pending == True) & (self.depidx.sum(axis=0) == 0).__array__()) + + #check available system resources by summing all threads and memory used + busy_memory = 0 + busy_processors = 0 + for jobid in jobids: + busy_memory+= self.procs[jobid]._interface.estimated_memory + busy_processors+= self.procs[jobid]._interface.num_threads + + free_memory = self.memory - busy_memory + free_processors = self.processors - busy_processors + + + #check all jobs without dependency not run + jobids = np.flatnonzero((self.proc_done == False) & (self.depidx.sum(axis=0) == 0).__array__()) + + + #sort jobs ready to run first by memory and then by number of threads + #The most resource consuming jobs run first + jobids = sorted(jobids, key=lambda item: (self.procs[item]._interface.estimated_memory, self.procs[item]._interface.num_threads)) + + logger.debug('Free memory: %d, Free processors: %d', free_memory, free_processors) + + + #while have enough memory and processors for first job + #submit first job on the list + for jobid in jobids: + logger.debug('Next Job: %d, memory: %d, threads: %d' %(jobid, self.procs[jobid]._interface.estimated_memory, self.procs[jobid]._interface.num_threads)) + + if self.procs[jobid]._interface.estimated_memory <= free_memory and self.procs[jobid]._interface.num_threads <= free_processors: + logger.info('Executing: %s ID: %d' %(self.procs[jobid]._id, jobid)) + executing_now.append(self.procs[jobid]) + + if isinstance(self.procs[jobid], MapNode): + try: + num_subnodes = self.procs[jobid].num_subnodes() + except Exception: + self._clean_queue(jobid, graph) + self.proc_pending[jobid] = False + continue + if num_subnodes > 1: + submit = self._submit_mapnode(jobid) + if not submit: + continue + + # change job status in appropriate queues + self.proc_done[jobid] = True + self.proc_pending[jobid] = True + + free_memory -= self.procs[jobid]._interface.estimated_memory + free_processors -= self.procs[jobid]._interface.num_threads + + # Send job to task manager and add to pending tasks + if self._status_callback: + self._status_callback(self.procs[jobid], 'start') + if str2bool(self.procs[jobid].config['execution']['local_hash_check']): + logger.debug('checking hash locally') + try: + hash_exists, _, _, _ = self.procs[ + jobid].hash_exists() + logger.debug('Hash exists %s' % str(hash_exists)) + if (hash_exists and (self.procs[jobid].overwrite == False or (self.procs[jobid].overwrite == None and not self.procs[jobid]._interface.always_run))): + self._task_finished_cb(jobid) + self._remove_node_dirs() + continue + except Exception: + self._clean_queue(jobid, graph) + self.proc_pending[jobid] = False + continue + logger.debug('Finished checking hash') + + if self.procs[jobid].run_without_submitting: + logger.debug('Running node %s on master thread' %self.procs[jobid]) + try: + self.procs[jobid].run() + except Exception: + self._clean_queue(jobid, graph) + self._task_finished_cb(jobid) + self._remove_node_dirs() + + else: + logger.debug('submitting', jobid) + tid = self._submit_job(deepcopy(self.procs[jobid]), updatehash=updatehash) + if tid is None: + self.proc_done[jobid] = False + self.proc_pending[jobid] = False + else: + self.pending_tasks.insert(0, (tid, jobid)) + else: + break + + logger.debug('No jobs waiting to execute') diff --git a/nipype/pipeline/plugins/tests/test_base.py b/nipype/pipeline/plugins/tests/test_base.py index 243ae195c2..616cb634a0 100644 --- 
a/nipype/pipeline/plugins/tests/test_base.py +++ b/nipype/pipeline/plugins/tests/test_base.py @@ -38,5 +38,5 @@ def func(arg1): wf.add_nodes([funkynode]) wf.base_dir = '/tmp' -wf.run(plugin='MultiProc') +wf.run(plugin='ResourceMultiProc') ''' diff --git a/nipype/pipeline/plugins/tests/test_callback.py b/nipype/pipeline/plugins/tests/test_callback.py index db02bc889b..036fd76090 100644 --- a/nipype/pipeline/plugins/tests/test_callback.py +++ b/nipype/pipeline/plugins/tests/test_callback.py @@ -76,7 +76,7 @@ def test_callback_multiproc_normal(): wf.add_nodes([f_node]) wf.config['execution']['crashdump_dir'] = wf.base_dir wf.config['execution']['poll_sleep_duration'] = 2 - wf.run(plugin='MultiProc', plugin_args={'status_callback': so.callback}) + wf.run(plugin='ResourceMultiProc', plugin_args={'status_callback': so.callback}) assert_equal(len(so.statuses), 2) for (n, s) in so.statuses: yield assert_equal, n.name, 'f_node' @@ -95,7 +95,7 @@ def test_callback_multiproc_exception(): wf.config['execution']['crashdump_dir'] = wf.base_dir wf.config['execution']['poll_sleep_duration'] = 2 try: - wf.run(plugin='MultiProc', + wf.run(plugin='ResourceMultiProc', plugin_args={'status_callback': so.callback}) except: pass diff --git a/nipype/pipeline/plugins/tests/test_multiproc.py b/nipype/pipeline/plugins/tests/test_multiproc.py index efa9ec4161..ed101db7bf 100644 --- a/nipype/pipeline/plugins/tests/test_multiproc.py +++ b/nipype/pipeline/plugins/tests/test_multiproc.py @@ -3,7 +3,7 @@ from tempfile import mkdtemp from shutil import rmtree -from nipype.testing import assert_equal +from nipype.testing import assert_equal, assert_less_equal import nipype.pipeline.engine as pe @@ -44,10 +44,195 @@ def test_run_multiproc(): pipe.base_dir = os.getcwd() mod1.inputs.input1 = 1 pipe.config['execution']['poll_sleep_duration'] = 2 - execgraph = pipe.run(plugin="MultiProc") + execgraph = pipe.run(plugin="ResourceMultiProc") names = ['.'.join((node._hierarchy, node.name)) for node in execgraph.nodes()] node = execgraph.nodes()[names.index('pipe.mod1')] result = node.get_output('output1') yield assert_equal, result, [1, 1] os.chdir(cur_dir) rmtree(temp_dir) + + +################################ + + +class InputSpecSingleNode(nib.TraitedSpec): + input1 = nib.traits.Int(desc='a random int') + input2 = nib.traits.Int(desc='a random int') + +class OutputSpecSingleNode(nib.TraitedSpec): + output1 = nib.traits.Int(desc='a random int') + + +class TestInterfaceSingleNode(nib.BaseInterface): + input_spec = InputSpecSingleNode + output_spec = OutputSpecSingleNode + + def _run_interface(self, runtime): + runtime.returncode = 0 + return runtime + + def _list_outputs(self): + outputs = self._outputs().get() + outputs['output1'] = self.inputs.input1 + return outputs + + +def find_metrics(nodes, last_node): + import json + from dateutil.parser import parse + from datetime import datetime + import datetime as d + + + start = parse(nodes[0]['start']) + total_duration = int((parse(last_node['finish']) - start).total_seconds()) + + total_memory = [] + total_threads = [] + for i in range(total_duration): + total_memory.append(0) + total_threads.append(0) + + now = start + for i in range(total_duration): + start_index = 0 + node_start = None + node_finish = None + + x = now + + for j in range(start_index, len(nodes)): + node_start = parse(nodes[j]['start']) + node_finish = parse(nodes[j]['finish']) + + if node_start < x and node_finish > x: + total_memory[i] += nodes[j]['estimated_memory'] + total_threads[i] += nodes[j]['num_threads'] + 
start_index = j + + if node_start > x: + break + + now += d.timedelta(seconds=1) + + return total_memory, total_threads + + +import os +from nipype.pipeline.plugins.callback_log import log_nodes_cb +import logging +import logging.handlers +import psutil +from multiprocessing import cpu_count + +from nipype.utils import draw_gantt_chart + +def test_do_not_use_more_memory_then_specified(): + LOG_FILENAME = 'callback.log' + my_logger = logging.getLogger('callback') + my_logger.setLevel(logging.DEBUG) + + # Add the log message handler to the logger + handler = logging.FileHandler(LOG_FILENAME) + my_logger.addHandler(handler) + + max_memory = 10 + pipe = pe.Workflow(name='pipe') + n1 = pe.Node(interface=TestInterfaceSingleNode(), name='n1') + n2 = pe.Node(interface=TestInterfaceSingleNode(), name='n2') + n3 = pe.Node(interface=TestInterfaceSingleNode(), name='n3') + n4 = pe.Node(interface=TestInterfaceSingleNode(), name='n4') + + n1.interface.estimated_memory = 1 + n2.interface.estimated_memory = 1 + n3.interface.estimated_memory = 10 + n4.interface.estimated_memory = 1 + + pipe.connect(n1, 'output1', n2, 'input1') + pipe.connect(n1, 'output1', n3, 'input1') + pipe.connect(n2, 'output1', n4, 'input1') + pipe.connect(n3, 'output1', n4, 'input2') + n1.inputs.input1 = 10 + + pipe.run(plugin='ResourceMultiProc', plugin_args={'memory': max_memory, + 'status_callback': log_nodes_cb}) + + + nodes, last_node = draw_gantt_chart.log_to_json(LOG_FILENAME) + #usage in every second + memory, threads = find_metrics(nodes, last_node) + + result = True + for m in memory: + if m > max_memory: + result = False + break + + yield assert_equal, result, True + + max_threads = cpu_count() + + result = True + for t in threads: + if t > max_threads: + result = False + break + + yield assert_equal, result, True, "using more threads than system has (threads is not specified by user)" + + os.remove(LOG_FILENAME) + + + + +def test_do_not_use_more_threads_then_specified(): + LOG_FILENAME = 'callback.log' + my_logger = logging.getLogger('callback') + my_logger.setLevel(logging.DEBUG) + + # Add the log message handler to the logger + handler = logging.FileHandler(LOG_FILENAME) + my_logger.addHandler(handler) + + max_threads = 10 + pipe = pe.Workflow(name='pipe') + n1 = pe.Node(interface=TestInterfaceSingleNode(), name='n1') + n2 = pe.Node(interface=TestInterfaceSingleNode(), name='n2') + n3 = pe.Node(interface=TestInterfaceSingleNode(), name='n3') + n4 = pe.Node(interface=TestInterfaceSingleNode(), name='n4') + + n1.interface.num_threads = 1 + n2.interface.num_threads = 1 + n3.interface.num_threads = 10 + n4.interface.num_threads = 1 + + pipe.connect(n1, 'output1', n2, 'input1') + pipe.connect(n1, 'output1', n3, 'input1') + pipe.connect(n2, 'output1', n4, 'input1') + pipe.connect(n3, 'output1', n4, 'input2') + n1.inputs.input1 = 10 + pipe.config['execution']['poll_sleep_duration'] = 1 + pipe.run(plugin='ResourceMultiProc', plugin_args={'n_procs': max_threads, 'status_callback': log_nodes_cb}) + + nodes, last_node = draw_gantt_chart.log_to_json(LOG_FILENAME) + #usage in every second + memory, threads = find_metrics(nodes, last_node) + + result = True + for t in threads: + if t > max_threads: + result = False + break + + yield assert_equal, result, True, "using more threads than specified" + + max_memory = psutil.virtual_memory().total / (1024*1024) + result = True + for m in memory: + if m > max_memory: + result = False + break + yield assert_equal, result, True, "using more memory than system has (memory is not specified by 
user)" + + os.remove(LOG_FILENAME) diff --git a/nipype/pipeline/plugins/tests/test_multiproc_nondaemon.py b/nipype/pipeline/plugins/tests/test_multiproc_nondaemon.py index 89336c2026..427f5f02fe 100644 --- a/nipype/pipeline/plugins/tests/test_multiproc_nondaemon.py +++ b/nipype/pipeline/plugins/tests/test_multiproc_nondaemon.py @@ -84,7 +84,7 @@ def dummyFunction(filename): def run_multiproc_nondaemon_with_flag(nondaemon_flag): ''' - Start a pipe with two nodes using the multiproc plugin and passing the nondaemon_flag. + Start a pipe with two nodes using the resource multiproc plugin and passing the nondaemon_flag. ''' cur_dir = os.getcwd() @@ -111,7 +111,7 @@ def run_multiproc_nondaemon_with_flag(nondaemon_flag): # execute the pipe using the MultiProc plugin with 2 processes and the non_daemon flag # to enable child processes which start other multiprocessing jobs - execgraph = pipe.run(plugin="MultiProc", + execgraph = pipe.run(plugin="ResourceMultiProc", plugin_args={'n_procs': 2, 'non_daemon': nondaemon_flag}) From 08a485d634867c9030c77e3e9474c8d3740041f0 Mon Sep 17 00:00:00 2001 From: dclark87 Date: Wed, 3 Feb 2016 16:04:16 -0500 Subject: [PATCH 08/37] Manual merge of s3_datasink and resource_multiproc branch for cpac run --- nipype/interfaces/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nipype/interfaces/base.py b/nipype/interfaces/base.py index 1404110bf1..1f80b62b63 100644 --- a/nipype/interfaces/base.py +++ b/nipype/interfaces/base.py @@ -1233,7 +1233,7 @@ def run_command(runtime, output=None, timeout=0.01, redirect_x=False): try: from memory_profiler import _get_memory import psutil - mem_proc = True + mem_prof = True except: mem_prof = False From e5945e9b5ccfbdded9ed78c029a64045cfbbeed0 Mon Sep 17 00:00:00 2001 From: dclark87 Date: Wed, 3 Feb 2016 17:54:08 -0500 Subject: [PATCH 09/37] Changed resources fetching to its function and try-blocked it in case of dying processes --- nipype/interfaces/base.py | 46 ++++++++++++++++++++++++++++----------- 1 file changed, 33 insertions(+), 13 deletions(-) diff --git a/nipype/interfaces/base.py b/nipype/interfaces/base.py index 1f80b62b63..726d93cf2d 100644 --- a/nipype/interfaces/base.py +++ b/nipype/interfaces/base.py @@ -1204,6 +1204,7 @@ def _read(self, drain): self._lastidx = len(self._rows) +# Get number of threads for process def _get_num_threads(proc): ''' ''' @@ -1223,6 +1224,29 @@ def _get_num_threads(proc): return num_threads +# Get max resources used for process +def _get_max_resources_used(proc, mem_mb, num_threads, poll=False): + ''' + docstring + ''' + + # Import packages + from memory_profiler import _get_memory + import psutil + + try: + mem_mb = max(mem_mb, _get_memory(proc.pid, include_children=True)) + num_threads = max(num_threads, _get_num_threads(psutil.Process(proc.pid))) + if poll: + proc.poll() + except Exception as exc: + iflogger.info('Could not get resources used by process. Error: %s'\ + % exc) + + # Return resources + return mem_mb, num_threads + + def run_command(runtime, output=None, timeout=0.01, redirect_x=False): """Run a command, read stdout and stderr, prefix with timestamp. 
@@ -1231,7 +1255,7 @@ def run_command(runtime, output=None, timeout=0.01, redirect_x=False): # Import packages try: - from memory_profiler import _get_memory + import memory_profiler import psutil mem_prof = True except: @@ -1273,7 +1297,6 @@ def run_command(runtime, output=None, timeout=0.01, redirect_x=False): # Init variables for memory profiling mem_mb = -1 num_threads = -1 - interval = 1 if output == 'stream': streams = [Stream('stdout', proc.stdout), Stream('stderr', proc.stderr)] @@ -1292,8 +1315,8 @@ def _process(drain=0): stream.read(drain) while proc.returncode is None: if mem_prof: - mem_mb = max(mem_mb, _get_memory(proc.pid, include_children=True)) - num_threads = max(num_threads, _get_num_threads(psutil.Process(proc.pid))) + mem_mb, num_threads = \ + _get_max_resources_used(proc, mem_mb, num_threads) proc.poll() _process() _process(drain=1) @@ -1311,9 +1334,8 @@ def _process(drain=0): if output == 'allatonce': if mem_prof: while proc.returncode is None: - mem_mb = max(mem_mb, _get_memory(proc.pid, include_children=True)) - num_threads = max(num_threads, _get_num_threads(psutil.Process(proc.pid))) - proc.poll() + mem_mb, num_threads = \ + _get_max_resources_used(proc, mem_mb, num_threads, poll=True) stdout, stderr = proc.communicate() if stdout and isinstance(stdout, bytes): try: @@ -1332,9 +1354,8 @@ def _process(drain=0): if output == 'file': if mem_prof: while proc.returncode is None: - mem_mb = max(mem_mb, _get_memory(proc.pid, include_children=True)) - num_threads = max(num_threads, _get_num_threads(psutil.Process(proc.pid))) - proc.poll() + mem_mb, num_threads = \ + _get_max_resources_used(proc, mem_mb, num_threads, poll=True) ret_code = proc.wait() stderr.flush() stdout.flush() @@ -1344,9 +1365,8 @@ def _process(drain=0): if output == 'none': if mem_prof: while proc.returncode is None: - mem_mb = max(mem_mb, _get_memory(proc.pid, include_children=True)) - num_threads = max(num_threads, _get_num_threads(psutil.Process(proc.pid))) - proc.poll() + mem_mb, num_threads = \ + _get_max_resources_used(proc, mem_mb, num_threads, poll=True) proc.communicate() result['stdout'] = [] result['stderr'] = [] From 9cb7a68ca739aa8a8126545da9db5f39426e45d5 Mon Sep 17 00:00:00 2001 From: dclark87 Date: Thu, 4 Feb 2016 14:11:16 -0500 Subject: [PATCH 10/37] Fixed pickling bug of instance method by passing profiling flag instead of complete plugin_args dict --- nipype/interfaces/utility.py | 14 ++++++------ nipype/pipeline/plugins/base.py | 2 +- nipype/pipeline/plugins/multiproc.py | 22 ++++++++++--------- .../pipeline/plugins/tests/test_callback.py | 6 ++++- 4 files changed, 25 insertions(+), 19 deletions(-) diff --git a/nipype/interfaces/utility.py b/nipype/interfaces/utility.py index 2eb5c78fe5..39784d10c5 100644 --- a/nipype/interfaces/utility.py +++ b/nipype/interfaces/utility.py @@ -450,14 +450,14 @@ def _run_interface(self, runtime): args[name] = value # Record memory of function_handle - try: - import memory_profiler - proc = (function_handle, (), args) - mem_mb, out = memory_profiler.memory_usage(proc=proc, retval=True, include_children=True, max_usage=True) - setattr(runtime, 'cmd_memory', mem_mb[0]/1024.0) + #try: + # import memory_profiler + # proc = (function_handle, (), args) + # mem_mb, out = memory_profiler.memory_usage(proc=proc, retval=True, include_children=True, max_usage=True) + # setattr(runtime, 'cmd_memory', mem_mb[0]/1024.0) # If no memory_profiler package, run without recording memory - except: - out = function_handle(**args) + #except: + out = function_handle(**args) 
if len(self._output_names) == 1: self._out[self._output_names[0]] = out diff --git a/nipype/pipeline/plugins/base.py b/nipype/pipeline/plugins/base.py index 092c1883f1..48d62aa49b 100644 --- a/nipype/pipeline/plugins/base.py +++ b/nipype/pipeline/plugins/base.py @@ -250,7 +250,7 @@ def run(self, graph, config, updatehash=False): self._clear_task(taskid) else: toappend.insert(0, (taskid, jobid)) - except Exception: + except Exception as exc: result = {'result': None, 'traceback': format_exc()} notrun.append(self._clean_queue(jobid, graph, diff --git a/nipype/pipeline/plugins/multiproc.py b/nipype/pipeline/plugins/multiproc.py index 2e446ced57..1726efd480 100644 --- a/nipype/pipeline/plugins/multiproc.py +++ b/nipype/pipeline/plugins/multiproc.py @@ -21,23 +21,20 @@ # Run node -def run_node(node, updatehash, plugin_args=None): +def run_node(node, updatehash, runtime_profile=False): """docstring """ - + # Import packages try: - runtime_profile = plugin_args['runtime_profile'] import memory_profiler import datetime - except KeyError: - runtime_profile = False except ImportError: runtime_profile = False - + # Init variables result = dict(result=None, traceback=None) - + runtime_profile = False # If we're profiling the run if runtime_profile: try: @@ -167,9 +164,14 @@ def _submit_job(self, node, updatehash=False): node.inputs.terminal_output = 'allatonce' except: pass - self._taskresult[self._taskid] = self.pool.apply_async(run_node, - (node, updatehash, self.plugin_args), - callback=release_lock) + try: + runtime_profile = self.plugin_args['runtime_profile'] + except: + runtime_profile = False + self._taskresult[self._taskid] = \ + self.pool.apply_async(run_node, + (node, updatehash, runtime_profile), + callback=release_lock) return self._taskid def _send_procs_to_workers(self, updatehash=False, graph=None): diff --git a/nipype/pipeline/plugins/tests/test_callback.py b/nipype/pipeline/plugins/tests/test_callback.py index 036fd76090..267b4e99c9 100644 --- a/nipype/pipeline/plugins/tests/test_callback.py +++ b/nipype/pipeline/plugins/tests/test_callback.py @@ -26,7 +26,7 @@ class Status(object): def __init__(self): self.statuses = [] - def callback(self, node, status): + def callback(self, node, status, result=None): self.statuses.append((node, status)) @@ -105,3 +105,7 @@ def test_callback_multiproc_exception(): yield assert_equal, so.statuses[0][1], 'start' yield assert_equal, so.statuses[1][1], 'exception' rmtree(wf.base_dir) + +if __name__ == '__main__': + import nose + nose.run() From fe0a35203f2f74efb293442d49256b56dd87b3cb Mon Sep 17 00:00:00 2001 From: dclark87 Date: Thu, 4 Feb 2016 14:29:38 -0500 Subject: [PATCH 11/37] Merged resource_multiproc into s3_multiproc --- nipype/pipeline/plugins/semaphore_singleton.py | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 nipype/pipeline/plugins/semaphore_singleton.py diff --git a/nipype/pipeline/plugins/semaphore_singleton.py b/nipype/pipeline/plugins/semaphore_singleton.py new file mode 100644 index 0000000000..99c7752b82 --- /dev/null +++ b/nipype/pipeline/plugins/semaphore_singleton.py @@ -0,0 +1,2 @@ +import threading +semaphore = threading.Semaphore(1) From 5733af9ac7ac8537760a9f602c4ef42c913e3966 Mon Sep 17 00:00:00 2001 From: dclark87 Date: Thu, 4 Feb 2016 14:53:12 -0500 Subject: [PATCH 12/37] Fixed hsarc related to yrt blocking --- nipype/interfaces/tests/test_io.py | 2 +- nipype/interfaces/utility.py | 16 ++++++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/nipype/interfaces/tests/test_io.py 
b/nipype/interfaces/tests/test_io.py index 6d60dfd951..c1f4ec35f5 100644 --- a/nipype/interfaces/tests/test_io.py +++ b/nipype/interfaces/tests/test_io.py @@ -113,7 +113,7 @@ def test_selectfiles_valueerror(): yield assert_raises, ValueError, sf.run -@skip +@skipif(noboto) def test_s3datagrabber_communication(): dg = nio.S3DataGrabber( infields=['subj_id', 'run_num'], outfields=['func', 'struct']) diff --git a/nipype/interfaces/utility.py b/nipype/interfaces/utility.py index 738cafff0e..5729d9e677 100644 --- a/nipype/interfaces/utility.py +++ b/nipype/interfaces/utility.py @@ -450,14 +450,14 @@ def _run_interface(self, runtime): args[name] = value # Record memory of function_handle - try: - import memory_profiler - proc = (function_handle, (), args) - mem_mb, out = memory_profiler.memory_usage(proc=proc, retval=True, include_children=True, max_usage=True) - setattr(runtime, 'cmd_memory', mem_mb[0]/1024.0) - # If no memory_profiler package, run without recording memory - except ImportError: - out = function_handle(**args) + #try: + # import memory_profiler + # proc = (function_handle, (), args) + # mem_mb, out = memory_profiler.memory_usage(proc=proc, retval=True, include_children=True, max_usage=True) + # setattr(runtime, 'cmd_memory', mem_mb[0]/1024.0) + ## If no memory_profiler package, run without recording memory + #except: + out = function_handle(**args) if len(self._output_names) == 1: self._out[self._output_names[0]] = out From 544dddf632f4c9343befb2f2a03c631098a9f68a Mon Sep 17 00:00:00 2001 From: dclark87 Date: Thu, 4 Feb 2016 16:25:30 -0500 Subject: [PATCH 13/37] Removed forcing of runtime_profile to be off: --- nipype/pipeline/plugins/multiproc.py | 1 - 1 file changed, 1 deletion(-) diff --git a/nipype/pipeline/plugins/multiproc.py b/nipype/pipeline/plugins/multiproc.py index 1ee3a9b81d..fa43b1ccd3 100644 --- a/nipype/pipeline/plugins/multiproc.py +++ b/nipype/pipeline/plugins/multiproc.py @@ -34,7 +34,6 @@ def run_node(node, updatehash, runtime_profile=False): # Init variables result = dict(result=None, traceback=None) - runtime_profile = False # If we're profiling the run if runtime_profile: From c07429983f85122ceb95c35e8ff81f397a299352 Mon Sep 17 00:00:00 2001 From: dclark87 Date: Thu, 4 Feb 2016 17:18:41 -0500 Subject: [PATCH 14/37] Made when result is None that the end stats are N/A --- nipype/pipeline/plugins/callback_log.py | 6 +++--- nipype/pipeline/plugins/multiproc.py | 6 ++++-- nipype/pipeline/plugins/tests/test_callback.py | 4 ---- 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/nipype/pipeline/plugins/callback_log.py b/nipype/pipeline/plugins/callback_log.py index 854a217957..44b2455b79 100644 --- a/nipype/pipeline/plugins/callback_log.py +++ b/nipype/pipeline/plugins/callback_log.py @@ -3,13 +3,13 @@ def log_nodes_cb(node, status, result=None): logger = logging.getLogger('callback') - try: + if result is None: + node_mem = cmd_mem = run_seconds = cmd_threads = 'N/A' + else: node_mem = result['node_memory'] cmd_mem = result['cmd_memory'] run_seconds = result['run_seconds'] cmd_threads = result['cmd_threads'] - except Exception as exc: - node_mem = cmd_mem = run_seconds = cmd_threads = 'N/A' if status == 'start': message = '{"name":' + '"' + node.name + '"' + ',"id":' + '"' +\ node._id + '"' + ',"start":' + '"' +str(datetime.datetime.now()) +\ diff --git a/nipype/pipeline/plugins/multiproc.py b/nipype/pipeline/plugins/multiproc.py index fa43b1ccd3..bd68f72ade 100644 --- a/nipype/pipeline/plugins/multiproc.py +++ b/nipype/pipeline/plugins/multiproc.py @@ 
-41,13 +41,15 @@ def run_node(node, updatehash, runtime_profile=False): # Init function tuple proc = (node.run, (), {'updatehash' : updatehash}) start = datetime.datetime.now() - mem_mb, retval = memory_profiler.memory_usage(proc=proc, retval=True, include_children=True, max_usage=True) + mem_mb, retval = memory_profiler.memory_usage(proc=proc, retval=True, + include_children=True, + max_usage=True) runtime = (datetime.datetime.now() - start).total_seconds() result['result'] = retval result['node_memory'] = mem_mb[0]/1024.0 + result['run_seconds'] = runtime result['cmd_memory'] = retval.runtime.get('cmd_memory') result['cmd_threads'] = retval.runtime.get('cmd_threads') - result['run_seconds'] = runtime except: etype, eval, etr = sys.exc_info() result['traceback'] = format_exception(etype,eval,etr) diff --git a/nipype/pipeline/plugins/tests/test_callback.py b/nipype/pipeline/plugins/tests/test_callback.py index 2de3a880d9..f173a9b30c 100644 --- a/nipype/pipeline/plugins/tests/test_callback.py +++ b/nipype/pipeline/plugins/tests/test_callback.py @@ -104,7 +104,3 @@ def test_callback_multiproc_exception(): yield assert_equal, so.statuses[0][1], 'start' yield assert_equal, so.statuses[1][1], 'exception' rmtree(wf.base_dir) - -if __name__ == '__main__': - import nose - nose.run() From a4e3ae69c8821bb7c6b6f13b51bc52c0048fd161 Mon Sep 17 00:00:00 2001 From: dclark87 Date: Thu, 4 Feb 2016 17:56:18 -0500 Subject: [PATCH 15/37] Added try-blocks around the runtime profile stats in callback logger --- nipype/pipeline/plugins/callback_log.py | 33 +++++++++++++++---- .../pipeline/plugins/tests/test_multiproc.py | 3 +- 2 files changed, 29 insertions(+), 7 deletions(-) diff --git a/nipype/pipeline/plugins/callback_log.py b/nipype/pipeline/plugins/callback_log.py index 44b2455b79..9495e77410 100644 --- a/nipype/pipeline/plugins/callback_log.py +++ b/nipype/pipeline/plugins/callback_log.py @@ -2,14 +2,35 @@ import logging def log_nodes_cb(node, status, result=None): + ''' + ''' + + # Init variables logger = logging.getLogger('callback') + + # Check runtime profile stats if result is None: node_mem = cmd_mem = run_seconds = cmd_threads = 'N/A' else: - node_mem = result['node_memory'] - cmd_mem = result['cmd_memory'] - run_seconds = result['run_seconds'] - cmd_threads = result['cmd_threads'] + try: + node_mem = result['node_memory'] + except KeyError: + node_mem = 'Unknown' + try: + cmd_mem = result['cmd_memory'] + except KeyError: + cmd_mem = 'Unknown' + try: + run_seconds = result['run_seconds'] + except KeyError: + run_seconds = 'Unknown' + try: + cmd_threads = result['cmd_threads'] + except: + cmd_threads = 'Unknown' + + # Check status and write to log + # Start if status == 'start': message = '{"name":' + '"' + node.name + '"' + ',"id":' + '"' +\ node._id + '"' + ',"start":' + '"' +str(datetime.datetime.now()) +\ @@ -17,7 +38,7 @@ def log_nodes_cb(node, status, result=None): + str(node._interface.num_threads) + '}' logger.debug(message) - + # End elif status == 'end': message = '{"name":' + '"' + node.name + '"' + ',"id":' + '"' + \ node._id + '"' + ',"finish":' + '"' + str(datetime.datetime.now()) + \ @@ -29,7 +50,7 @@ def log_nodes_cb(node, status, result=None): ',"run_seconds":' + '"'+ str(run_seconds) + '"'+ '}' logger.debug(message) - + # Other else: message = '{"name":' + '"' + node.name + '"' + ',"id":' + '"' + \ node._id + '"' + ',"finish":' + '"' + str(datetime.datetime.now()) +\ diff --git a/nipype/pipeline/plugins/tests/test_multiproc.py b/nipype/pipeline/plugins/tests/test_multiproc.py index 
ed7483e772..cd41bbb695 100644 --- a/nipype/pipeline/plugins/tests/test_multiproc.py +++ b/nipype/pipeline/plugins/tests/test_multiproc.py @@ -212,7 +212,8 @@ def test_do_not_use_more_threads_then_specified(): pipe.connect(n3, 'output1', n4, 'input2') n1.inputs.input1 = 10 pipe.config['execution']['poll_sleep_duration'] = 1 - pipe.run(plugin='ResourceMultiProc', plugin_args={'n_procs': max_threads, 'status_callback': log_nodes_cb}) + pipe.run(plugin='ResourceMultiProc', plugin_args={'n_procs': max_threads, + 'status_callback': log_nodes_cb}) nodes, last_node = draw_gantt_chart.log_to_json(LOG_FILENAME) #usage in every second From e25ac8cde62f2dec38f41d1aca40013b462e2ca2 Mon Sep 17 00:00:00 2001 From: dclark87 Date: Fri, 5 Feb 2016 14:34:51 -0500 Subject: [PATCH 16/37] Cleaned up some code and removed recursion from get_num_threads --- nipype/interfaces/base.py | 8 +++++--- nipype/interfaces/utility.py | 8 -------- nipype/pipeline/plugins/multiproc.py | 6 +++--- 3 files changed, 8 insertions(+), 14 deletions(-) diff --git a/nipype/interfaces/base.py b/nipype/interfaces/base.py index b4c76ca782..ab27497bb4 100644 --- a/nipype/interfaces/base.py +++ b/nipype/interfaces/base.py @@ -1211,15 +1211,17 @@ def _get_num_threads(proc): # Import packages import psutil + import logging as lg # Init variables num_threads = proc.num_threads() try: + num_children = len(proc.children()) for child in proc.children(): - num_threads = max(num_threads, child.num_threads(), - len(child.children()), _get_num_threads(child)) + num_threads = max(num_threads, num_children, + child.num_threads(), len(child.children())) except psutil.NoSuchProcess: - dummy = 1 + pass return num_threads diff --git a/nipype/interfaces/utility.py b/nipype/interfaces/utility.py index 5729d9e677..37883d4e5c 100644 --- a/nipype/interfaces/utility.py +++ b/nipype/interfaces/utility.py @@ -449,14 +449,6 @@ def _run_interface(self, runtime): if isdefined(value): args[name] = value - # Record memory of function_handle - #try: - # import memory_profiler - # proc = (function_handle, (), args) - # mem_mb, out = memory_profiler.memory_usage(proc=proc, retval=True, include_children=True, max_usage=True) - # setattr(runtime, 'cmd_memory', mem_mb[0]/1024.0) - ## If no memory_profiler package, run without recording memory - #except: out = function_handle(**args) if len(self._output_names) == 1: diff --git a/nipype/pipeline/plugins/multiproc.py b/nipype/pipeline/plugins/multiproc.py index bd68f72ade..60f235b5ab 100644 --- a/nipype/pipeline/plugins/multiproc.py +++ b/nipype/pipeline/plugins/multiproc.py @@ -44,10 +44,10 @@ def run_node(node, updatehash, runtime_profile=False): mem_mb, retval = memory_profiler.memory_usage(proc=proc, retval=True, include_children=True, max_usage=True) - runtime = (datetime.datetime.now() - start).total_seconds() + run_secs = (datetime.datetime.now() - start).total_seconds() result['result'] = retval result['node_memory'] = mem_mb[0]/1024.0 - result['run_seconds'] = runtime + result['run_seconds'] = run_secs result['cmd_memory'] = retval.runtime.get('cmd_memory') result['cmd_threads'] = retval.runtime.get('cmd_threads') except: @@ -118,7 +118,7 @@ def __init__(self, plugin_args=None): self.plugin_args = plugin_args self.processors = cpu_count() memory = psutil.virtual_memory() - self.memory = memory.total / (1024*1024*1024) + self.memory = float(memory.total) / (1024.0**3) if self.plugin_args: if 'non_daemon' in self.plugin_args: non_daemon = plugin_args['non_daemon'] From d714a0345434ec6660311e79dd4bde30ef6e8540 Mon Sep 
17 00:00:00 2001 From: dclark87 Date: Tue, 9 Feb 2016 16:57:17 -0500 Subject: [PATCH 17/37] Added check for runtime having 'get' attribute --- nipype/pipeline/plugins/multiproc.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/nipype/pipeline/plugins/multiproc.py b/nipype/pipeline/plugins/multiproc.py index 60f235b5ab..d529eb84c7 100644 --- a/nipype/pipeline/plugins/multiproc.py +++ b/nipype/pipeline/plugins/multiproc.py @@ -48,8 +48,9 @@ def run_node(node, updatehash, runtime_profile=False): result['result'] = retval result['node_memory'] = mem_mb[0]/1024.0 result['run_seconds'] = run_secs - result['cmd_memory'] = retval.runtime.get('cmd_memory') - result['cmd_threads'] = retval.runtime.get('cmd_threads') + if hasattr(retval.runtime, 'get'): + result['cmd_memory'] = retval.runtime.get('cmd_memory') + result['cmd_threads'] = retval.runtime.get('cmd_threads') except: etype, eval, etr = sys.exc_info() result['traceback'] = format_exception(etype,eval,etr) From 27ee192bc27f260935bab077a970cf325417553a Mon Sep 17 00:00:00 2001 From: dclark87 Date: Fri, 12 Feb 2016 12:10:01 -0500 Subject: [PATCH 18/37] Removed print statements --- nipype/pipeline/engine/nodes.py | 2 ++ nipype/pipeline/plugins/multiproc.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/nipype/pipeline/engine/nodes.py b/nipype/pipeline/engine/nodes.py index 9f9165e3b2..ce4c15278b 100644 --- a/nipype/pipeline/engine/nodes.py +++ b/nipype/pipeline/engine/nodes.py @@ -52,6 +52,7 @@ from ... import config, logging logger = logging.getLogger('workflow') + from ...interfaces.base import (traits, InputMultiPath, CommandLine, Undefined, TraitedSpec, DynamicTraitedSpec, Bunch, InterfaceResult, md5, Interface, @@ -670,6 +671,7 @@ def _copyfiles_to_wd(self, outdir, execute, linksonly=False): os.makedirs(outdir) for info in self._interface._get_filecopy_info(): files = self.inputs.get().get(info['key']) + print '######## files: %s' % (str(files)) if not isdefined(files): continue if files: diff --git a/nipype/pipeline/plugins/multiproc.py b/nipype/pipeline/plugins/multiproc.py index d529eb84c7..2ec3286bf8 100644 --- a/nipype/pipeline/plugins/multiproc.py +++ b/nipype/pipeline/plugins/multiproc.py @@ -265,7 +265,7 @@ def _send_procs_to_workers(self, updatehash=False, graph=None): self._remove_node_dirs() else: - logger.debug('submitting', jobid) + logger.debug('submitting %s' % str(jobid)) tid = self._submit_job(deepcopy(self.procs[jobid]), updatehash=updatehash) if tid is None: self.proc_done[jobid] = False From c99f834a0cd53a2ec8933444dbab7cd930da2262 Mon Sep 17 00:00:00 2001 From: dclark87 Date: Fri, 12 Feb 2016 12:24:17 -0500 Subject: [PATCH 19/37] Removed more print statements and touched up some code to be more like nipy/master --- nipype/pipeline/engine/nodes.py | 1 - nipype/pipeline/engine/tests/test_engine.py | 3 ++- nipype/pipeline/plugins/base.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/nipype/pipeline/engine/nodes.py b/nipype/pipeline/engine/nodes.py index ce4c15278b..63b9ae13f8 100644 --- a/nipype/pipeline/engine/nodes.py +++ b/nipype/pipeline/engine/nodes.py @@ -671,7 +671,6 @@ def _copyfiles_to_wd(self, outdir, execute, linksonly=False): os.makedirs(outdir) for info in self._interface._get_filecopy_info(): files = self.inputs.get().get(info['key']) - print '######## files: %s' % (str(files)) if not isdefined(files): continue if files: diff --git a/nipype/pipeline/engine/tests/test_engine.py b/nipype/pipeline/engine/tests/test_engine.py index 
2f829abcd4..09f3ec92c2 100644 --- a/nipype/pipeline/engine/tests/test_engine.py +++ b/nipype/pipeline/engine/tests/test_engine.py @@ -714,7 +714,8 @@ def func1(in1): # set local check w1.config['execution'] = {'stop_on_first_crash': 'true', 'local_hash_check': 'true', - 'crashdump_dir': wd} + 'crashdump_dir': wd, + 'poll_sleep_duration' : 2} # test output of num_subnodes method when serial is default (False) yield assert_equal, n1.num_subnodes(), len(n1.inputs.in1) diff --git a/nipype/pipeline/plugins/base.py b/nipype/pipeline/plugins/base.py index 48d62aa49b..092c1883f1 100644 --- a/nipype/pipeline/plugins/base.py +++ b/nipype/pipeline/plugins/base.py @@ -250,7 +250,7 @@ def run(self, graph, config, updatehash=False): self._clear_task(taskid) else: toappend.insert(0, (taskid, jobid)) - except Exception as exc: + except Exception: result = {'result': None, 'traceback': format_exc()} notrun.append(self._clean_queue(jobid, graph, From 07461cfe61a6c5d7bf7a5d9a9c1339018f999c24 Mon Sep 17 00:00:00 2001 From: dclark87 Date: Fri, 12 Feb 2016 15:51:54 -0500 Subject: [PATCH 20/37] Added a fix for the recursive symlink bug (was happening because while loop in memory_profiler was executing node twice when it didnt finish running the first time --- nipype/pipeline/plugins/multiproc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nipype/pipeline/plugins/multiproc.py b/nipype/pipeline/plugins/multiproc.py index 2ec3286bf8..8f133faca0 100644 --- a/nipype/pipeline/plugins/multiproc.py +++ b/nipype/pipeline/plugins/multiproc.py @@ -43,7 +43,7 @@ def run_node(node, updatehash, runtime_profile=False): start = datetime.datetime.now() mem_mb, retval = memory_profiler.memory_usage(proc=proc, retval=True, include_children=True, - max_usage=True) + max_usage=True, interval=.9e-6) run_secs = (datetime.datetime.now() - start).total_seconds() result['result'] = retval result['node_memory'] = mem_mb[0]/1024.0 From 116a6a19d60e23fe831301b68381b99e3ac191c8 Mon Sep 17 00:00:00 2001 From: dclark87 Date: Fri, 12 Feb 2016 17:34:32 -0500 Subject: [PATCH 21/37] Removed node.run level profiling --- nipype/pipeline/plugins/multiproc.py | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/nipype/pipeline/plugins/multiproc.py b/nipype/pipeline/plugins/multiproc.py index 8f133faca0..11aecb86e9 100644 --- a/nipype/pipeline/plugins/multiproc.py +++ b/nipype/pipeline/plugins/multiproc.py @@ -6,6 +6,7 @@ http://stackoverflow.com/a/8963618/1183453 """ +# Import packages from multiprocessing import Process, Pool, cpu_count, pool from traceback import format_exception import sys @@ -13,12 +14,13 @@ from copy import deepcopy from ..engine import MapNode from ...utils.misc import str2bool -import datetime import psutil from ... 
import logging import semaphore_singleton from .base import (DistributedPluginBase, report_crash) +# Init logger +logger = logging.getLogger('workflow') # Run node def run_node(node, updatehash, runtime_profile=False): @@ -26,11 +28,7 @@ def run_node(node, updatehash, runtime_profile=False): """ # Import packages - try: - import memory_profiler - import datetime - except ImportError: - runtime_profile = False + import datetime # Init variables result = dict(result=None, traceback=None) @@ -38,15 +36,10 @@ def run_node(node, updatehash, runtime_profile=False): # If we're profiling the run if runtime_profile: try: - # Init function tuple - proc = (node.run, (), {'updatehash' : updatehash}) start = datetime.datetime.now() - mem_mb, retval = memory_profiler.memory_usage(proc=proc, retval=True, - include_children=True, - max_usage=True, interval=.9e-6) + retval = node.run(updatehash=updatehash) run_secs = (datetime.datetime.now() - start).total_seconds() result['result'] = retval - result['node_memory'] = mem_mb[0]/1024.0 result['run_seconds'] = run_secs if hasattr(retval.runtime, 'get'): result['cmd_memory'] = retval.runtime.get('cmd_memory') @@ -83,11 +76,11 @@ class NonDaemonPool(pool.Pool): """ Process = NonDaemonProcess -logger = logging.getLogger('workflow') def release_lock(args): semaphore_singleton.semaphore.release() + class ResourceMultiProcPlugin(DistributedPluginBase): """Execute workflow with multiprocessing, not sending more jobs at once than the system can support. From c1376c404b4202e205936c10b6627c2edffc7c05 Mon Sep 17 00:00:00 2001 From: dclark87 Date: Wed, 17 Feb 2016 13:28:18 -0500 Subject: [PATCH 22/37] Updated keyword in result dictionary to runtime instead of cmd-level --- nipype/interfaces/afni/__init__.py | 4 +- nipype/interfaces/afni/preprocess.py | 99 +++++++++++++++++++++++++ nipype/interfaces/base.py | 4 +- nipype/pipeline/plugins/callback_log.py | 25 +++---- nipype/pipeline/plugins/multiproc.py | 6 +- 5 files changed, 116 insertions(+), 22 deletions(-) diff --git a/nipype/interfaces/afni/__init__.py b/nipype/interfaces/afni/__init__.py index 4437a3ccd2..8cc9b34a50 100644 --- a/nipype/interfaces/afni/__init__.py +++ b/nipype/interfaces/afni/__init__.py @@ -8,8 +8,8 @@ from .base import Info from .preprocess import (To3D, Refit, Resample, TStat, Automask, Volreg, Merge, - ZCutUp, Calc, TShift, Warp, Detrend, Despike, Copy, - Fourier, Allineate, Maskave, SkullStrip, TCat, Fim, + ZCutUp, Calc, TShift, Warp, Detrend, Despike, DegreeCentrality, + Copy, Fourier, Allineate, Maskave, SkullStrip, TCat, Fim, BlurInMask, Autobox, TCorrMap, Bandpass, Retroicor, TCorrelate, TCorr1D, BrickStat, ROIStats, AutoTcorrelate, AFNItoNIFTI, Eval, Means) diff --git a/nipype/interfaces/afni/preprocess.py b/nipype/interfaces/afni/preprocess.py index 85f2a4eaf9..cdb02f6625 100644 --- a/nipype/interfaces/afni/preprocess.py +++ b/nipype/interfaces/afni/preprocess.py @@ -180,6 +180,7 @@ class RefitInputSpec(CommandLineInputSpec): ' template type, e.g. 
TLRC, MNI, ORIG') + class Refit(CommandLine): """Changes some of the information inside a 3D dataset's header @@ -506,6 +507,104 @@ class Despike(AFNICommand): output_spec = AFNICommandOutputSpec +class CentralityInputSpec(AFNICommandInputSpec): + """ + inherits the out_file parameter from AFNICommandOutputSpec base class + """ + + in_file = File(desc='input file to 3dDegreeCentrality', + argstr='%s', + position=-1, + mandatory=True, + exists=True, + copyfile=False) + + mask = File(desc='mask file to mask input data', + argstr="-mask %s", + exists=True) + + thresh = traits.Float(desc='threshold to exclude connections where corr <= thresh', + argstr='-thresh %f') + + polort = traits.Int(desc='', argstr='-polort %d') + + autoclip = traits.Bool(desc='Clip off low-intensity regions in the dataset', + argstr='-autoclip') + + automask = traits.Bool(desc='Mask the dataset to target brain-only voxels', + argstr='-automask') + + +class DegreeCentralityInputSpec(CentralityInputSpec): + """ + inherits the out_file parameter from AFNICommandOutputSpec base class + """ + + in_file = File(desc='input file to 3dDegreeCentrality', + argstr='%s', + position=-1, + mandatory=True, + exists=True, + copyfile=False) + + mask = File(desc='mask file to mask input data', + argstr="-mask %s", + exists=True) + + thresh = traits.Float(desc='threshold to exclude connections where corr <= thresh', + argstr='-thresh %f') + + sparsity = traits.Float(desc='only take the top percent of connections', + argstr='-sparsity %f') + + out_1d = traits.Str(desc='output filepath to text dump of correlation matrix', + argstr='-out1D') + + polort = traits.Int(desc='', argstr='-polort %d') + + autoclip = traits.Bool(desc='Clip off low-intensity regions in the dataset', + argstr='-autoclip') + + automask = traits.Bool(desc='Mask the dataset to target brain-only voxels', + argstr='-automask') + + +class DegreeCentralityOutputSpec(AFNICommandOutputSpec): + """ + inherits the out_file parameter from AFNICommandOutputSpec base class + """ + + one_d_file = File(desc='The text output of the similarity matrix computed'\ + 'after thresholding with one-dimensional and '\ + 'ijk voxel indices, correlations, image extents, '\ + 'and affine matrix') + + +class DegreeCentrality(AFNICommand): + """Performs degree centrality on a dataset using a given maskfile + via 3dDegreeCentrality + + For complete details, see the `3dDegreeCentrality Documentation. 
+ + + Examples + ======== + + >>> from nipype.interfaces import afni as afni + >>> degree = afni.DegreeCentrality() + >>> degree.inputs.in_file = 'func_preproc.nii' + >>> degree.inputs.mask = 'mask.nii' + >>> degree.inputs.sparsity = 1 # keep the top one percent of connections + >>> degree.cmdline + '3dDegreeCentrality -sparsity 1 -mask mask.nii func_preproc.nii' + >>> res = degree.run() # doctest: +SKIP + """ + + _cmd = '3dDegreeCentrality' + input_spec = DegreeCentralityInputSpec + output_spec = DegreeCentralityOutputSpec + + class AutomaskInputSpec(AFNICommandInputSpec): in_file = File(desc='input file to 3dAutomask', argstr='%s', diff --git a/nipype/interfaces/base.py b/nipype/interfaces/base.py index ab27497bb4..14c57e406f 100644 --- a/nipype/interfaces/base.py +++ b/nipype/interfaces/base.py @@ -1375,8 +1375,8 @@ def _process(drain=0): result['stderr'] = [] result['merged'] = '' - setattr(runtime, 'cmd_memory', mem_mb/1024.0) - setattr(runtime, 'cmd_threads', num_threads) + setattr(runtime, 'runtime_memory', mem_mb/1024.0) + setattr(runtime, 'runtime_threads', num_threads) runtime.stderr = '\n'.join(result['stderr']) runtime.stdout = '\n'.join(result['stdout']) runtime.merged = result['merged'] diff --git a/nipype/pipeline/plugins/callback_log.py b/nipype/pipeline/plugins/callback_log.py index 9495e77410..548b98f342 100644 --- a/nipype/pipeline/plugins/callback_log.py +++ b/nipype/pipeline/plugins/callback_log.py @@ -10,24 +10,20 @@ def log_nodes_cb(node, status, result=None): # Check runtime profile stats if result is None: - node_mem = cmd_mem = run_seconds = cmd_threads = 'N/A' + runtime_memory = runtime_seconds = runtime_threads = 'N/A' else: try: - node_mem = result['node_memory'] + runtime_memory = result['runtime_memory'] except KeyError: - node_mem = 'Unknown' + runtime_memory = 'Unknown' try: - cmd_mem = result['cmd_memory'] + runtime_seconds = result['runtime_seconds'] except KeyError: - cmd_mem = 'Unknown' + runtime_seconds = 'Unknown' try: - run_seconds = result['run_seconds'] - except KeyError: - run_seconds = 'Unknown' - try: - cmd_threads = result['cmd_threads'] + runtime_threads = result['runtime_threads'] except: - cmd_threads = 'Unknown' + runtime_threads = 'Unknown' # Check status and write to log # Start @@ -44,10 +40,9 @@ def log_nodes_cb(node, status, result=None): node._id + '"' + ',"finish":' + '"' + str(datetime.datetime.now()) + \ '"' + ',"estimated_memory":' + '"'+ str(node._interface.estimated_memory) + '"'+ \ ',"num_threads":' + '"'+ str(node._interface.num_threads) + '"'+ \ - ',"cmd-level_threads":' + '"'+ str(cmd_threads) + '"'+ \ - ',"node-level_memory":' + '"'+ str(node_mem) + '"'+ \ - ',"cmd-level_memory":' + '"'+ str(cmd_mem) + '"' + \ - ',"run_seconds":' + '"'+ str(run_seconds) + '"'+ '}' + ',"runtime_threads":' + '"'+ str(runtime_threads) + '"'+ \ + ',"runtime_memory":' + '"'+ str(runtime_memory) + '"' + \ + ',"runtime_seconds":' + '"'+ str(runtime_seconds) + '"'+ '}' logger.debug(message) # Other diff --git a/nipype/pipeline/plugins/multiproc.py b/nipype/pipeline/plugins/multiproc.py index 11aecb86e9..b34f9944c5 100644 --- a/nipype/pipeline/plugins/multiproc.py +++ b/nipype/pipeline/plugins/multiproc.py @@ -40,10 +40,10 @@ def run_node(node, updatehash, runtime_profile=False): retval = node.run(updatehash=updatehash) run_secs = (datetime.datetime.now() - start).total_seconds() result['result'] = retval - result['run_seconds'] = run_secs + result['runtime_seconds'] = run_secs if hasattr(retval.runtime, 'get'): - result['cmd_memory'] = 
retval.runtime.get('cmd_memory') - result['cmd_threads'] = retval.runtime.get('cmd_threads') + result['runtime_memory'] = retval.runtime.get('runtime_memory') + result['runtime_threads'] = retval.runtime.get('runtime_threads') except: etype, eval, etr = sys.exc_info() result['traceback'] = format_exception(etype,eval,etr) From 2ce2661c15da6151d8822ec14f5107caa51ad5c0 Mon Sep 17 00:00:00 2001 From: dclark87 Date: Thu, 18 Feb 2016 11:41:49 -0500 Subject: [PATCH 23/37] Added afni centrality interface --- nipype/interfaces/afni/__init__.py | 4 +- nipype/interfaces/afni/preprocess.py | 84 ++++++++++++++++++---------- 2 files changed, 58 insertions(+), 30 deletions(-) diff --git a/nipype/interfaces/afni/__init__.py b/nipype/interfaces/afni/__init__.py index 8cc9b34a50..d2f77ee74a 100644 --- a/nipype/interfaces/afni/__init__.py +++ b/nipype/interfaces/afni/__init__.py @@ -9,8 +9,8 @@ from .base import Info from .preprocess import (To3D, Refit, Resample, TStat, Automask, Volreg, Merge, ZCutUp, Calc, TShift, Warp, Detrend, Despike, DegreeCentrality, - Copy, Fourier, Allineate, Maskave, SkullStrip, TCat, Fim, - BlurInMask, Autobox, TCorrMap, Bandpass, Retroicor, + LFCD, Copy, Fourier, Allineate, Maskave, SkullStrip, TCat, + Fim, BlurInMask, Autobox, TCorrMap, Bandpass, Retroicor, TCorrelate, TCorr1D, BrickStat, ROIStats, AutoTcorrelate, AFNItoNIFTI, Eval, Means) from .svm import (SVMTest, SVMTrain) diff --git a/nipype/interfaces/afni/preprocess.py b/nipype/interfaces/afni/preprocess.py index cdb02f6625..9362c34b20 100644 --- a/nipype/interfaces/afni/preprocess.py +++ b/nipype/interfaces/afni/preprocess.py @@ -512,13 +512,6 @@ class CentralityInputSpec(AFNICommandInputSpec): inherits the out_file parameter from AFNICommandOutputSpec base class """ - in_file = File(desc='input file to 3dDegreeCentrality', - argstr='%s', - position=-1, - mandatory=True, - exists=True, - copyfile=False) - mask = File(desc='mask file to mask input data', argstr="-mask %s", exists=True) @@ -547,26 +540,11 @@ class DegreeCentralityInputSpec(CentralityInputSpec): exists=True, copyfile=False) - mask = File(desc='mask file to mask input data', - argstr="-mask %s", - exists=True) - - thresh = traits.Float(desc='threshold to exclude connections where corr <= thresh', - argstr='-thresh %f') - sparsity = traits.Float(desc='only take the top percent of connections', argstr='-sparsity %f') - out_1d = traits.Str(desc='output filepath to text dump of correlation matrix', - argstr='-out1D') - - polort = traits.Int(desc='', argstr='-polort %d') - - autoclip = traits.Bool(desc='Clip off low-intensity regions in the dataset', - argstr='-autoclip') - - automask = traits.Bool(desc='Mask the dataset to target brain-only voxels', - argstr='-automask') + oned_file = traits.Str(desc='output filepath to text dump of correlation matrix', + argstr='-out1D %s', mandatory=False) class DegreeCentralityOutputSpec(AFNICommandOutputSpec): @@ -574,10 +552,10 @@ class DegreeCentralityOutputSpec(AFNICommandOutputSpec): inherits the out_file parameter from AFNICommandOutputSpec base class """ - one_d_file = File(desc='The text output of the similarity matrix computed'\ - 'after thresholding with one-dimensional and '\ - 'ijk voxel indices, correlations, image extents, '\ - 'and affine matrix') + oned_file = File(desc='The text output of the similarity matrix computed'\ + 'after thresholding with one-dimensional and '\ + 'ijk voxel indices, correlations, image extents, '\ + 'and affine matrix') class DegreeCentrality(AFNICommand): @@ -604,6 +582,56 @@ class 
DegreeCentrality(AFNICommand): input_spec = DegreeCentralityInputSpec output_spec = DegreeCentralityOutputSpec + # Re-define generated inputs + def _list_outputs(self): + # Import packages + import os + + # Update outputs dictionary if oned file is defined + outputs = super(DegreeCentrality, self)._list_outputs() + if self.inputs.oned_file: + outputs['oned_file'] = os.path.abspath(self.inputs.oned_file) + + return outputs + + +class LFCDInputSpec(CentralityInputSpec): + """ + inherits the out_file parameter from AFNICommandOutputSpec base class + """ + + in_file = File(desc='input file to 3dLFCD', + argstr='%s', + position=-1, + mandatory=True, + exists=True, + copyfile=False) + + +class LFCD(AFNICommand): + """Performs degree centrality on a dataset using a given maskfile + via 3dLFCD + + For complete details, see the `3dLFCD Documentation. + + + Examples + ======== + + >>> from nipype.interfaces import afni as afni + >>> lfcd = afni.LFCD() + >>> lfcd.inputs.in_file = 'func_preproc.nii' + >>> lfcd.inputs.mask = 'mask.nii' + >>> lfcd.inputs.threshold = .8 # keep all connections with corr >= 0.8 + >>> lfcd.cmdline + '3dLFCD -threshold 0.8 -mask mask.nii func_preproc.nii' + >>> res = lfcd.run() # doctest: +SKIP + """ + + _cmd = '3dLFCD' + input_spec = LFCDInputSpec + output_spec = AFNICommandOutputSpec + class AutomaskInputSpec(AFNICommandInputSpec): in_file = File(desc='input file to 3dAutomask', From 05c95b1f7357c66976ba2f719100eb5fd2c7a4fe Mon Sep 17 00:00:00 2001 From: sgiavasis Date: Tue, 23 Feb 2016 15:08:56 -0500 Subject: [PATCH 24/37] Added interface for 3dClipLevel. --- nipype/interfaces/afni/preprocess.py | 74 ++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/nipype/interfaces/afni/preprocess.py b/nipype/interfaces/afni/preprocess.py index c88fa02506..f610e0867d 100644 --- a/nipype/interfaces/afni/preprocess.py +++ b/nipype/interfaces/afni/preprocess.py @@ -1513,6 +1513,80 @@ def aggregate_outputs(self, runtime=None, needed_outputs=None): return outputs +class ClipLevelInputSpec(CommandLineInputSpec): + in_file = File(desc='input file to 3dClipLevel', + argstr='%s', + position=-1, + mandatory=True, + exists=True) + + mfrac = traits.Float(desc='Use the number ff instead of 0.50 in the algorithm', + argstr='-mfrac %s', + position=2) + + doall = traits.Bool(desc='Apply the algorithm to each sub-brick separately', + argstr='-doall', + position=3, + xor=('grad')) + + grad = traits.File(desc='also compute a \'gradual\' clip level as a function of voxel position, and output that to a dataset', + argstr='-grad %s', + position=3, + xor=('doall')) + + +class ClipLevelOutputSpec(TraitedSpec): + clip_val = traits.Float(desc='output') + + +class ClipLevel(AFNICommandBase): + """Compute maximum and/or minimum voxel values of an input dataset + + For complete details, see the `3dClipLevel Documentation. 
+ `_ + + Examples + ======== + + >>> from nipype.interfaces.afni import preprocess + >>> cliplevel = preprocess.ClipLevel() + >>> cliplevel.inputs.in_file = 'anatomical.nii' + >>> res = cliplevel.run() # doctest: +SKIP + + """ + _cmd = '3dClipLevel' + input_spec = ClipLevelInputSpec + output_spec = ClipLevelOutputSpec + + def aggregate_outputs(self, runtime=None, needed_outputs=None): + + outputs = self._outputs() + + outfile = os.path.join(os.getcwd(), 'stat_result.json') + + if runtime is None: + try: + clip_val = load_json(outfile)['stat'] + except IOError: + return self.run().outputs + else: + clip_val = [] + for line in runtime.stdout.split('\n'): + if line: + values = line.split() + if len(values) > 1: + clip_val.append([float(val) for val in values]) + else: + clip_val.extend([float(val) for val in values]) + + if len(clip_val) == 1: + clip_val = clip_val[0] + save_json(outfile, dict(stat=clip_val)) + outputs.clip_val = clip_val + + return outputs + + class ROIStatsInputSpec(CommandLineInputSpec): in_file = File(desc='input file to 3dROIstats', argstr='%s', From 7ec5538626af6849fd6f80e7195be9d28f83f832 Mon Sep 17 00:00:00 2001 From: sgiavasis Date: Tue, 23 Feb 2016 18:27:27 -0500 Subject: [PATCH 25/37] Added an interface for 3dSeg. --- nipype/interfaces/afni/preprocess.py | 85 +++++++++++++++++++++++++++- 1 file changed, 84 insertions(+), 1 deletion(-) diff --git a/nipype/interfaces/afni/preprocess.py b/nipype/interfaces/afni/preprocess.py index f610e0867d..dbe4dd4b5e 100644 --- a/nipype/interfaces/afni/preprocess.py +++ b/nipype/interfaces/afni/preprocess.py @@ -1540,7 +1540,8 @@ class ClipLevelOutputSpec(TraitedSpec): class ClipLevel(AFNICommandBase): - """Compute maximum and/or minimum voxel values of an input dataset + """Estimates the value at which to clip the anatomical dataset so + that background regions are set to zero. For complete details, see the `3dClipLevel Documentation. `_ @@ -1587,6 +1588,88 @@ def aggregate_outputs(self, runtime=None, needed_outputs=None): return outputs +class SegInputSpec(CommandLineInputSpec): + in_file = File(desc='ANAT is the volume to segment', + argstr='-anat %s', + position=-1, + mandatory=True, + exists=True, + copyfile=True) + + mask = traits.Str(desc='only non-zero voxels in mask are analyzed. 
mask can either be a dataset or the string \'AUTO\' which would use AFNI\'s automask function to create the mask.', + argstr='-mask %s', + position=-2, + mandatory=True, + exists=True) + + blur_meth = traits.Enum('BFT', 'BIM', + argstr='-blur_meth %s', + desc='set the blurring method for bias field estimation') + + bias_fwhm = traits.Float(desc='The amount of blurring used when estimating the field bias with the Wells method', + argstr='-bias_fwhm %f') + + classes = traits.Str(desc='CLASS_STRING is a semicolon delimited string of class labels', + argstr='-classes %s') + + bmrf = traits.Float(desc='Weighting factor controlling spatial homogeneity of the classifications', + argstr='-bmrf %f') + + bias_classes = traits.Str(desc='A semcolon demlimited string of classes that contribute to the estimation of the bias field', + argstr='-bias_classes %s') + + prefix = traits.Str(desc='the prefix for the output folder containing all output volumes', + argstr='-prefix %s') + + mixfrac = traits.Str(desc='MIXFRAC sets up the volume-wide (within mask) tissue fractions while initializing the segmentation (see IGNORE for exception)', + argstr='-mixfrac %s') + + mixfloor = traits.Float(desc='Set the minimum value for any class\'s mixing fraction', + argstr='-mixfloor %f') + + main_N = traits.Int(desc='Number of iterations to perform.', + argstr='-main_N %d') + + +class Seg(AFNICommandBase): + """3dSeg segments brain volumes into tissue classes. The program allows + for adding a variety of global and voxelwise priors. However for the + moment, only mixing fractions and MRF are documented. + + For complete details, see the `3dSeg Documentation. + + + Examples + ======== + + >>> from nipype.interfaces.afni import preprocess + >>> seg = preprocess.Seg() + >>> seg.inputs.in_file = 'structural.nii' + >>> seg.inputs.mask = 'AUTO' + >>> res = seg.run() + '3drefit -deoblique structural.nii' + >>> res = refit.run() # doctest: +SKIP + + """ + + _cmd = '3dSeg' + input_spec = SegInputSpec + output_spec = AFNICommandOutputSpec + + def aggregate_outputs(self, runtime=None, needed_outputs=None): + + outputs = self._outputs() + + if isdefined(self.inputs.prefix): + outfile = os.path.join(os.getcwd(), self.inputs.prefix, 'Classes+orig.BRIK') + else: + outfile = os.path.join(os.getcwd(), 'Segsy', 'Classes+orig.BRIK') + + outputs.out_file = outfile + + return outputs + + class ROIStatsInputSpec(CommandLineInputSpec): in_file = File(desc='input file to 3dROIstats', argstr='%s', From d674756d81165b6bcb40e9de7843d97e6d02c23f Mon Sep 17 00:00:00 2001 From: sgiavasis Date: Fri, 26 Feb 2016 14:56:29 -0500 Subject: [PATCH 26/37] Added an interface for AFNI's 3dmask_tool. --- nipype/interfaces/afni/preprocess.py | 93 +++++++++++++++++++++++++++- 1 file changed, 90 insertions(+), 3 deletions(-) diff --git a/nipype/interfaces/afni/preprocess.py b/nipype/interfaces/afni/preprocess.py index dbe4dd4b5e..4c46dbb964 100644 --- a/nipype/interfaces/afni/preprocess.py +++ b/nipype/interfaces/afni/preprocess.py @@ -1588,6 +1588,95 @@ def aggregate_outputs(self, runtime=None, needed_outputs=None): return outputs +class MaskToolInputSpec(AFNICommandInputSpec): + in_file = File(desc='input file or files to 3dmask_tool', + argstr='-input %s', + position=-1, + mandatory=True, + exists=True, + copyfile=False) + + out_file = File(name_template="%s_mask", desc='output image file name', + argstr='-prefix %s', name_source="in_file") + + count = traits.Bool(desc='Instead of created a binary 0/1 mask dataset, '+ + 'create one with. 
counts of voxel overlap, i.e '+ + 'each voxel will contain the number of masks ' + + 'that it is set in.', + argstr='-count', + position=2) + + datum = traits.Enum('byte','short','float', + argstr='-datum %s', + desc='specify data type for output. Valid types are '+ + '\'byte\', \'short\' and \'float\'.') + + dilate_inputs = traits.Str(desc='Use this option to dilate and/or erode '+ + 'datasets as they are read. ex. ' + + '\'5 -5\' to dilate and erode 5 times', + argstr='-dilate_inputs %s') + + dilate_results = traits.Str(desc='dilate and/or erode combined mask at ' + + 'the given levels.', + argstr='-dilate_results %s') + + frac = traits.Float(desc='When combining masks (across datasets and ' + + 'sub-bricks), use this option to restrict the ' + + 'result to a certain fraction of the set of ' + + 'volumes', + argstr='-frac %s') + + inter = traits.Bool(desc='intersection, this means -frac 1.0', + argstr='-inter') + + union = traits.Bool(desc='union, this means -frac 0', + argstr='-union') + + fill_holes = traits.Bool(desc='This option can be used to fill holes ' + + 'in the resulting mask, i.e. after all ' + + 'other processing has been done.', + argstr='-fill_holes') + + fill_dirs = traits.Str(desc='fill holes only in the given directions. ' + + 'This option is for use with -fill holes. ' + + 'should be a single string that specifies ' + + '1-3 of the axes using {x,y,z} labels (i.e. '+ + 'dataset axis order), or using the labels ' + + 'in {R,L,A,P,I,S}.', + argstr='-fill_dirs %s', + requires=['fill_holes']) + + +class MaskToolOutputSpec(TraitedSpec): + out_file = File(desc='mask file', + exists=True) + + +class MaskTool(AFNICommand): + """3dmask_tool - for combining/dilating/eroding/filling masks + + For complete details, see the `3dmask_tool Documentation. + `_ + + Examples + ======== + + >>> from nipype.interfaces import afni as afni + >>> automask = afni.Automask() + >>> automask.inputs.in_file = 'functional.nii' + >>> automask.inputs.dilate = 1 + >>> automask.inputs.outputtype = "NIFTI" + >>> automask.cmdline #doctest: +ELLIPSIS + '3dAutomask -apply_prefix functional_masked.nii -dilate 1 -prefix functional_mask.nii functional.nii' + >>> res = automask.run() # doctest: +SKIP + + """ + + _cmd = '3dmask_tool' + input_spec = MaskToolInputSpec + output_spec = MaskToolOutputSpec + + class SegInputSpec(CommandLineInputSpec): in_file = File(desc='ANAT is the volume to segment', argstr='-anat %s', @@ -1646,9 +1735,7 @@ class Seg(AFNICommandBase): >>> seg = preprocess.Seg() >>> seg.inputs.in_file = 'structural.nii' >>> seg.inputs.mask = 'AUTO' - >>> res = seg.run() - '3drefit -deoblique structural.nii' - >>> res = refit.run() # doctest: +SKIP + >>> res = seg.run() # doctest: +SKIP """ From c043685301e36671c002f767b126b4226f2f437c Mon Sep 17 00:00:00 2001 From: dclark87 Date: Thu, 17 Mar 2016 17:04:33 -0400 Subject: [PATCH 27/37] Added small docstring comment --- nipype/interfaces/afni/preprocess.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nipype/interfaces/afni/preprocess.py b/nipype/interfaces/afni/preprocess.py index 9362c34b20..778041423c 100644 --- a/nipype/interfaces/afni/preprocess.py +++ b/nipype/interfaces/afni/preprocess.py @@ -610,7 +610,7 @@ class LFCDInputSpec(CentralityInputSpec): class LFCD(AFNICommand): """Performs degree centrality on a dataset using a given maskfile - via 3dLFCD + via the 3dLFCD command For complete details, see the `3dLFCD Documentation. 
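With the DegreeCentrality, LFCD, ClipLevel, Seg, and MaskTool wrappers in place, the two strands of this series (resource-aware execution and the new AFNI interfaces) can be combined. The sketch below shows one way they might be wired together and run under the ResourceMultiProc plugin; the filenames, sparsity/thresh values, and estimated_memory/num_threads figures are illustrative assumptions, not values taken from the patches.

# Sketch only: new AFNI centrality wrappers scheduled by ResourceMultiProc.
import nipype.pipeline.engine as pe
import nipype.interfaces.utility as niu
from nipype.interfaces import afni
from nipype.pipeline.plugins.callback_log import log_nodes_cb

inputnode = pe.Node(interface=niu.IdentityInterface(fields=['func', 'mask']),
                    name='inputspec')
inputnode.inputs.func = 'func_preproc.nii'
inputnode.inputs.mask = 'mask.nii'

degree = pe.Node(interface=afni.DegreeCentrality(), name='degree')
degree.inputs.sparsity = 1              # keep the top 1% of connections
degree.inputs.oned_file = 'degree.1D'
degree.interface.estimated_memory = 4   # GB, same units as the plugin tests
degree.interface.num_threads = 1

lfcd = pe.Node(interface=afni.LFCD(), name='lfcd')
lfcd.inputs.thresh = 0.6
lfcd.interface.estimated_memory = 2
lfcd.interface.num_threads = 1

wf = pe.Workflow(name='centrality')
wf.connect(inputnode, 'func', degree, 'in_file')
wf.connect(inputnode, 'mask', degree, 'mask')
wf.connect(inputnode, 'func', lfcd, 'in_file')
wf.connect(inputnode, 'mask', lfcd, 'mask')

wf.run(plugin='ResourceMultiProc',
       plugin_args={'memory': 8, 'n_procs': 4,
                    'status_callback': log_nodes_cb})

Because log_nodes_cb records the observed runtime_memory/runtime_threads alongside each node's estimates, the resulting callback log (parsed, for example, with draw_gantt_chart.log_to_json as in the tests earlier in this series) can be used to check and refine those estimates.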
From 8e88e098fd3f5b418d530569d72cb2a1e4012440 Mon Sep 17 00:00:00 2001 From: dclark87 Date: Wed, 23 Mar 2016 16:20:31 -0400 Subject: [PATCH 28/37] Added 3dECM interface --- nipype/interfaces/afni/preprocess.py | 97 ++++++++++++++++++++++++---- 1 file changed, 86 insertions(+), 11 deletions(-) diff --git a/nipype/interfaces/afni/preprocess.py b/nipype/interfaces/afni/preprocess.py index 2c8037f3fb..8e20952593 100644 --- a/nipype/interfaces/afni/preprocess.py +++ b/nipype/interfaces/afni/preprocess.py @@ -509,9 +509,9 @@ class Despike(AFNICommand): class CentralityInputSpec(AFNICommandInputSpec): + """Common input spec class for all centrality-related commmands """ - inherits the out_file parameter from AFNICommandOutputSpec base class - """ + mask = File(desc='mask file to mask input data', argstr="-mask %s", @@ -530,8 +530,7 @@ class CentralityInputSpec(AFNICommandInputSpec): class DegreeCentralityInputSpec(CentralityInputSpec): - """ - inherits the out_file parameter from AFNICommandOutputSpec base class + """DegreeCentrality inputspec """ in_file = File(desc='input file to 3dDegreeCentrality', @@ -549,8 +548,7 @@ class DegreeCentralityInputSpec(CentralityInputSpec): class DegreeCentralityOutputSpec(AFNICommandOutputSpec): - """ - inherits the out_file parameter from AFNICommandOutputSpec base class + """DegreeCentrality outputspec """ oned_file = File(desc='The text output of the similarity matrix computed'\ @@ -574,8 +572,9 @@ class DegreeCentrality(AFNICommand): >>> degree.inputs.in_file = 'func_preproc.nii' >>> degree.inputs.mask = 'mask.nii' >>> degree.inputs.sparsity = 1 # keep the top one percent of connections + >>> degree.inputs.out_file = 'out.nii' >>> degree.cmdline - '3dDegreeCentrality -sparsity 1 -mask mask.nii func_preproc.nii' + '3dDegreeCentrality -sparsity 1 -mask mask.nii -prefix out.nii func_preproc.nii' >>> res = degree.run() # doctest: +SKIP """ @@ -596,9 +595,84 @@ def _list_outputs(self): return outputs -class LFCDInputSpec(CentralityInputSpec): +class ECMInputSpec(CentralityInputSpec): + """ECM inputspec """ - inherits the out_file parameter from AFNICommandOutputSpec base class + + in_file = File(desc='input file to 3dECM', + argstr='%s', + position=-1, + mandatory=True, + exists=True, + copyfile=False) + + sparsity = traits.Float(desc='only take the top percent of connections', + argstr='-sparsity %f') + + full = traits.Bool(desc='Full power method; enables thresholding; '\ + 'automatically selected if -thresh or -sparsity '\ + 'are set', + argstr='-full') + + fecm = traits.Bool(desc='Fast centrality method; substantial speed '\ + 'increase but cannot accomodate thresholding; '\ + 'automatically selected if -thresh or -sparsity '\ + 'are not set', + argstr='-fecm') + + shift = traits.Float(desc='shift correlation coefficients in similarity '\ + 'matrix to enforce non-negativity, s >= 0.0; '\ + 'default = 0.0 for -full, 1.0 for -fecm', + argstr='-shift %f') + + scale = traits.Float(desc='scale correlation coefficients in similarity '\ + 'matrix to after shifting, x >= 0.0; '\ + 'default = 1.0 for -full, 0.5 for -fecm', + argstr='-scale %f') + + eps = traits.Float(desc='sets the stopping criterion for the power '\ + 'iteration; l2|v_old - v_new| < eps*|v_old|; '\ + 'default = 0.001', + argstr='-eps %f') + + max_iter = traits.Int(desc='sets the maximum number of iterations to use '\ + 'in the power iteration; default = 1000', + argstr='-max_iter %d') + + memory = traits.Float(desc='Limit memory consumption on system by setting '\ + 'the amount of GB to limit the 
algorithm to; '\ + 'default = 2GB', + argstr='-memory %f') + + +class ECM(AFNICommand): + """Performs degree centrality on a dataset using a given maskfile + via the 3dLFCD command + + For complete details, see the `3dECM Documentation. + + + Examples + ======== + + >>> from nipype.interfaces import afni as afni + >>> ecm = afni.ECM() + >>> ecm.inputs.in_file = 'func_preproc.nii' + >>> ecm.inputs.mask = 'mask.nii' + >>> ecm.inputs.sparsity = 0.1 # keep top 0.1% of connections + >>> ecm.inputs.out_file = 'out.nii' + >>> ecm.cmdline + '3dECM -sparsity 0.1 -mask mask.nii -prefix out.nii func_preproc.nii' + >>> res = ecm.run() # doctest: +SKIP + """ + + _cmd = '3dECM' + input_spec = ECMInputSpec + output_spec = AFNICommandOutputSpec + + +class LFCDInputSpec(CentralityInputSpec): + """LFCD inputspec """ in_file = File(desc='input file to 3dLFCD', @@ -623,9 +697,10 @@ class LFCD(AFNICommand): >>> lfcd = afni.LFCD() >>> lfcd.inputs.in_file = 'func_preproc.nii' >>> lfcd.inputs.mask = 'mask.nii' - >>> lfcd.inputs.threshold = .8 # keep all connections with corr >= 0.8 + >>> lfcd.inputs.thresh = 0.8 # keep all connections with corr >= 0.8 + >>> lfcd.inputs.out_file = 'out.nii' >>> lfcd.cmdline - '3dLFCD -threshold 0.8 -mask mask.nii func_preproc.nii' + '3dLFCD -thresh 0.8 -mask mask.nii -prefix out.nii func_preproc.nii' >>> res = lfcd.run() # doctest: +SKIP """ From 1db5495cc1ace7c259d6ca24483a1627fed2d775 Mon Sep 17 00:00:00 2001 From: sgiavasis Date: Thu, 21 Apr 2016 16:40:20 -0400 Subject: [PATCH 29/37] 3dAllineate interface out_matrix output file handling fixed. --- nipype/interfaces/afni/preprocess.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/nipype/interfaces/afni/preprocess.py b/nipype/interfaces/afni/preprocess.py index ead47d9eb6..b0e43b04c4 100644 --- a/nipype/interfaces/afni/preprocess.py +++ b/nipype/interfaces/afni/preprocess.py @@ -1350,6 +1350,10 @@ def _list_outputs(self): suffix=self.inputs.suffix) else: outputs['out_file'] = os.path.abspath(self.inputs.out_file) + + if isdefined(self.inputs.out_matrix): + outputs['matrix'] = os.path.abspath(os.path.join(os.getcwd(),\ + self.inputs.out_matrix +".aff12.1D")) return outputs def _gen_filename(self, name): From d8119d34eb44f55cdd61630867f88f891c1999cf Mon Sep 17 00:00:00 2001 From: dclark87 Date: Wed, 27 Apr 2016 13:48:45 -0400 Subject: [PATCH 30/37] Auto-generated unit tests for new afni interfaces --- .../afni/tests/test_auto_ClipLevel.py | 47 +++++++++++++ .../afni/tests/test_auto_DegreeCentrality.py | 58 ++++++++++++++++ nipype/interfaces/afni/tests/test_auto_ECM.py | 68 +++++++++++++++++++ .../interfaces/afni/tests/test_auto_LFCD.py | 52 ++++++++++++++ .../afni/tests/test_auto_MaskTool.py | 62 +++++++++++++++++ nipype/interfaces/afni/tests/test_auto_Seg.py | 60 ++++++++++++++++ 6 files changed, 347 insertions(+) create mode 100644 nipype/interfaces/afni/tests/test_auto_ClipLevel.py create mode 100644 nipype/interfaces/afni/tests/test_auto_DegreeCentrality.py create mode 100644 nipype/interfaces/afni/tests/test_auto_ECM.py create mode 100644 nipype/interfaces/afni/tests/test_auto_LFCD.py create mode 100644 nipype/interfaces/afni/tests/test_auto_MaskTool.py create mode 100644 nipype/interfaces/afni/tests/test_auto_Seg.py diff --git a/nipype/interfaces/afni/tests/test_auto_ClipLevel.py b/nipype/interfaces/afni/tests/test_auto_ClipLevel.py new file mode 100644 index 0000000000..f6e5ae3e98 --- /dev/null +++ b/nipype/interfaces/afni/tests/test_auto_ClipLevel.py @@ -0,0 +1,47 @@ +# AUTO-GENERATED by tools/checkspecs.py - DO NOT 
EDIT +from ....testing import assert_equal +from ..preprocess import ClipLevel + + +def test_ClipLevel_inputs(): + input_map = dict(args=dict(argstr='%s', + ), + doall=dict(argstr='-doall', + position=3, + xor='grad', + ), + environ=dict(nohash=True, + usedefault=True, + ), + grad=dict(argstr='-grad %s', + position=3, + xor='doall', + ), + ignore_exception=dict(nohash=True, + usedefault=True, + ), + in_file=dict(argstr='%s', + mandatory=True, + position=-1, + ), + mfrac=dict(argstr='-mfrac %s', + position=2, + ), + terminal_output=dict(nohash=True, + ), + ) + inputs = ClipLevel.input_spec() + + for key, metadata in list(input_map.items()): + for metakey, value in list(metadata.items()): + yield assert_equal, getattr(inputs.traits()[key], metakey), value + + +def test_ClipLevel_outputs(): + output_map = dict(clip_val=dict(), + ) + outputs = ClipLevel.output_spec() + + for key, metadata in list(output_map.items()): + for metakey, value in list(metadata.items()): + yield assert_equal, getattr(outputs.traits()[key], metakey), value diff --git a/nipype/interfaces/afni/tests/test_auto_DegreeCentrality.py b/nipype/interfaces/afni/tests/test_auto_DegreeCentrality.py new file mode 100644 index 0000000000..36f446cbda --- /dev/null +++ b/nipype/interfaces/afni/tests/test_auto_DegreeCentrality.py @@ -0,0 +1,58 @@ +# AUTO-GENERATED by tools/checkspecs.py - DO NOT EDIT +from ....testing import assert_equal +from ..preprocess import DegreeCentrality + + +def test_DegreeCentrality_inputs(): + input_map = dict(args=dict(argstr='%s', + ), + autoclip=dict(argstr='-autoclip', + ), + automask=dict(argstr='-automask', + ), + environ=dict(nohash=True, + usedefault=True, + ), + ignore_exception=dict(nohash=True, + usedefault=True, + ), + in_file=dict(argstr='%s', + copyfile=False, + mandatory=True, + position=-1, + ), + mask=dict(argstr='-mask %s', + ), + oned_file=dict(argstr='-out1D %s', + mandatory=False, + ), + out_file=dict(argstr='-prefix %s', + name_source=['in_file'], + name_template='%s_afni', + ), + outputtype=dict(), + polort=dict(argstr='-polort %d', + ), + sparsity=dict(argstr='-sparsity %f', + ), + terminal_output=dict(nohash=True, + ), + thresh=dict(argstr='-thresh %f', + ), + ) + inputs = DegreeCentrality.input_spec() + + for key, metadata in list(input_map.items()): + for metakey, value in list(metadata.items()): + yield assert_equal, getattr(inputs.traits()[key], metakey), value + + +def test_DegreeCentrality_outputs(): + output_map = dict(oned_file=dict(), + out_file=dict(), + ) + outputs = DegreeCentrality.output_spec() + + for key, metadata in list(output_map.items()): + for metakey, value in list(metadata.items()): + yield assert_equal, getattr(outputs.traits()[key], metakey), value diff --git a/nipype/interfaces/afni/tests/test_auto_ECM.py b/nipype/interfaces/afni/tests/test_auto_ECM.py new file mode 100644 index 0000000000..0af69ab986 --- /dev/null +++ b/nipype/interfaces/afni/tests/test_auto_ECM.py @@ -0,0 +1,68 @@ +# AUTO-GENERATED by tools/checkspecs.py - DO NOT EDIT +from ....testing import assert_equal +from ..preprocess import ECM + + +def test_ECM_inputs(): + input_map = dict(args=dict(argstr='%s', + ), + autoclip=dict(argstr='-autoclip', + ), + automask=dict(argstr='-automask', + ), + environ=dict(nohash=True, + usedefault=True, + ), + eps=dict(argstr='-eps %f', + ), + fecm=dict(argstr='-fecm', + ), + full=dict(argstr='-full', + ), + ignore_exception=dict(nohash=True, + usedefault=True, + ), + in_file=dict(argstr='%s', + copyfile=False, + mandatory=True, + position=-1, + ), + 
mask=dict(argstr='-mask %s', + ), + max_iter=dict(argstr='-max_iter %d', + ), + memory=dict(argstr='-memory %f', + ), + out_file=dict(argstr='-prefix %s', + name_source=['in_file'], + name_template='%s_afni', + ), + outputtype=dict(), + polort=dict(argstr='-polort %d', + ), + scale=dict(argstr='-scale %f', + ), + shift=dict(argstr='-shift %f', + ), + sparsity=dict(argstr='-sparsity %f', + ), + terminal_output=dict(nohash=True, + ), + thresh=dict(argstr='-thresh %f', + ), + ) + inputs = ECM.input_spec() + + for key, metadata in list(input_map.items()): + for metakey, value in list(metadata.items()): + yield assert_equal, getattr(inputs.traits()[key], metakey), value + + +def test_ECM_outputs(): + output_map = dict(out_file=dict(), + ) + outputs = ECM.output_spec() + + for key, metadata in list(output_map.items()): + for metakey, value in list(metadata.items()): + yield assert_equal, getattr(outputs.traits()[key], metakey), value diff --git a/nipype/interfaces/afni/tests/test_auto_LFCD.py b/nipype/interfaces/afni/tests/test_auto_LFCD.py new file mode 100644 index 0000000000..371bce8b8d --- /dev/null +++ b/nipype/interfaces/afni/tests/test_auto_LFCD.py @@ -0,0 +1,52 @@ +# AUTO-GENERATED by tools/checkspecs.py - DO NOT EDIT +from ....testing import assert_equal +from ..preprocess import LFCD + + +def test_LFCD_inputs(): + input_map = dict(args=dict(argstr='%s', + ), + autoclip=dict(argstr='-autoclip', + ), + automask=dict(argstr='-automask', + ), + environ=dict(nohash=True, + usedefault=True, + ), + ignore_exception=dict(nohash=True, + usedefault=True, + ), + in_file=dict(argstr='%s', + copyfile=False, + mandatory=True, + position=-1, + ), + mask=dict(argstr='-mask %s', + ), + out_file=dict(argstr='-prefix %s', + name_source=['in_file'], + name_template='%s_afni', + ), + outputtype=dict(), + polort=dict(argstr='-polort %d', + ), + terminal_output=dict(nohash=True, + ), + thresh=dict(argstr='-thresh %f', + ), + ) + inputs = LFCD.input_spec() + + for key, metadata in list(input_map.items()): + for metakey, value in list(metadata.items()): + yield assert_equal, getattr(inputs.traits()[key], metakey), value + + +def test_LFCD_outputs(): + output_map = dict(out_file=dict(), + ) + outputs = LFCD.output_spec() + + for key, metadata in list(output_map.items()): + for metakey, value in list(metadata.items()): + yield assert_equal, getattr(outputs.traits()[key], metakey), value diff --git a/nipype/interfaces/afni/tests/test_auto_MaskTool.py b/nipype/interfaces/afni/tests/test_auto_MaskTool.py new file mode 100644 index 0000000000..005a915ead --- /dev/null +++ b/nipype/interfaces/afni/tests/test_auto_MaskTool.py @@ -0,0 +1,62 @@ +# AUTO-GENERATED by tools/checkspecs.py - DO NOT EDIT +from ....testing import assert_equal +from ..preprocess import MaskTool + + +def test_MaskTool_inputs(): + input_map = dict(args=dict(argstr='%s', + ), + count=dict(argstr='-count', + position=2, + ), + datum=dict(argstr='-datum %s', + ), + dilate_inputs=dict(argstr='-dilate_inputs %s', + ), + dilate_results=dict(argstr='-dilate_results %s', + ), + environ=dict(nohash=True, + usedefault=True, + ), + fill_dirs=dict(argstr='-fill_dirs %s', + requires=['fill_holes'], + ), + fill_holes=dict(argstr='-fill_holes', + ), + frac=dict(argstr='-frac %s', + ), + ignore_exception=dict(nohash=True, + usedefault=True, + ), + in_file=dict(argstr='-input %s', + copyfile=False, + mandatory=True, + position=-1, + ), + inter=dict(argstr='-inter', + ), + out_file=dict(argstr='-prefix %s', + name_source='in_file', + name_template='%s_mask', + ), 
+ outputtype=dict(), + terminal_output=dict(nohash=True, + ), + union=dict(argstr='-union', + ), + ) + inputs = MaskTool.input_spec() + + for key, metadata in list(input_map.items()): + for metakey, value in list(metadata.items()): + yield assert_equal, getattr(inputs.traits()[key], metakey), value + + +def test_MaskTool_outputs(): + output_map = dict(out_file=dict(), + ) + outputs = MaskTool.output_spec() + + for key, metadata in list(output_map.items()): + for metakey, value in list(metadata.items()): + yield assert_equal, getattr(outputs.traits()[key], metakey), value diff --git a/nipype/interfaces/afni/tests/test_auto_Seg.py b/nipype/interfaces/afni/tests/test_auto_Seg.py new file mode 100644 index 0000000000..3a84f00ced --- /dev/null +++ b/nipype/interfaces/afni/tests/test_auto_Seg.py @@ -0,0 +1,60 @@ +# AUTO-GENERATED by tools/checkspecs.py - DO NOT EDIT +from ....testing import assert_equal +from ..preprocess import Seg + + +def test_Seg_inputs(): + input_map = dict(args=dict(argstr='%s', + ), + bias_classes=dict(argstr='-bias_classes %s', + ), + bias_fwhm=dict(argstr='-bias_fwhm %f', + ), + blur_meth=dict(argstr='-blur_meth %s', + ), + bmrf=dict(argstr='-bmrf %f', + ), + classes=dict(argstr='-classes %s', + ), + environ=dict(nohash=True, + usedefault=True, + ), + ignore_exception=dict(nohash=True, + usedefault=True, + ), + in_file=dict(argstr='-anat %s', + copyfile=True, + mandatory=True, + position=-1, + ), + main_N=dict(argstr='-main_N %d', + ), + mask=dict(argstr='-mask %s', + exists=True, + mandatory=True, + position=-2, + ), + mixfloor=dict(argstr='-mixfloor %f', + ), + mixfrac=dict(argstr='-mixfrac %s', + ), + prefix=dict(argstr='-prefix %s', + ), + terminal_output=dict(nohash=True, + ), + ) + inputs = Seg.input_spec() + + for key, metadata in list(input_map.items()): + for metakey, value in list(metadata.items()): + yield assert_equal, getattr(inputs.traits()[key], metakey), value + + +def test_Seg_outputs(): + output_map = dict(out_file=dict(), + ) + outputs = Seg.output_spec() + + for key, metadata in list(output_map.items()): + for metakey, value in list(metadata.items()): + yield assert_equal, getattr(outputs.traits()[key], metakey), value From 9f3de6d160d507039e760f79905e41ee95b23cd3 Mon Sep 17 00:00:00 2001 From: dclark87 Date: Wed, 27 Apr 2016 14:07:48 -0400 Subject: [PATCH 31/37] Removed resource_multiproc code so only new_interfaces code is left --- nipype/interfaces/ants/base.py | 2 +- nipype/interfaces/base.py | 85 +----- nipype/pipeline/engine/nodes.py | 4 +- nipype/pipeline/engine/tests/test_engine.py | 6 +- nipype/pipeline/engine/tests/test_utils.py | 2 +- nipype/pipeline/plugins/__init__.py | 4 +- nipype/pipeline/plugins/base.py | 17 +- nipype/pipeline/plugins/callback_log.py | 55 ---- nipype/pipeline/plugins/multiproc.py | 231 ++------------- .../pipeline/plugins/semaphore_singleton.py | 2 - nipype/pipeline/plugins/tests/test_base.py | 2 +- .../pipeline/plugins/tests/test_callback.py | 7 +- .../pipeline/plugins/tests/test_multiproc.py | 192 +------------ .../plugins/tests/test_multiproc_nondaemon.py | 7 +- nipype/utils/draw_gantt_chart.py | 268 ------------------ 15 files changed, 58 insertions(+), 826 deletions(-) delete mode 100644 nipype/pipeline/plugins/callback_log.py delete mode 100644 nipype/pipeline/plugins/semaphore_singleton.py delete mode 100644 nipype/utils/draw_gantt_chart.py diff --git a/nipype/interfaces/ants/base.py b/nipype/interfaces/ants/base.py index c3ea4a674e..20fab05881 100644 --- a/nipype/interfaces/ants/base.py +++ 
b/nipype/interfaces/ants/base.py @@ -12,7 +12,7 @@ # -Using -1 gives primary responsibilty to ITKv4 to do the correct # thread limitings. # -Using 1 takes a very conservative approach to avoid overloading -# the computer (when running ResourceMultiProc) by forcing everything to +# the computer (when running MultiProc) by forcing everything to # single threaded. This can be a severe penalty for registration # performance. LOCAL_DEFAULT_NUMBER_OF_THREADS = 1 diff --git a/nipype/interfaces/base.py b/nipype/interfaces/base.py index 238af854c7..08a5e45f35 100644 --- a/nipype/interfaces/base.py +++ b/nipype/interfaces/base.py @@ -754,8 +754,6 @@ def __init__(self, **inputs): raise Exception('No input_spec in class: %s' % self.__class__.__name__) self.inputs = self.input_spec(**inputs) - self.estimated_memory = 1 - self.num_threads = 1 @classmethod def help(cls, returnhelp=False): @@ -1194,69 +1192,14 @@ def _read(self, drain): self._lastidx = len(self._rows) -# Get number of threads for process -def _get_num_threads(proc): - ''' - ''' - - # Import packages - import psutil - import logging as lg - - # Init variables - num_threads = proc.num_threads() - try: - num_children = len(proc.children()) - for child in proc.children(): - num_threads = max(num_threads, num_children, - child.num_threads(), len(child.children())) - except psutil.NoSuchProcess: - pass - - return num_threads - - -# Get max resources used for process -def _get_max_resources_used(proc, mem_mb, num_threads, poll=False): - ''' - docstring - ''' - - # Import packages - from memory_profiler import _get_memory - import psutil - - try: - mem_mb = max(mem_mb, _get_memory(proc.pid, include_children=True)) - num_threads = max(num_threads, _get_num_threads(psutil.Process(proc.pid))) - if poll: - proc.poll() - except Exception as exc: - iflogger.info('Could not get resources used by process. Error: %s'\ - % exc) - - # Return resources - return mem_mb, num_threads - - def run_command(runtime, output=None, timeout=0.01, redirect_x=False): """Run a command, read stdout and stderr, prefix with timestamp. 
The returned runtime contains a merged stdout+stderr log with timestamps """ - - # Import packages - try: - import memory_profiler - import psutil - mem_prof = True - except: - mem_prof = False - - # Init variables PIPE = subprocess.PIPE - cmdline = runtime.cmdline + cmdline = runtime.cmdline if redirect_x: exist_xvfb, _ = _exists_in_path('xvfb-run', runtime.environ) if not exist_xvfb: @@ -1288,12 +1231,6 @@ def run_command(runtime, output=None, timeout=0.01, redirect_x=False): result = {} errfile = os.path.join(runtime.cwd, 'stderr.nipype') outfile = os.path.join(runtime.cwd, 'stdout.nipype') - - # Init variables for memory profiling - mem_mb = -1 - num_threads = -1 - interval = 1 - if output == 'stream': streams = [Stream('stdout', proc.stdout), Stream('stderr', proc.stderr)] @@ -1309,10 +1246,8 @@ def _process(drain=0): else: for stream in res[0]: stream.read(drain) + while proc.returncode is None: - if mem_prof: - mem_mb, num_threads = \ - _get_max_resources_used(proc, mem_mb, num_threads) proc.poll() _process() _process(drain=1) @@ -1326,12 +1261,7 @@ def _process(drain=0): result[stream._name] = [r[2] for r in rows] temp.sort() result['merged'] = [r[1] for r in temp] - if output == 'allatonce': - if mem_prof: - while proc.returncode is None: - mem_mb, num_threads = \ - _get_max_resources_used(proc, mem_mb, num_threads, poll=True) stdout, stderr = proc.communicate() stdout = stdout.decode(default_encoding) stderr = stderr.decode(default_encoding) @@ -1339,10 +1269,6 @@ def _process(drain=0): result['stderr'] = stderr.split('\n') result['merged'] = '' if output == 'file': - if mem_prof: - while proc.returncode is None: - mem_mb, num_threads = \ - _get_max_resources_used(proc, mem_mb, num_threads, poll=True) ret_code = proc.wait() stderr.flush() stdout.flush() @@ -1350,17 +1276,10 @@ def _process(drain=0): result['stderr'] = [line.decode(default_encoding).strip() for line in open(errfile, 'rb').readlines()] result['merged'] = '' if output == 'none': - if mem_prof: - while proc.returncode is None: - mem_mb, num_threads = \ - _get_max_resources_used(proc, mem_mb, num_threads, poll=True) proc.communicate() result['stdout'] = [] result['stderr'] = [] result['merged'] = '' - - setattr(runtime, 'runtime_memory', mem_mb/1024.0) - setattr(runtime, 'runtime_threads', num_threads) runtime.stderr = '\n'.join(result['stderr']) runtime.stdout = '\n'.join(result['stdout']) runtime.merged = result['merged'] diff --git a/nipype/pipeline/engine/nodes.py b/nipype/pipeline/engine/nodes.py index 63b9ae13f8..f2fef3cdae 100644 --- a/nipype/pipeline/engine/nodes.py +++ b/nipype/pipeline/engine/nodes.py @@ -52,7 +52,6 @@ from ... 
import config, logging logger = logging.getLogger('workflow') - from ...interfaces.base import (traits, InputMultiPath, CommandLine, Undefined, TraitedSpec, DynamicTraitedSpec, Bunch, InterfaceResult, md5, Interface, @@ -1152,7 +1151,8 @@ def _node_runner(self, nodes, updatehash=False): if str2bool(self.config['execution']['stop_on_first_crash']): self._result = node.result raise - yield i, node, err + finally: + yield i, node, err def _collate_results(self, nodes): self._result = InterfaceResult(interface=[], runtime=[], diff --git a/nipype/pipeline/engine/tests/test_engine.py b/nipype/pipeline/engine/tests/test_engine.py index 09f3ec92c2..5eaaa81fbf 100644 --- a/nipype/pipeline/engine/tests/test_engine.py +++ b/nipype/pipeline/engine/tests/test_engine.py @@ -715,7 +715,7 @@ def func1(in1): w1.config['execution'] = {'stop_on_first_crash': 'true', 'local_hash_check': 'true', 'crashdump_dir': wd, - 'poll_sleep_duration' : 2} + 'poll_sleep_duration': 2} # test output of num_subnodes method when serial is default (False) yield assert_equal, n1.num_subnodes(), len(n1.inputs.in1) @@ -723,7 +723,7 @@ def func1(in1): # test running the workflow on default conditions error_raised = False try: - w1.run(plugin='ResourceMultiProc') + w1.run(plugin='MultiProc') except Exception as e: from nipype.pipeline.engine.base import logger logger.info('Exception: %s' % str(e)) @@ -737,7 +737,7 @@ def func1(in1): # test running the workflow on serial conditions error_raised = False try: - w1.run(plugin='ResourceMultiProc') + w1.run(plugin='MultiProc') except Exception as e: from nipype.pipeline.engine.base import logger logger.info('Exception: %s' % str(e)) diff --git a/nipype/pipeline/engine/tests/test_utils.py b/nipype/pipeline/engine/tests/test_utils.py index 9688e02395..8420f587c2 100644 --- a/nipype/pipeline/engine/tests/test_utils.py +++ b/nipype/pipeline/engine/tests/test_utils.py @@ -214,7 +214,7 @@ def test_function3(arg): out_dir = mkdtemp() - for plugin in ('Linear',): # , 'ResourceMultiProc'): + for plugin in ('Linear',): # , 'MultiProc'): n1 = pe.Node(niu.Function(input_names=['arg1'], output_names=['out_file1', 'out_file2', 'dir'], function=test_function), diff --git a/nipype/pipeline/plugins/__init__.py b/nipype/pipeline/plugins/__init__.py index 643d5735f8..26d1577f55 100644 --- a/nipype/pipeline/plugins/__init__.py +++ b/nipype/pipeline/plugins/__init__.py @@ -9,7 +9,7 @@ from .sge import SGEPlugin from .condor import CondorPlugin from .dagman import CondorDAGManPlugin -from .multiproc import ResourceMultiProcPlugin +from .multiproc import MultiProcPlugin from .ipython import IPythonPlugin from .somaflow import SomaFlowPlugin from .pbsgraph import PBSGraphPlugin @@ -17,5 +17,3 @@ from .lsf import LSFPlugin from .slurm import SLURMPlugin from .slurmgraph import SLURMGraphPlugin - -from .callback_log import log_nodes_cb diff --git a/nipype/pipeline/plugins/base.py b/nipype/pipeline/plugins/base.py index 994805285d..3f1f216ac6 100644 --- a/nipype/pipeline/plugins/base.py +++ b/nipype/pipeline/plugins/base.py @@ -20,6 +20,7 @@ import numpy as np import scipy.sparse as ssp + from ...utils.filemanip import savepkl, loadpkl from ...utils.misc import str2bool from ..engine.utils import (nx, dfs_preorder, topological_sort) @@ -245,7 +246,7 @@ def run(self, graph, config, updatehash=False): notrun.append(self._clean_queue(jobid, graph, result=result)) else: - self._task_finished_cb(jobid, result) + self._task_finished_cb(jobid) self._remove_node_dirs() self._clear_task(taskid) else: @@ -264,15 +265,10 @@ 
def run(self, graph, config, updatehash=False): graph=graph) else: logger.debug('Not submitting') - self._wait() + sleep(float(self._config['execution']['poll_sleep_duration'])) self._remove_node_dirs() report_nodes_not_run(notrun) - - - def _wait(self): - sleep(float(self._config['execution']['poll_sleep_duration'])) - def _get_result(self, taskid): raise NotImplementedError @@ -414,7 +410,7 @@ def _send_procs_to_workers(self, updatehash=False, graph=None): else: break - def _task_finished_cb(self, jobid, result=None): + def _task_finished_cb(self, jobid): """ Extract outputs and assign to inputs of dependent tasks This is called when a job is completed. @@ -422,10 +418,7 @@ def _task_finished_cb(self, jobid, result=None): logger.info('[Job finished] jobname: %s jobid: %d' % (self.procs[jobid]._id, jobid)) if self._status_callback: - if result == None: - if self._taskresult.has_key(jobid): - result = self._taskresult[jobid].get() - self._status_callback(self.procs[jobid], 'end', result) + self._status_callback(self.procs[jobid], 'end') # Update job and worker queues self.proc_pending[jobid] = False # update the job dependency structure diff --git a/nipype/pipeline/plugins/callback_log.py b/nipype/pipeline/plugins/callback_log.py deleted file mode 100644 index 548b98f342..0000000000 --- a/nipype/pipeline/plugins/callback_log.py +++ /dev/null @@ -1,55 +0,0 @@ -import datetime -import logging - -def log_nodes_cb(node, status, result=None): - ''' - ''' - - # Init variables - logger = logging.getLogger('callback') - - # Check runtime profile stats - if result is None: - runtime_memory = runtime_seconds = runtime_threads = 'N/A' - else: - try: - runtime_memory = result['runtime_memory'] - except KeyError: - runtime_memory = 'Unknown' - try: - runtime_seconds = result['runtime_seconds'] - except KeyError: - runtime_seconds = 'Unknown' - try: - runtime_threads = result['runtime_threads'] - except: - runtime_threads = 'Unknown' - - # Check status and write to log - # Start - if status == 'start': - message = '{"name":' + '"' + node.name + '"' + ',"id":' + '"' +\ - node._id + '"' + ',"start":' + '"' +str(datetime.datetime.now()) +\ - '"' + ',"estimated_memory":' + str(node._interface.estimated_memory) + ',"num_threads":' \ - + str(node._interface.num_threads) + '}' - - logger.debug(message) - # End - elif status == 'end': - message = '{"name":' + '"' + node.name + '"' + ',"id":' + '"' + \ - node._id + '"' + ',"finish":' + '"' + str(datetime.datetime.now()) + \ - '"' + ',"estimated_memory":' + '"'+ str(node._interface.estimated_memory) + '"'+ \ - ',"num_threads":' + '"'+ str(node._interface.num_threads) + '"'+ \ - ',"runtime_threads":' + '"'+ str(runtime_threads) + '"'+ \ - ',"runtime_memory":' + '"'+ str(runtime_memory) + '"' + \ - ',"runtime_seconds":' + '"'+ str(runtime_seconds) + '"'+ '}' - - logger.debug(message) - # Other - else: - message = '{"name":' + '"' + node.name + '"' + ',"id":' + '"' + \ - node._id + '"' + ',"finish":' + '"' + str(datetime.datetime.now()) +\ - '"' + ',"estimated_memory":' + str(node._interface.estimated_memory) + ',"num_threads":' \ - + str(node._interface.num_threads) + ',"error":"True"}' - - logger.debug(message) diff --git a/nipype/pipeline/plugins/multiproc.py b/nipype/pipeline/plugins/multiproc.py index b34f9944c5..861e2cc507 100644 --- a/nipype/pipeline/plugins/multiproc.py +++ b/nipype/pipeline/plugins/multiproc.py @@ -6,56 +6,21 @@ http://stackoverflow.com/a/8963618/1183453 """ -# Import packages from multiprocessing import Process, Pool, cpu_count, pool from 
traceback import format_exception import sys -import numpy as np -from copy import deepcopy -from ..engine import MapNode -from ...utils.misc import str2bool -import psutil -from ... import logging -import semaphore_singleton + from .base import (DistributedPluginBase, report_crash) -# Init logger -logger = logging.getLogger('workflow') -# Run node -def run_node(node, updatehash, runtime_profile=False): - """docstring - """ - - # Import packages - import datetime - - # Init variables +def run_node(node, updatehash): result = dict(result=None, traceback=None) - - # If we're profiling the run - if runtime_profile: - try: - start = datetime.datetime.now() - retval = node.run(updatehash=updatehash) - run_secs = (datetime.datetime.now() - start).total_seconds() - result['result'] = retval - result['runtime_seconds'] = run_secs - if hasattr(retval.runtime, 'get'): - result['runtime_memory'] = retval.runtime.get('runtime_memory') - result['runtime_threads'] = retval.runtime.get('runtime_threads') - except: - etype, eval, etr = sys.exc_info() - result['traceback'] = format_exception(etype,eval,etr) - result['result'] = node.result - # Otherwise, execute node.run as normal - else: - try: - result['result'] = node.run(updatehash=updatehash) - except: - etype, eval, etr = sys.exc_info() - result['traceback'] = format_exception(etype,eval,etr) - result['result'] = node.result + try: + result['result'] = node.run(updatehash=updatehash) + except: + etype, eval, etr = sys.exc_info() + result['traceback'] = format_exception(etype, eval, etr) + result['result'] = node.result return result @@ -77,61 +42,33 @@ class NonDaemonPool(pool.Pool): Process = NonDaemonProcess -def release_lock(args): - semaphore_singleton.semaphore.release() - - -class ResourceMultiProcPlugin(DistributedPluginBase): - """Execute workflow with multiprocessing, not sending more jobs at once - than the system can support. +class MultiProcPlugin(DistributedPluginBase): + """Execute workflow with multiprocessing The plugin_args input to run can be used to control the multiprocessing - execution and defining the maximum amount of memory and threads that - should be used. When those parameters are not specified, - the number of threads and memory of the system is used. - - System consuming nodes should be tagged: - memory_consuming_node.interface.estimated_memory = 8 #Gb - thread_consuming_node.interface.num_threads = 16 - - The default number of threads and memory for a node is 1. - - Currently supported options are: + execution. Currently supported options are: + - n_procs : number of processes to use - non_daemon : boolean flag to execute as non-daemon processes - - num_threads: maximum number of threads to be executed in parallel - - estimated_memory: maximum memory that can be used at once. 
""" def __init__(self, plugin_args=None): - super(ResourceMultiProcPlugin, self).__init__(plugin_args=plugin_args) + super(MultiProcPlugin, self).__init__(plugin_args=plugin_args) self._taskresult = {} self._taskid = 0 non_daemon = True - self.plugin_args = plugin_args - self.processors = cpu_count() - memory = psutil.virtual_memory() - self.memory = float(memory.total) / (1024.0**3) - if self.plugin_args: - if 'non_daemon' in self.plugin_args: + n_procs = cpu_count() + if plugin_args: + if 'n_procs' in plugin_args: + n_procs = plugin_args['n_procs'] + if 'non_daemon' in plugin_args: non_daemon = plugin_args['non_daemon'] - if 'n_procs' in self.plugin_args: - self.processors = self.plugin_args['n_procs'] - if 'memory' in self.plugin_args: - self.memory = self.plugin_args['memory'] - if non_daemon: # run the execution using the non-daemon pool subclass - self.pool = NonDaemonPool(processes=self.processors) + self.pool = NonDaemonPool(processes=n_procs) else: - self.pool = Pool(processes=self.processors) - - def _wait(self): - if len(self.pending_tasks) > 0: - semaphore_singleton.semaphore.acquire() - semaphore_singleton.semaphore.release() - + self.pool = Pool(processes=n_procs) def _get_result(self, taskid): if taskid not in self._taskresult: @@ -140,6 +77,17 @@ def _get_result(self, taskid): return None return self._taskresult[taskid].get() + def _submit_job(self, node, updatehash=False): + self._taskid += 1 + try: + if node.inputs.terminal_output == 'stream': + node.inputs.terminal_output = 'allatonce' + except: + pass + self._taskresult[self._taskid] = self.pool.apply_async(run_node, + (node, + updatehash,)) + return self._taskid def _report_crash(self, node, result=None): if result and result['traceback']: @@ -152,120 +100,3 @@ def _report_crash(self, node, result=None): def _clear_task(self, taskid): del self._taskresult[taskid] - - def _submit_job(self, node, updatehash=False): - self._taskid += 1 - try: - if node.inputs.terminal_output == 'stream': - node.inputs.terminal_output = 'allatonce' - except: - pass - try: - runtime_profile = self.plugin_args['runtime_profile'] - except: - runtime_profile = False - self._taskresult[self._taskid] = \ - self.pool.apply_async(run_node, - (node, updatehash, runtime_profile), - callback=release_lock) - return self._taskid - - def _send_procs_to_workers(self, updatehash=False, graph=None): - """ Sends jobs to workers when system resources are available. - Check memory (gb) and cores usage before running jobs. 
- """ - executing_now = [] - - # Check to see if a job is available - jobids = np.flatnonzero((self.proc_pending == True) & (self.depidx.sum(axis=0) == 0).__array__()) - - #check available system resources by summing all threads and memory used - busy_memory = 0 - busy_processors = 0 - for jobid in jobids: - busy_memory+= self.procs[jobid]._interface.estimated_memory - busy_processors+= self.procs[jobid]._interface.num_threads - - free_memory = self.memory - busy_memory - free_processors = self.processors - busy_processors - - - #check all jobs without dependency not run - jobids = np.flatnonzero((self.proc_done == False) & (self.depidx.sum(axis=0) == 0).__array__()) - - - #sort jobs ready to run first by memory and then by number of threads - #The most resource consuming jobs run first - jobids = sorted(jobids, key=lambda item: (self.procs[item]._interface.estimated_memory, self.procs[item]._interface.num_threads)) - - logger.debug('Free memory: %d, Free processors: %d', free_memory, free_processors) - - - #while have enough memory and processors for first job - #submit first job on the list - for jobid in jobids: - logger.debug('Next Job: %d, memory: %d, threads: %d' %(jobid, self.procs[jobid]._interface.estimated_memory, self.procs[jobid]._interface.num_threads)) - - if self.procs[jobid]._interface.estimated_memory <= free_memory and self.procs[jobid]._interface.num_threads <= free_processors: - logger.info('Executing: %s ID: %d' %(self.procs[jobid]._id, jobid)) - executing_now.append(self.procs[jobid]) - - if isinstance(self.procs[jobid], MapNode): - try: - num_subnodes = self.procs[jobid].num_subnodes() - except Exception: - self._clean_queue(jobid, graph) - self.proc_pending[jobid] = False - continue - if num_subnodes > 1: - submit = self._submit_mapnode(jobid) - if not submit: - continue - - # change job status in appropriate queues - self.proc_done[jobid] = True - self.proc_pending[jobid] = True - - free_memory -= self.procs[jobid]._interface.estimated_memory - free_processors -= self.procs[jobid]._interface.num_threads - - # Send job to task manager and add to pending tasks - if self._status_callback: - self._status_callback(self.procs[jobid], 'start') - if str2bool(self.procs[jobid].config['execution']['local_hash_check']): - logger.debug('checking hash locally') - try: - hash_exists, _, _, _ = self.procs[ - jobid].hash_exists() - logger.debug('Hash exists %s' % str(hash_exists)) - if (hash_exists and (self.procs[jobid].overwrite == False or (self.procs[jobid].overwrite == None and not self.procs[jobid]._interface.always_run))): - self._task_finished_cb(jobid) - self._remove_node_dirs() - continue - except Exception: - self._clean_queue(jobid, graph) - self.proc_pending[jobid] = False - continue - logger.debug('Finished checking hash') - - if self.procs[jobid].run_without_submitting: - logger.debug('Running node %s on master thread' %self.procs[jobid]) - try: - self.procs[jobid].run() - except Exception: - self._clean_queue(jobid, graph) - self._task_finished_cb(jobid) - self._remove_node_dirs() - - else: - logger.debug('submitting %s' % str(jobid)) - tid = self._submit_job(deepcopy(self.procs[jobid]), updatehash=updatehash) - if tid is None: - self.proc_done[jobid] = False - self.proc_pending[jobid] = False - else: - self.pending_tasks.insert(0, (tid, jobid)) - else: - break - - logger.debug('No jobs waiting to execute') diff --git a/nipype/pipeline/plugins/semaphore_singleton.py b/nipype/pipeline/plugins/semaphore_singleton.py deleted file mode 100644 index 
99c7752b82..0000000000 --- a/nipype/pipeline/plugins/semaphore_singleton.py +++ /dev/null @@ -1,2 +0,0 @@ -import threading -semaphore = threading.Semaphore(1) diff --git a/nipype/pipeline/plugins/tests/test_base.py b/nipype/pipeline/plugins/tests/test_base.py index 616cb634a0..243ae195c2 100644 --- a/nipype/pipeline/plugins/tests/test_base.py +++ b/nipype/pipeline/plugins/tests/test_base.py @@ -38,5 +38,5 @@ def func(arg1): wf.add_nodes([funkynode]) wf.base_dir = '/tmp' -wf.run(plugin='ResourceMultiProc') +wf.run(plugin='MultiProc') ''' diff --git a/nipype/pipeline/plugins/tests/test_callback.py b/nipype/pipeline/plugins/tests/test_callback.py index f173a9b30c..db02bc889b 100644 --- a/nipype/pipeline/plugins/tests/test_callback.py +++ b/nipype/pipeline/plugins/tests/test_callback.py @@ -26,7 +26,7 @@ class Status(object): def __init__(self): self.statuses = [] - def callback(self, node, status, result=None): + def callback(self, node, status): self.statuses.append((node, status)) @@ -76,7 +76,7 @@ def test_callback_multiproc_normal(): wf.add_nodes([f_node]) wf.config['execution']['crashdump_dir'] = wf.base_dir wf.config['execution']['poll_sleep_duration'] = 2 - wf.run(plugin='ResourceMultiProc', plugin_args={'status_callback': so.callback}) + wf.run(plugin='MultiProc', plugin_args={'status_callback': so.callback}) assert_equal(len(so.statuses), 2) for (n, s) in so.statuses: yield assert_equal, n.name, 'f_node' @@ -93,8 +93,9 @@ def test_callback_multiproc_exception(): name='f_node') wf.add_nodes([f_node]) wf.config['execution']['crashdump_dir'] = wf.base_dir + wf.config['execution']['poll_sleep_duration'] = 2 try: - wf.run(plugin='ResourceMultiProc', + wf.run(plugin='MultiProc', plugin_args={'status_callback': so.callback}) except: pass diff --git a/nipype/pipeline/plugins/tests/test_multiproc.py b/nipype/pipeline/plugins/tests/test_multiproc.py index cd41bbb695..efa9ec4161 100644 --- a/nipype/pipeline/plugins/tests/test_multiproc.py +++ b/nipype/pipeline/plugins/tests/test_multiproc.py @@ -3,10 +3,10 @@ from tempfile import mkdtemp from shutil import rmtree -from nipype.testing import assert_equal, assert_less_equal +from nipype.testing import assert_equal import nipype.pipeline.engine as pe - + class InputSpec(nib.TraitedSpec): input1 = nib.traits.Int(desc='a random int') input2 = nib.traits.Int(desc='a random int') @@ -44,196 +44,10 @@ def test_run_multiproc(): pipe.base_dir = os.getcwd() mod1.inputs.input1 = 1 pipe.config['execution']['poll_sleep_duration'] = 2 - execgraph = pipe.run(plugin="ResourceMultiProc") + execgraph = pipe.run(plugin="MultiProc") names = ['.'.join((node._hierarchy, node.name)) for node in execgraph.nodes()] node = execgraph.nodes()[names.index('pipe.mod1')] result = node.get_output('output1') yield assert_equal, result, [1, 1] os.chdir(cur_dir) rmtree(temp_dir) - -################################ - - -class InputSpecSingleNode(nib.TraitedSpec): - input1 = nib.traits.Int(desc='a random int') - input2 = nib.traits.Int(desc='a random int') - -class OutputSpecSingleNode(nib.TraitedSpec): - output1 = nib.traits.Int(desc='a random int') - - -class TestInterfaceSingleNode(nib.BaseInterface): - input_spec = InputSpecSingleNode - output_spec = OutputSpecSingleNode - - def _run_interface(self, runtime): - runtime.returncode = 0 - return runtime - - def _list_outputs(self): - outputs = self._outputs().get() - outputs['output1'] = self.inputs.input1 - return outputs - - -def find_metrics(nodes, last_node): - import json - from dateutil.parser import parse - from datetime 
import datetime - import datetime as d - - - start = parse(nodes[0]['start']) - total_duration = int((parse(last_node['finish']) - start).total_seconds()) - - total_memory = [] - total_threads = [] - for i in range(total_duration): - total_memory.append(0) - total_threads.append(0) - - now = start - for i in range(total_duration): - start_index = 0 - node_start = None - node_finish = None - - x = now - - for j in range(start_index, len(nodes)): - node_start = parse(nodes[j]['start']) - node_finish = parse(nodes[j]['finish']) - - if node_start < x and node_finish > x: - total_memory[i] += nodes[j]['estimated_memory'] - total_threads[i] += nodes[j]['num_threads'] - start_index = j - - if node_start > x: - break - - now += d.timedelta(seconds=1) - - return total_memory, total_threads - - -import os -from nipype.pipeline.plugins.callback_log import log_nodes_cb -import logging -import logging.handlers -import psutil -from multiprocessing import cpu_count - -from nipype.utils import draw_gantt_chart - -def test_do_not_use_more_memory_then_specified(): - LOG_FILENAME = 'callback.log' - my_logger = logging.getLogger('callback') - my_logger.setLevel(logging.DEBUG) - - # Add the log message handler to the logger - handler = logging.FileHandler(LOG_FILENAME) - my_logger.addHandler(handler) - - max_memory = 10 - pipe = pe.Workflow(name='pipe') - n1 = pe.Node(interface=TestInterfaceSingleNode(), name='n1') - n2 = pe.Node(interface=TestInterfaceSingleNode(), name='n2') - n3 = pe.Node(interface=TestInterfaceSingleNode(), name='n3') - n4 = pe.Node(interface=TestInterfaceSingleNode(), name='n4') - - n1.interface.estimated_memory = 1 - n2.interface.estimated_memory = 1 - n3.interface.estimated_memory = 10 - n4.interface.estimated_memory = 1 - - pipe.connect(n1, 'output1', n2, 'input1') - pipe.connect(n1, 'output1', n3, 'input1') - pipe.connect(n2, 'output1', n4, 'input1') - pipe.connect(n3, 'output1', n4, 'input2') - n1.inputs.input1 = 10 - - pipe.run(plugin='ResourceMultiProc', plugin_args={'memory': max_memory, - 'status_callback': log_nodes_cb}) - - - nodes, last_node = draw_gantt_chart.log_to_json(LOG_FILENAME) - #usage in every second - memory, threads = find_metrics(nodes, last_node) - - result = True - for m in memory: - if m > max_memory: - result = False - break - - yield assert_equal, result, True - - max_threads = cpu_count() - - result = True - for t in threads: - if t > max_threads: - result = False - break - - yield assert_equal, result, True, "using more threads than system has (threads is not specified by user)" - - os.remove(LOG_FILENAME) - - - - -def test_do_not_use_more_threads_then_specified(): - LOG_FILENAME = 'callback.log' - my_logger = logging.getLogger('callback') - my_logger.setLevel(logging.DEBUG) - - # Add the log message handler to the logger - handler = logging.FileHandler(LOG_FILENAME) - my_logger.addHandler(handler) - - max_threads = 10 - pipe = pe.Workflow(name='pipe') - n1 = pe.Node(interface=TestInterfaceSingleNode(), name='n1') - n2 = pe.Node(interface=TestInterfaceSingleNode(), name='n2') - n3 = pe.Node(interface=TestInterfaceSingleNode(), name='n3') - n4 = pe.Node(interface=TestInterfaceSingleNode(), name='n4') - - n1.interface.num_threads = 1 - n2.interface.num_threads = 1 - n3.interface.num_threads = 10 - n4.interface.num_threads = 1 - - pipe.connect(n1, 'output1', n2, 'input1') - pipe.connect(n1, 'output1', n3, 'input1') - pipe.connect(n2, 'output1', n4, 'input1') - pipe.connect(n3, 'output1', n4, 'input2') - n1.inputs.input1 = 10 - 
pipe.config['execution']['poll_sleep_duration'] = 1 - pipe.run(plugin='ResourceMultiProc', plugin_args={'n_procs': max_threads, - 'status_callback': log_nodes_cb}) - - nodes, last_node = draw_gantt_chart.log_to_json(LOG_FILENAME) - #usage in every second - memory, threads = find_metrics(nodes, last_node) - - result = True - for t in threads: - if t > max_threads: - result = False - break - - yield assert_equal, result, True, "using more threads than specified" - - max_memory = psutil.virtual_memory().total / (1024*1024) - result = True - for m in memory: - if m > max_memory: - result = False - break - yield assert_equal, result, True, "using more memory than system has (memory is not specified by user)" - - os.remove(LOG_FILENAME) - diff --git a/nipype/pipeline/plugins/tests/test_multiproc_nondaemon.py b/nipype/pipeline/plugins/tests/test_multiproc_nondaemon.py index 429eff0f26..89336c2026 100644 --- a/nipype/pipeline/plugins/tests/test_multiproc_nondaemon.py +++ b/nipype/pipeline/plugins/tests/test_multiproc_nondaemon.py @@ -84,7 +84,7 @@ def dummyFunction(filename): def run_multiproc_nondaemon_with_flag(nondaemon_flag): ''' - Start a pipe with two nodes using the resource multiproc plugin and passing the nondaemon_flag. + Start a pipe with two nodes using the multiproc plugin and passing the nondaemon_flag. ''' cur_dir = os.getcwd() @@ -107,10 +107,11 @@ def run_multiproc_nondaemon_with_flag(nondaemon_flag): f1.inputs.insum = 0 pipe.config['execution']['stop_on_first_crash'] = True + pipe.config['execution']['poll_sleep_duration'] = 2 - # execute the pipe using the ResourceMultiProc plugin with 2 processes and the non_daemon flag + # execute the pipe using the MultiProc plugin with 2 processes and the non_daemon flag # to enable child processes which start other multiprocessing jobs - execgraph = pipe.run(plugin="ResourceMultiProc", + execgraph = pipe.run(plugin="MultiProc", plugin_args={'n_procs': 2, 'non_daemon': nondaemon_flag}) diff --git a/nipype/utils/draw_gantt_chart.py b/nipype/utils/draw_gantt_chart.py deleted file mode 100644 index b435d5d925..0000000000 --- a/nipype/utils/draw_gantt_chart.py +++ /dev/null @@ -1,268 +0,0 @@ -import json -from dateutil import parser -import datetime -import random - - -def log_to_json(logfile): - result = [] - with open(logfile, 'r') as content: - - #read file separating each line - content = content.read() - lines = content.split('\n') - l = [] - for i in lines: - try: - y = json.loads(i) - l.append(y) - except Exception, e: - pass - - lines = l - - last_node = [ x for x in lines if x.has_key('finish')][-1] - - for i, line in enumerate(lines): - #get first start it finds - if not line.has_key('start'): - continue - - #fint the end node for that start - for j in range(i+1, len(lines)): - if lines[j].has_key('finish'): - if lines[j]['id'] == line['id'] and lines[j]['name'] == line['name']: - line['finish'] = lines[j]['finish'] - line['duration'] = (parser.parse(line['finish']) - parser.parse(line['start'])).total_seconds() - result.append(line) - break - - return result, last_node - - -#total duration in seconds -def draw_lines(start, total_duration, minute_scale, scale): - result = '' - next_line = 220 - next_time = start; - num_lines = int((total_duration/60) / minute_scale) +2; - - for i in range(num_lines): - new_line = "
" - result += new_line - - time = "

" + str(next_time.hour) + ':' + str(next_time.minute) + "

"; - result += time - - next_line += minute_scale * scale - next_time += datetime.timedelta(minutes=minute_scale) - return result - -def draw_nodes(start, nodes, cores, scale, colors): - result = '' - end_times = [datetime.datetime(start.year, start.month, start.day, start.hour, start.minute, start.second) for x in range(cores)] - - for node in nodes: - node_start = parser.parse(node['start']) - node_finish = parser.parse(node['finish']) - offset = ((node_start - start).total_seconds() / 60) * scale + 220 - scale_duration = (node['duration'] / 60) * scale - if scale_duration < 5: - scale_duration = 5 - - scale_duration -= 2 - left = 60 - for j in range(len(end_times)): - if end_times[j] < node_start: - left += j * 30 - end_times[j] = datetime.datetime(node_finish.year, node_finish.month, node_finish.day, node_finish.hour, node_finish.minute, node_finish.second) - #end_times[j]+= datetime.timedelta(microseconds=node_finish.microsecond) - break - - color = random.choice(colors) - new_node = "
"; - result += new_node - return result - - -def draw_thread_bar(start, total_duration, nodes, space_between_minutes, minute_scale): - result = "

Threads

" - - total = total_duration/60 - thread = [0 for x in range(total)] - - now = start - - #calculate nuber of threads in every second - for i in range(total): - node_start = None - node_finish = None - - for j in range(i, len(nodes)): - node_start = parser.parse(nodes[j]['start']) - node_finish = parser.parse(nodes[j]['finish']) - - if node_start <= now and node_finish >= now: - thread[i] += nodes[j]['num_threads'] - if node_start > now: - break - now += datetime.timedelta(minutes=1) - - - #draw thread bar - scale = float(space_between_minutes/float(minute_scale)) - - for i in range(len(thread)): - width = thread[i] * 10 - t = (i*scale*minute_scale) + 220 - bar = "
" - result += bar - - return result - - - -def draw_memory_bar(start, total_duration, nodes, space_between_minutes, minute_scale): - result = "

Memory

" - - total = total_duration/60 - memory = [0 for x in range(total)] - - now = start - - #calculate nuber of threads in every second - for i in range(total): - node_start = None - node_finish = None - - for j in range(i, len(nodes)): - node_start = parser.parse(nodes[j]['start']) - node_finish = parser.parse(nodes[j]['finish']) - - if node_start <= now and node_finish >= now: - memory[i] += nodes[j]['estimated_memory'] - if node_start > now: - break - now += datetime.timedelta(minutes=1) - - - #draw thread bar - scale = float(space_between_minutes/float(minute_scale)) - - for i in range(len(memory)): - width = memory[i] * 10 - t = (i*scale*minute_scale) + 220 - bar = "
" - result += bar - - return result - - -''' -Generates a gantt chart in html showing the workflow execution based on a callback log file. -This script was intended to be used with the ResourceMultiprocPlugin. -The following code shows how to set up the workflow in order to generate the log file: - -# import logging -# import logging.handlers -# from nipype.pipeline.plugins.callback_log import log_nodes_cb - -# log_filename = 'callback.log' -# logger = logging.getLogger('callback') -# logger.setLevel(logging.DEBUG) -# handler = logging.FileHandler(log_filename) -# logger.addHandler(handler) - -# #create workflow -# workflow = ... - -# workflow.run(plugin='ResourceMultiProc', -# plugin_args={'num_threads':8, 'memory':12, 'status_callback': log_nodes_cb}) - -# generate_gantt_chart('callback.log', 8) -''' -def generate_gantt_chart(logfile, cores, minute_scale=10, space_between_minutes=50, colors=["#7070FF", "#4E4EB2", "#2D2D66", "#9B9BFF"]): - - result, last_node = log_to_json(logfile) - scale = space_between_minutes - - #add the html header - html_string = ''' - - - - - -
''' - - - #create the header of the report with useful information - start = parser.parse(result[0]['start']) - duration = int((parser.parse(last_node['finish']) - start).total_seconds()) - - html_string += '

Start: '+ result[0]['start'] +'

' - html_string += '

Finish: '+ last_node['finish'] +'

' - html_string += '

Duration: '+ str(duration/60) +' minutes

' - html_string += '

Nodes: '+str(len(result))+'

' - html_string += '

Cores: '+str(cores)+'

' - - - #draw lines - html_string += draw_lines(start, duration, minute_scale, scale) - - #draw nodes - html_string += draw_nodes(start, result, cores, scale, colors) - - #html_string += draw_thread_bar(start, duration, result, space_between_minutes, minute_scale) - #html_string += draw_memory_bar(start, duration, result, space_between_minutes, minute_scale) - - #finish html - html_string+= ''' -
- ''' - - #save file - html_file = open(logfile +'.html', 'wb') - html_file.write(html_string) - html_file.close() From 15a7cef0b033eece7c83004140abc9cb2bae4d41 Mon Sep 17 00:00:00 2001 From: dclark87 Date: Wed, 27 Apr 2016 14:31:55 -0400 Subject: [PATCH 32/37] Fixed unit test for s3 datasink override interface input --- nipype/interfaces/io.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nipype/interfaces/io.py b/nipype/interfaces/io.py index 7bc0ff358c..0035971f31 100644 --- a/nipype/interfaces/io.py +++ b/nipype/interfaces/io.py @@ -211,7 +211,7 @@ class DataSinkInputSpec(DynamicTraitedSpec, BaseInterfaceInputSpec): encrypt_bucket_keys = traits.Bool(desc='Flag indicating whether to use S3 '\ 'server-side AES-256 encryption') # Set this if user wishes to override the bucket with their own - bucket = traits.Str(desc='Boto3 S3 bucket for manual override of bucket') + bucket = traits.Any(desc='Boto3 S3 bucket for manual override of bucket') # Set this if user wishes to have local copy of files as well local_copy = traits.Str(desc='Copy files locally as well as to S3 bucket') From 0a5bb0b36ad2a8744a30b61bb4e5967ae9276c4d Mon Sep 17 00:00:00 2001 From: dclark87 Date: Wed, 27 Apr 2016 14:33:37 -0400 Subject: [PATCH 33/37] Added new interfaces to __init__ import --- nipype/interfaces/afni/__init__.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/nipype/interfaces/afni/__init__.py b/nipype/interfaces/afni/__init__.py index 5ba885bce5..c7f123c4a1 100644 --- a/nipype/interfaces/afni/__init__.py +++ b/nipype/interfaces/afni/__init__.py @@ -8,8 +8,9 @@ from .base import Info from .preprocess import (To3D, Refit, Resample, TStat, Automask, Volreg, Merge, - ZCutUp, Calc, TShift, Warp, Detrend, Despike, DegreeCentrality, - LFCD, Copy, Fourier, Allineate, Maskave, SkullStrip, TCat, + ZCutUp, Calc, TShift, Warp, Detrend, Despike, + DegreeCentrality, ECM, LFCD, Copy, Fourier, Allineate, + Maskave, SkullStrip, TCat, ClipLevel, MaskTool, Seg, Fim, BlurInMask, Autobox, TCorrMap, Bandpass, Retroicor, TCorrelate, TCorr1D, BrickStat, ROIStats, AutoTcorrelate, AFNItoNIFTI, Eval, Means, Hist, FWHMx, OutlierCount, From df8c65f6693cee536f02cafa3d8efb5aa5595b60 Mon Sep 17 00:00:00 2001 From: dclark87 Date: Wed, 27 Apr 2016 19:11:10 -0400 Subject: [PATCH 34/37] Changed doctests to use functional.nii --- nipype/interfaces/afni/preprocess.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/nipype/interfaces/afni/preprocess.py b/nipype/interfaces/afni/preprocess.py index eb7082e85c..bf18879141 100644 --- a/nipype/interfaces/afni/preprocess.py +++ b/nipype/interfaces/afni/preprocess.py @@ -567,8 +567,8 @@ class DegreeCentrality(AFNICommand): ======== >>> from nipype.interfaces import afni as afni - >>> degree = afni.DegreeCentrality() - >>> degree.inputs.in_file = 'func_preproc.nii' + >>> degree = afni.DegreeCentrality() + >>> degree.inputs.in_file = 'functional.nii' >>> degree.inputs.mask = 'mask.nii' >>> degree.inputs.sparsity = 1 # keep the top one percent of connections >>> degree.inputs.out_file = 'out.nii' @@ -656,7 +656,7 @@ class ECM(AFNICommand): >>> from nipype.interfaces import afni as afni >>> ecm = afni.ECM() - >>> ecm.inputs.in_file = 'func_preproc.nii' + >>> ecm.inputs.in_file = 'functional.nii' >>> ecm.inputs.mask = 'mask.nii' >>> ecm.inputs.sparsity = 0.1 # keep top 0.1% of connections >>> ecm.inputs.out_file = 'out.nii' @@ -694,7 +694,7 @@ class LFCD(AFNICommand): >>> from nipype.interfaces import afni as afni >>> lfcd = afni.LFCD() 
- >>> lfcd.inputs.in_file = 'func_preproc.nii' + >>> lfcd.inputs.in_file = 'functional.nii' >>> lfcd.inputs.mask = 'mask.nii' >>> lfcd.inputs.thresh = 0.8 # keep all connections with corr >= 0.8 >>> lfcd.inputs.out_file = 'out.nii' From a28ed37921f6cf3d87c4ec88a2214cea52a5fcaa Mon Sep 17 00:00:00 2001 From: dclark87 Date: Thu, 28 Apr 2016 10:21:32 -0400 Subject: [PATCH 35/37] Fixed commandline strings for centrality doctests --- nipype/interfaces/afni/preprocess.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/nipype/interfaces/afni/preprocess.py b/nipype/interfaces/afni/preprocess.py index bf18879141..2b92445236 100644 --- a/nipype/interfaces/afni/preprocess.py +++ b/nipype/interfaces/afni/preprocess.py @@ -573,7 +573,7 @@ class DegreeCentrality(AFNICommand): >>> degree.inputs.sparsity = 1 # keep the top one percent of connections >>> degree.inputs.out_file = 'out.nii' >>> degree.cmdline - '3dDegreeCentrality -sparsity 1 -mask mask.nii -prefix out.nii func_preproc.nii' + '3dDegreeCentrality -mask mask.nii -prefix out.nii -sparsity 1.000000 functional.nii' >>> res = degree.run() # doctest: +SKIP """ @@ -661,7 +661,7 @@ class ECM(AFNICommand): >>> ecm.inputs.sparsity = 0.1 # keep top 0.1% of connections >>> ecm.inputs.out_file = 'out.nii' >>> ecm.cmdline - '3dECM -sparsity 0.1 -mask mask.nii -prefix out.nii func_preproc.nii' + '3dECM -mask mask.nii -prefix out.nii -sparsity 0.100000 functional.nii' >>> res = ecm.run() # doctest: +SKIP """ @@ -699,7 +699,7 @@ class LFCD(AFNICommand): >>> lfcd.inputs.thresh = 0.8 # keep all connections with corr >= 0.8 >>> lfcd.inputs.out_file = 'out.nii' >>> lfcd.cmdline - '3dLFCD -thresh 0.8 -mask mask.nii -prefix out.nii func_preproc.nii' + '3dLFCD -mask mask.nii -prefix out.nii -thresh 0.800000 functional.nii' >>> res = lfcd.run() # doctest: +SKIP """ From 5c89bdf60ae42ec9474d4ee37e470e5474514b89 Mon Sep 17 00:00:00 2001 From: sgiavasis Date: Wed, 11 May 2016 14:47:37 -0400 Subject: [PATCH 36/37] Minor fix to 3dSeg's aggregate_outputs --- nipype/interfaces/afni/preprocess.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/nipype/interfaces/afni/preprocess.py b/nipype/interfaces/afni/preprocess.py index b0e43b04c4..401b88d01d 100644 --- a/nipype/interfaces/afni/preprocess.py +++ b/nipype/interfaces/afni/preprocess.py @@ -1949,14 +1949,16 @@ class Seg(AFNICommandBase): def aggregate_outputs(self, runtime=None, needed_outputs=None): + import glob + outputs = self._outputs() if isdefined(self.inputs.prefix): - outfile = os.path.join(os.getcwd(), self.inputs.prefix, 'Classes+orig.BRIK') + outfile = os.path.join(os.getcwd(), self.inputs.prefix, 'Classes+*.BRIK') else: - outfile = os.path.join(os.getcwd(), 'Segsy', 'Classes+orig.BRIK') + outfile = os.path.join(os.getcwd(), 'Segsy', 'Classes+*.BRIK') - outputs.out_file = outfile + outputs.out_file = glob.glob(outfile)[0] return outputs From bb3a5da38529c0838fed2b7e91aa5af30d6874c0 Mon Sep 17 00:00:00 2001 From: dclark87 Date: Tue, 17 May 2016 18:01:23 -0400 Subject: [PATCH 37/37] Updated CHANGES to reflect AFNI interfaces --- CHANGES | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGES b/CHANGES index 50337ab634..2a28d40cd9 100644 --- a/CHANGES +++ b/CHANGES @@ -31,6 +31,8 @@ Release 0.12.0-rc1 (April 20, 2016) * ENH: Nipype workflow and interfaces for FreeSurfer's recon-all (https://github.com/nipy/nipype/pull/1326) * FIX: Permit relative path for concatenated_file input to Concatenate() (https://github.com/nipy/nipype/pull/1411) * ENH: Makes ReconAll 
workflow backwards compatible with FreeSurfer 5.3.0 (https://github.com/nipy/nipype/pull/1434) +* ENH: Added interfaces for AFNI 3dDegreeCentrality, 3dECM, 3dLFCD, 3dClipLevel, 3dmask_tool, and 3dSeg + (https://github.com/nipy/nipype/pull/1460) Release 0.11.0 (September 15, 2015) ============
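
A minimal usage sketch tying together the interfaces listed in the CHANGES entry above with the plain MultiProc plugin kept by this series; the file names, node name, workflow name, and plugin arguments below are illustrative only and are not taken from the patches themselves:

    import os
    import nipype.pipeline.engine as pe
    from nipype.interfaces import afni

    # one-node workflow around the new 3dDegreeCentrality wrapper
    degree = pe.Node(interface=afni.DegreeCentrality(), name='degree')
    degree.inputs.in_file = 'functional.nii'   # illustrative input dataset
    degree.inputs.mask = 'mask.nii'            # illustrative mask
    degree.inputs.sparsity = 1                 # keep the top one percent of connections

    wf = pe.Workflow(name='centrality')
    wf.base_dir = os.getcwd()
    wf.add_nodes([degree])

    # run with the MultiProc plugin options restored in this series
    wf.run(plugin='MultiProc', plugin_args={'n_procs': 2, 'non_daemon': True})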