From 4b3f926d2382720466b00c2cb060469a6886bf0f Mon Sep 17 00:00:00 2001 From: carolFrohlich Date: Tue, 29 Sep 2015 14:15:15 -0400 Subject: [PATCH 01/45] add resource multiproc plugin --- nipype/interfaces/base.py | 2 + nipype/pipeline/plugins/__init__.py | 3 + nipype/pipeline/plugins/callback_log.py | 20 +++ nipype/pipeline/plugins/multiproc.py | 163 ++++++++++++++++++++++++ 4 files changed, 188 insertions(+) create mode 100644 nipype/pipeline/plugins/callback_log.py diff --git a/nipype/interfaces/base.py b/nipype/interfaces/base.py index ac6b7b8af4..854fb44fe1 100644 --- a/nipype/interfaces/base.py +++ b/nipype/interfaces/base.py @@ -750,6 +750,8 @@ def __init__(self, **inputs): raise Exception('No input_spec in class: %s' % self.__class__.__name__) self.inputs = self.input_spec(**inputs) + self.memory = 1 + self.num_threads = 1 @classmethod def help(cls, returnhelp=False): diff --git a/nipype/pipeline/plugins/__init__.py b/nipype/pipeline/plugins/__init__.py index dac14301b2..cf392f0f77 100644 --- a/nipype/pipeline/plugins/__init__.py +++ b/nipype/pipeline/plugins/__init__.py @@ -9,6 +9,7 @@ from .condor import CondorPlugin from .dagman import CondorDAGManPlugin from .multiproc import MultiProcPlugin +from .multiproc import ResourceMultiProcPlugin from .ipython import IPythonPlugin from .somaflow import SomaFlowPlugin from .pbsgraph import PBSGraphPlugin @@ -16,3 +17,5 @@ from .lsf import LSFPlugin from .slurm import SLURMPlugin from .slurmgraph import SLURMGraphPlugin + +from .callback_log import log_nodes_cb diff --git a/nipype/pipeline/plugins/callback_log.py b/nipype/pipeline/plugins/callback_log.py new file mode 100644 index 0000000000..cd8827bf29 --- /dev/null +++ b/nipype/pipeline/plugins/callback_log.py @@ -0,0 +1,20 @@ +import logging +import datetime + +def log_nodes_cb(node, status): + if status == 'start': + print 'START', "name:",node.name, "id:", node._id, "start:", datetime.datetime.now(), "memory:", node._interface.memory,"num_threads:", node._interface.num_threads + logging.debug( + "name:",node.name, + "id:", node._id, + "start:", datetime.datetime.now(), + "memory:", node._interface.memory, + "num_threads:", node._interface.num_threads) + else: + print 'FINISH', "name:",node.name, "id:", node._id, "finish:", datetime.datetime.now(), "memory:", node._interface.memory,"num_threads:", node._interface.num_threads + logging.debug( + "name:",node.name, + "id:", node._id, + "finish:", datetime.datetime.now(), + "memory:", node._interface.memory, + "num_threads:", node._interface.num_threads) \ No newline at end of file diff --git a/nipype/pipeline/plugins/multiproc.py b/nipype/pipeline/plugins/multiproc.py index 0f6b11c30a..51de405542 100644 --- a/nipype/pipeline/plugins/multiproc.py +++ b/nipype/pipeline/plugins/multiproc.py @@ -96,3 +96,166 @@ def _report_crash(self, node, result=None): def _clear_task(self, taskid): del self._taskresult[taskid] + + + + + +import numpy as np +from copy import deepcopy +from ..engine import (MapNode, str2bool) +import datetime +import psutil + +class ResourceMultiProcPlugin(MultiProcPlugin): + + def __init__(self, plugin_args=None): + super(ResourceMultiProcPlugin, self).__init__(plugin_args=plugin_args) + self.plugin_args = plugin_args + self.current_time = datetime.datetime.now() + self.log_nodes = [] + + def _send_procs_to_workers(self, updatehash=False, graph=None): + executing_now = [] + processors = cpu_count() + memory = psutil.virtual_memory() + memory = memory.total + if self.plugin_args: + if 'n_procs' in self.plugin_args: + 
processors = self.plugin_args['n_procs'] + if 'memory' in self.plugin_args: + memory = self.plugin_args['memory'] + + + jobids = np.flatnonzero((self.proc_pending == True) & (self.depidx.sum(axis=0) == 0).__array__()) + print('START, pending_tasks:', jobids) + + #busy_processors = number of busy processors + busy_memory = 0 + busy_processors = 0 + for jobid in jobids: + print 'using memory:', jobid, self.procs[jobid]._interface.num_threads + busy_memory+= self.procs[jobid]._interface.memory + busy_processors+= self.procs[jobid]._interface.num_threads + + + free_memory = memory - busy_memory + free_processors = processors - busy_processors + + #jobids = all jobs without dependency not run + jobids = np.flatnonzero((self.proc_done == False) & (self.depidx.sum(axis=0) == 0).__array__()) + + + #sort jobids first by memory and then by number of threads + jobids = sorted(jobids, key=lambda item: (self.procs[item]._interface.memory, self.procs[item]._interface.num_threads)) + print('jobids ->', jobids) + + print 'free memory ->', free_memory, ', free processors ->', free_processors + + + #while have enough memory and processors for first job + #submit first job on the list + for jobid in jobids: + print 'next_job ->', jobid, 'memory:', self.procs[jobid]._interface.memory, 'threads:', self.procs[jobid]._interface.num_threads + + print 'can job execute?', self.procs[jobid]._interface.memory <= free_memory and self.procs[jobid]._interface.num_threads <= free_processors + if self.procs[jobid]._interface.memory <= free_memory and self.procs[jobid]._interface.num_threads <= free_processors: + print('Executing: %s ID: %d' %(self.procs[jobid]._id, jobid)) + executing_now.append(self.procs[jobid]) + + if isinstance(self.procs[jobid], MapNode): + try: + num_subnodes = self.procs[jobid].num_subnodes() + except Exception: + self._clean_queue(jobid, graph) + self.proc_pending[jobid] = False + continue + if num_subnodes > 1: + submit = self._submit_mapnode(jobid) + if not submit: + continue + + + self.proc_done[jobid] = True + self.proc_pending[jobid] = True + + free_memory -= self.procs[jobid]._interface.memory + free_processors -= self.procs[jobid]._interface.num_threads + + if self._status_callback: + self._status_callback(self.procs[jobid], 'start') + + + + if str2bool(self.procs[jobid].config['execution']['local_hash_check']): + print('checking hash locally') + try: + hash_exists, _, _, _ = self.procs[ + jobid].hash_exists() + print('Hash exists %s' % str(hash_exists)) + if (hash_exists and (self.procs[jobid].overwrite == False or (self.procs[jobid].overwrite == None and not self.procs[jobid]._interface.always_run))): + self._task_finished_cb(jobid) + self._remove_node_dirs() + continue + except Exception: + self._clean_queue(jobid, graph) + self.proc_pending[jobid] = False + continue + + + print('Finished checking hash') + + + if self.procs[jobid].run_without_submitting: + print('Running node %s on master thread' %self.procs[jobid]) + try: + self.procs[jobid].run() + except Exception: + self._clean_queue(jobid, graph) + self._task_finished_cb(jobid) + self._remove_node_dirs() + + else: + print('submitting', jobid) + tid = self._submit_job(deepcopy(self.procs[jobid]), updatehash=updatehash) + if tid is None: + self.proc_done[jobid] = False + self.proc_pending[jobid] = False + else: + self.pending_tasks.insert(0, (tid, jobid)) + else: + break + + #run this code when not running each node + # max_node = datetime.datetime.min + # for n in executing_now: + # name = n.name + # start = self.current_time + # 
finish = self.current_time + n._interface.time + # duration = (finish - start).total_seconds() + # memory = n._interface.memory + # num_threads = n._interface.num_threads + + # if finish > max_node: + # max_node = finish + + # self.log_nodes.append({'name': name, 'start': str(start), 'finish': str(finish), 'duration': duration, 'memory':memory, 'num_threads': num_threads}) + + + # if len(executing_now) > 0: + # self.current_time = finish + # #write log + # self.log_nodes = sorted(self.log_nodes, key=lambda n: datetime.datetime.strptime(n['start'],"%Y-%m-%d %H:%M:%S.%f")) + # first_node = datetime.datetime.strptime(self.log_nodes[0]['start'],"%Y-%m-%d %H:%M:%S.%f") + # last_node = datetime.datetime.strptime(self.log_nodes[-1]['finish'],"%Y-%m-%d %H:%M:%S.%f") + + + # result = {"name": os.getcwd(), "start": str(first_node), "finish": str(last_node), "duration": (last_node - first_node).total_seconds() / 60, "nodes": self.log_nodes} + + # log_content = json.dumps(result) + # log_file = open('log_anat_preproc.py', 'wb') + # log_file.write(log_content) + # log_file.close() + + print('- - - - - - - - - - - - - - - ', len(self.log_nodes), '- - - - - - - - - - - - - - - ') + print('No jobs waiting to execute') From 6f4690bde42bb219b1e59369cf93b2904fd8b7df Mon Sep 17 00:00:00 2001 From: carolFrohlich Date: Tue, 29 Sep 2015 17:19:35 -0400 Subject: [PATCH 02/45] callback functions write log --- nipype/pipeline/plugins/callback_log.py | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/nipype/pipeline/plugins/callback_log.py b/nipype/pipeline/plugins/callback_log.py index cd8827bf29..1faec2b4c1 100644 --- a/nipype/pipeline/plugins/callback_log.py +++ b/nipype/pipeline/plugins/callback_log.py @@ -1,20 +1,11 @@ -import logging import datetime +import logging def log_nodes_cb(node, status): + logger = logging.getLogger('callback') if status == 'start': - print 'START', "name:",node.name, "id:", node._id, "start:", datetime.datetime.now(), "memory:", node._interface.memory,"num_threads:", node._interface.num_threads - logging.debug( - "name:",node.name, - "id:", node._id, - "start:", datetime.datetime.now(), - "memory:", node._interface.memory, - "num_threads:", node._interface.num_threads) + message = "name:",node.name, "id:", node._id, "start:", datetime.datetime.now(), "memory:", node._interface.memory, "num_threads:", node._interface.num_threads + logger.debug(message) else: - print 'FINISH', "name:",node.name, "id:", node._id, "finish:", datetime.datetime.now(), "memory:", node._interface.memory,"num_threads:", node._interface.num_threads - logging.debug( - "name:",node.name, - "id:", node._id, - "finish:", datetime.datetime.now(), - "memory:", node._interface.memory, - "num_threads:", node._interface.num_threads) \ No newline at end of file + message = "name:",node.name, "id:", node._id, "finish:", datetime.datetime.now(), "memory:", node._interface.memory, "num_threads:", node._interface.num_threads + logger.debug(message) \ No newline at end of file From 52da583c02d60e5272b036e7a64a9919300f8e50 Mon Sep 17 00:00:00 2001 From: carolFrohlich Date: Wed, 30 Sep 2015 14:00:22 -0400 Subject: [PATCH 03/45] fix multiproc tests. 
create lot 2 json converter --- nipype/pipeline/plugins/callback_log.py | 45 ++++- .../pipeline/plugins/tests/test_multiproc.py | 162 +++++++++++++++++- 2 files changed, 202 insertions(+), 5 deletions(-) diff --git a/nipype/pipeline/plugins/callback_log.py b/nipype/pipeline/plugins/callback_log.py index 1faec2b4c1..52e2621f60 100644 --- a/nipype/pipeline/plugins/callback_log.py +++ b/nipype/pipeline/plugins/callback_log.py @@ -4,8 +4,47 @@ def log_nodes_cb(node, status): logger = logging.getLogger('callback') if status == 'start': - message = "name:",node.name, "id:", node._id, "start:", datetime.datetime.now(), "memory:", node._interface.memory, "num_threads:", node._interface.num_threads + message = '{"name":' + '"' + node.name + '"' + ',"id":' + '"' + node._id + '"' + ',"start":' + '"' +str(datetime.datetime.now()) + '"' + ',"memory":' + str(node._interface.memory) + ',"num_threads":' + str(node._interface.num_threads) + '}' logger.debug(message) else: - message = "name:",node.name, "id:", node._id, "finish:", datetime.datetime.now(), "memory:", node._interface.memory, "num_threads:", node._interface.num_threads - logger.debug(message) \ No newline at end of file + message = '{"name":' + '"' + node.name + '"' + ',"id":' + '"' + node._id + '"' + ',"finish":' + '"' + str(datetime.datetime.now()) + '"' + ',"memory":' + str(node._interface.memory) + ',"num_threads":' + str(node._interface.num_threads) + '}' + logger.debug(message) + + + +import json +from dateutil import parser + +def convert_logcb_to_json(filename): + with open(filename, 'r') as content: + #read file separating each line + content = content.read() + lines = content.split('\n') + + #separate lines of starting nodes and ending nodes + starts = [ json.loads(x) for x in lines if '"start":' in x ] + ends = [json.loads(x) for x in lines if '"finish":' in x ] + + + + #foreach start, search its end. They have same name and id + #this line is O(n^2). 
refactor + for element in starts: + end = next((f for f in ends if (f['id'] == element['id'] and f['name'] == element['name'])), None) + + if end is not None: + element['finish'] = end['finish'] + else: + element['finish'] = element['start'] + + + first_node = starts[0]['start'] + last_node = ends[-1]['finish'] + + duration = parser.parse(last_node) - parser.parse(first_node) + + #sorted(starts, key=lambda e: parser.parse(e['start'])) # sort by age + result = {'start': first_node, 'finish': last_node, 'duration':duration.total_seconds(), 'nodes': starts} + #finally, save the json file + with open(filename + '.json', 'w') as outfile: + json.dump(result, outfile) \ No newline at end of file diff --git a/nipype/pipeline/plugins/tests/test_multiproc.py b/nipype/pipeline/plugins/tests/test_multiproc.py index 66f755da9a..e0fdce7255 100644 --- a/nipype/pipeline/plugins/tests/test_multiproc.py +++ b/nipype/pipeline/plugins/tests/test_multiproc.py @@ -3,7 +3,7 @@ from tempfile import mkdtemp from shutil import rmtree -from nipype.testing import assert_equal +from nipype.testing import assert_equal, assert_less_equal import nipype.pipeline.engine as pe class InputSpec(nib.TraitedSpec): @@ -46,4 +46,162 @@ def test_run_multiproc(): result = node.get_output('output1') yield assert_equal, result, [1, 1] os.chdir(cur_dir) - rmtree(temp_dir) \ No newline at end of file + rmtree(temp_dir) + + + +################################# + + +class InputSpecSingleNode(nib.TraitedSpec): + input1 = nib.traits.Int(desc='a random int') + input2 = nib.traits.Int(desc='a random int') + +class OutputSpecSingleNode(nib.TraitedSpec): + output1 = nib.traits.Int(desc='a random int') + + +class TestInterfaceSingleNode(nib.BaseInterface): + input_spec = InputSpecSingleNode + output_spec = OutputSpecSingleNode + + def _run_interface(self, runtime): + runtime.returncode = 0 + return runtime + + def _list_outputs(self): + outputs = self._outputs().get() + outputs['output1'] = self.inputs.input1 + return outputs + + +def parse_log(filename, measure): + import json + from dateutil.parser import parse + from datetime import datetime + import datetime as d + + json_data = open(filename).read() + data = json.loads(json_data) + total_duration = int(float(data['duration']) * 60) #total duration in seconds + + total = [] + for i in range(total_duration): + total.append(0) + + now = parse(data['start']) + for i in range(total_duration): + start_index = 0 + node_start = None + node_finish = None + + x = now + + for j in range(start_index, len(data['nodes'])): + node_start = parse(data['nodes'][j]['start']) + node_finish = parse(data['nodes'][j]['finish']) + + if node_start < x and node_finish > x: + total[i] += data['nodes'][j][measure] + start_index = j + + if node_start > x: + break + + now += d.timedelta(seconds=1) + + return total + + +import os +from nipype.pipeline.plugins.callback_log import log_nodes_cb, convert_logcb_to_json +import logging +import logging.handlers +def test_do_not_use_more_memory_then_specified(): + LOG_FILENAME = 'callback.log' + my_logger = logging.getLogger('callback') + my_logger.setLevel(logging.DEBUG) + + # Add the log message handler to the logger + handler = logging.FileHandler(LOG_FILENAME) + my_logger.addHandler(handler) + + max_memory = 10 + pipe = pe.Workflow(name='pipe') + n1 = pe.Node(interface=TestInterfaceSingleNode(), name='n1') + n2 = pe.Node(interface=TestInterfaceSingleNode(), name='n2') + n3 = pe.Node(interface=TestInterfaceSingleNode(), name='n3') + n4 = 
pe.Node(interface=TestInterfaceSingleNode(), name='n4') + + n1.interface.memory = 1 + n2.interface.memory = 1 + n3.interface.memory = 10 + n4.interface.memory = 1 + + pipe.connect(n1, 'output1', n2, 'input1') + pipe.connect(n1, 'output1', n3, 'input1') + pipe.connect(n2, 'output1', n4, 'input1') + pipe.connect(n3, 'output1', n4, 'input2') + n1.inputs.input1 = 10 + pipe.config['execution']['poll_sleep_duration'] = 1 + pipe.run(plugin='ResourceMultiProc', plugin_args={'memory': max_memory, 'status_callback': log_nodes_cb}) + + convert_logcb_to_json(LOG_FILENAME) + #memory usage in every second + memory = parse_log(LOG_FILENAME + '.json' , 'memory') + + result = True + for m in memory: + if m > max_memory: + result = False + break + + yield assert_equal, result, True + + os.remove(LOG_FILENAME) + os.remove(LOG_FILENAME + '.json') + + +def test_do_not_use_more_threads_then_specified(): + LOG_FILENAME = 'callback.log' + my_logger = logging.getLogger('callback') + my_logger.setLevel(logging.DEBUG) + + # Add the log message handler to the logger + handler = logging.FileHandler(LOG_FILENAME) + my_logger.addHandler(handler) + + max_threads = 10 + pipe = pe.Workflow(name='pipe') + n1 = pe.Node(interface=TestInterfaceSingleNode(), name='n1') + n2 = pe.Node(interface=TestInterfaceSingleNode(), name='n2') + n3 = pe.Node(interface=TestInterfaceSingleNode(), name='n3') + n4 = pe.Node(interface=TestInterfaceSingleNode(), name='n4') + + n1.interface.num_threads = 1 + n2.interface.num_threads = 1 + n3.interface.num_threads = 10 + n4.interface.num_threads = 1 + + pipe.connect(n1, 'output1', n2, 'input1') + pipe.connect(n1, 'output1', n3, 'input1') + pipe.connect(n2, 'output1', n4, 'input1') + pipe.connect(n3, 'output1', n4, 'input2') + n1.inputs.input1 = 10 + pipe.config['execution']['poll_sleep_duration'] = 1 + pipe.run(plugin='ResourceMultiProc', plugin_args={'n_procs': max_threads, 'status_callback': log_nodes_cb}) + + convert_logcb_to_json(LOG_FILENAME) + #memory usage in every second + threads = parse_log(LOG_FILENAME + '.json' , 'num_threads') + + result = True + for t in threads: + if t > max_threads: + result = False + break + + yield assert_equal, result, True + + os.remove(LOG_FILENAME) + os.remove(LOG_FILENAME + '.json') \ No newline at end of file From ffb4756e365e318f299f46c8d5e5747d34790c49 Mon Sep 17 00:00:00 2001 From: carolFrohlich Date: Wed, 30 Sep 2015 14:43:24 -0400 Subject: [PATCH 04/45] fix comments and logs --- nipype/pipeline/plugins/multiproc.py | 76 +++++-------------- .../pipeline/plugins/tests/test_multiproc.py | 4 +- 2 files changed, 23 insertions(+), 57 deletions(-) diff --git a/nipype/pipeline/plugins/multiproc.py b/nipype/pipeline/plugins/multiproc.py index 51de405542..88c843ddab 100644 --- a/nipype/pipeline/plugins/multiproc.py +++ b/nipype/pipeline/plugins/multiproc.py @@ -106,6 +106,8 @@ def _clear_task(self, taskid): from ..engine import (MapNode, str2bool) import datetime import psutil +from ... import logging +logger = logging.getLogger('workflow') class ResourceMultiProcPlugin(MultiProcPlugin): @@ -116,6 +118,9 @@ def __init__(self, plugin_args=None): self.log_nodes = [] def _send_procs_to_workers(self, updatehash=False, graph=None): + """ Sends jobs to workers when system resources are available. + Check memory and cores usage before running jobs. 
+ """ executing_now = [] processors = cpu_count() memory = psutil.virtual_memory() @@ -126,41 +131,38 @@ def _send_procs_to_workers(self, updatehash=False, graph=None): if 'memory' in self.plugin_args: memory = self.plugin_args['memory'] - + # Check to see if a job is available jobids = np.flatnonzero((self.proc_pending == True) & (self.depidx.sum(axis=0) == 0).__array__()) - print('START, pending_tasks:', jobids) - #busy_processors = number of busy processors + #check available system resources by summing all threads and memory used busy_memory = 0 busy_processors = 0 for jobid in jobids: - print 'using memory:', jobid, self.procs[jobid]._interface.num_threads busy_memory+= self.procs[jobid]._interface.memory busy_processors+= self.procs[jobid]._interface.num_threads - free_memory = memory - busy_memory free_processors = processors - busy_processors - #jobids = all jobs without dependency not run + + #check all jobs without dependency not run jobids = np.flatnonzero((self.proc_done == False) & (self.depidx.sum(axis=0) == 0).__array__()) - #sort jobids first by memory and then by number of threads + #sort jobs ready to run first by memory and then by number of threads + #The most resource consuming jobs run first jobids = sorted(jobids, key=lambda item: (self.procs[item]._interface.memory, self.procs[item]._interface.num_threads)) - print('jobids ->', jobids) - print 'free memory ->', free_memory, ', free processors ->', free_processors + logger.debug('Free memory: %d, Free processors: %d', free_memory, free_processors) #while have enough memory and processors for first job #submit first job on the list for jobid in jobids: - print 'next_job ->', jobid, 'memory:', self.procs[jobid]._interface.memory, 'threads:', self.procs[jobid]._interface.num_threads + logger.debug('Next Job: %d, memory: %d, threads: %d' %(jobid, self.procs[jobid]._interface.memory, self.procs[jobid]._interface.num_threads)) - print 'can job execute?', self.procs[jobid]._interface.memory <= free_memory and self.procs[jobid]._interface.num_threads <= free_processors if self.procs[jobid]._interface.memory <= free_memory and self.procs[jobid]._interface.num_threads <= free_processors: - print('Executing: %s ID: %d' %(self.procs[jobid]._id, jobid)) + logger.info('Executing: %s ID: %d' %(self.procs[jobid]._id, jobid)) executing_now.append(self.procs[jobid]) if isinstance(self.procs[jobid], MapNode): @@ -175,24 +177,23 @@ def _send_procs_to_workers(self, updatehash=False, graph=None): if not submit: continue - + # change job status in appropriate queues self.proc_done[jobid] = True self.proc_pending[jobid] = True free_memory -= self.procs[jobid]._interface.memory free_processors -= self.procs[jobid]._interface.num_threads + # Send job to task manager and add to pending tasks if self._status_callback: self._status_callback(self.procs[jobid], 'start') - - if str2bool(self.procs[jobid].config['execution']['local_hash_check']): - print('checking hash locally') + logger.debug('checking hash locally') try: hash_exists, _, _, _ = self.procs[ jobid].hash_exists() - print('Hash exists %s' % str(hash_exists)) + logger.debug('Hash exists %s' % str(hash_exists)) if (hash_exists and (self.procs[jobid].overwrite == False or (self.procs[jobid].overwrite == None and not self.procs[jobid]._interface.always_run))): self._task_finished_cb(jobid) self._remove_node_dirs() @@ -201,13 +202,10 @@ def _send_procs_to_workers(self, updatehash=False, graph=None): self._clean_queue(jobid, graph) self.proc_pending[jobid] = False continue - - - 
print('Finished checking hash') - + logger.debug('Finished checking hash') if self.procs[jobid].run_without_submitting: - print('Running node %s on master thread' %self.procs[jobid]) + logger.debug('Running node %s on master thread' %self.procs[jobid]) try: self.procs[jobid].run() except Exception: @@ -226,36 +224,4 @@ def _send_procs_to_workers(self, updatehash=False, graph=None): else: break - #run this code when not running each node - # max_node = datetime.datetime.min - # for n in executing_now: - # name = n.name - # start = self.current_time - # finish = self.current_time + n._interface.time - # duration = (finish - start).total_seconds() - # memory = n._interface.memory - # num_threads = n._interface.num_threads - - # if finish > max_node: - # max_node = finish - - # self.log_nodes.append({'name': name, 'start': str(start), 'finish': str(finish), 'duration': duration, 'memory':memory, 'num_threads': num_threads}) - - - # if len(executing_now) > 0: - # self.current_time = finish - # #write log - # self.log_nodes = sorted(self.log_nodes, key=lambda n: datetime.datetime.strptime(n['start'],"%Y-%m-%d %H:%M:%S.%f")) - # first_node = datetime.datetime.strptime(self.log_nodes[0]['start'],"%Y-%m-%d %H:%M:%S.%f") - # last_node = datetime.datetime.strptime(self.log_nodes[-1]['finish'],"%Y-%m-%d %H:%M:%S.%f") - - - # result = {"name": os.getcwd(), "start": str(first_node), "finish": str(last_node), "duration": (last_node - first_node).total_seconds() / 60, "nodes": self.log_nodes} - - # log_content = json.dumps(result) - # log_file = open('log_anat_preproc.py', 'wb') - # log_file.write(log_content) - # log_file.close() - - print('- - - - - - - - - - - - - - - ', len(self.log_nodes), '- - - - - - - - - - - - - - - ') - print('No jobs waiting to execute') + logger.debug('No jobs waiting to execute') \ No newline at end of file diff --git a/nipype/pipeline/plugins/tests/test_multiproc.py b/nipype/pipeline/plugins/tests/test_multiproc.py index e0fdce7255..dde99d76a0 100644 --- a/nipype/pipeline/plugins/tests/test_multiproc.py +++ b/nipype/pipeline/plugins/tests/test_multiproc.py @@ -83,7 +83,7 @@ def parse_log(filename, measure): json_data = open(filename).read() data = json.loads(json_data) - total_duration = int(float(data['duration']) * 60) #total duration in seconds + total_duration = int(float(data['duration'])) #total duration in seconds total = [] for i in range(total_duration): @@ -192,7 +192,7 @@ def test_do_not_use_more_threads_then_specified(): pipe.run(plugin='ResourceMultiProc', plugin_args={'n_procs': max_threads, 'status_callback': log_nodes_cb}) convert_logcb_to_json(LOG_FILENAME) - #memory usage in every second + #threads usage in every second threads = parse_log(LOG_FILENAME + '.json' , 'num_threads') result = True From 0890e8163032a9fa4d2b4c28a5e0f9dfbbfa6c67 Mon Sep 17 00:00:00 2001 From: carolFrohlich Date: Thu, 1 Oct 2015 14:08:43 -0400 Subject: [PATCH 05/45] fix tests --- .../pipeline/plugins/tests/test_multiproc.py | 37 +++++++++++++++---- 1 file changed, 30 insertions(+), 7 deletions(-) diff --git a/nipype/pipeline/plugins/tests/test_multiproc.py b/nipype/pipeline/plugins/tests/test_multiproc.py index dde99d76a0..645d3bc567 100644 --- a/nipype/pipeline/plugins/tests/test_multiproc.py +++ b/nipype/pipeline/plugins/tests/test_multiproc.py @@ -85,9 +85,11 @@ def parse_log(filename, measure): data = json.loads(json_data) total_duration = int(float(data['duration'])) #total duration in seconds - total = [] + total_memory = [] + total_threads = [] for i in range(total_duration): 
- total.append(0) + total_memory.append(0) + total_threads.append(0) now = parse(data['start']) for i in range(total_duration): @@ -102,7 +104,8 @@ def parse_log(filename, measure): node_finish = parse(data['nodes'][j]['finish']) if node_start < x and node_finish > x: - total[i] += data['nodes'][j][measure] + total_memory[i] += data['nodes'][j]['memory'] + total_threads[i] += data['nodes'][j]['num_threads'] start_index = j if node_start > x: @@ -110,13 +113,15 @@ def parse_log(filename, measure): now += d.timedelta(seconds=1) - return total + return total_memory, total_threads import os from nipype.pipeline.plugins.callback_log import log_nodes_cb, convert_logcb_to_json import logging import logging.handlers +import psutil +from multiprocessing import cpu_count def test_do_not_use_more_memory_then_specified(): LOG_FILENAME = 'callback.log' my_logger = logging.getLogger('callback') @@ -148,7 +153,7 @@ def test_do_not_use_more_memory_then_specified(): convert_logcb_to_json(LOG_FILENAME) #memory usage in every second - memory = parse_log(LOG_FILENAME + '.json' , 'memory') + memory, threads = parse_log(LOG_FILENAME + '.json' , 'memory') result = True for m in memory: @@ -158,6 +163,16 @@ def test_do_not_use_more_memory_then_specified(): yield assert_equal, result, True + max_threads = cpu_count() + + result = True + for t in threads: + if t > max_threads: + result = False + break + + yield assert_equal, result, True, "using more threads than system has (threads is not specified by user)" + os.remove(LOG_FILENAME) os.remove(LOG_FILENAME + '.json') @@ -193,7 +208,7 @@ def test_do_not_use_more_threads_then_specified(): convert_logcb_to_json(LOG_FILENAME) #threads usage in every second - threads = parse_log(LOG_FILENAME + '.json' , 'num_threads') + memory, threads = parse_log(LOG_FILENAME + '.json' , 'num_threads') result = True for t in threads: @@ -201,7 +216,15 @@ def test_do_not_use_more_threads_then_specified(): result = False break - yield assert_equal, result, True + yield assert_equal, result, True, "using more threads than specified" + + max_memory = psutil.virtual_memory().total / (1024*1024) + result = True + for m in memory: + if m > max_memory: + result = False + break + yield assert_equal, result, True, "using more memory than system has (memory is not specified by user)" os.remove(LOG_FILENAME) os.remove(LOG_FILENAME + '.json') \ No newline at end of file From b3c6afc83b1d5fcb68d7a2b45ee24b71d6b40548 Mon Sep 17 00:00:00 2001 From: dclark87 Date: Tue, 6 Oct 2015 15:24:18 -0400 Subject: [PATCH 06/45] Modified the DataSink class and DataSinkInputSpec class to be able to handle uploading data to S3 by including "s3://bucket_name/.." 
in the base_directory, passes all unittests in https://github.com/FCP-INDI/C-PAC/blob/test_dev/test/unit/nipype/datasink_test.py --- nipype/interfaces/io.py | 398 +++++++++++++++++++++++++++++++++++----- 1 file changed, 352 insertions(+), 46 deletions(-) diff --git a/nipype/interfaces/io.py b/nipype/interfaces/io.py index 39ae774c21..b35765c62e 100644 --- a/nipype/interfaces/io.py +++ b/nipype/interfaces/io.py @@ -131,31 +131,85 @@ def _add_output_traits(self, base): return base +# Class to track percentage of S3 file upload +class ProgressPercentage(object): + ''' + Call-able class instsance (via __call__ method) that displays + upload percentage of a file to S3 + ''' + + def __init__(self, filename): + ''' + ''' + + # Import packages + import threading + import os + + # Initialize data attributes + self._filename = filename + self._size = float(os.path.getsize(filename)) + self._seen_so_far = 0 + self._lock = threading.Lock() + + def __call__(self, bytes_amount): + ''' + ''' + + # Import packages + import sys + + # With the lock on, print upload status + with self._lock: + self._seen_so_far += bytes_amount + percentage = (self._seen_so_far / self._size) * 100 + progress_str = '%d / %d (%.2f%%)\r'\ + % (self._seen_so_far, self._size, percentage) + + # Write to stdout + sys.stdout.write(progress_str) + sys.stdout.flush() + + +# DataSink inputs class DataSinkInputSpec(DynamicTraitedSpec, BaseInterfaceInputSpec): + ''' + ''' + + # Init inputspec data attributes base_directory = Directory( desc='Path to the base directory for storing data.') container = traits.Str( desc='Folder within base directory in which to store output') parameterization = traits.Bool(True, usedefault=True, desc='store output in parametrized structure') - strip_dir = Directory(desc='path to strip out of filename') + strip_dir = traits.Directory(desc='path to strip out of filename') substitutions = InputMultiPath(traits.Tuple(traits.Str, traits.Str), desc=('List of 2-tuples reflecting string ' 'to substitute and string to replace ' 'it with')) - regexp_substitutions = InputMultiPath(traits.Tuple(traits.Str, traits.Str), - desc=('List of 2-tuples reflecting a pair ' - 'of a Python regexp pattern and a ' - 'replacement string. Invoked after ' - 'string `substitutions`')) + regexp_substitutions = \ + InputMultiPath(traits.Tuple(traits.Str, traits.Str), + desc=('List of 2-tuples reflecting a pair of a '\ + 'Python regexp pattern and a replacement '\ + 'string. 
Invoked after string `substitutions`')) _outputs = traits.Dict(traits.Str, value={}, usedefault=True) remove_dest_dir = traits.Bool(False, usedefault=True, desc='remove dest directory when copying dirs') + # AWS S3 data attributes + creds_path = traits.Str(desc='Filepath to AWS credentials file for S3 bucket '\ + 'access') + encrypt_bucket_keys = traits.Bool(desc='Flag indicating whether to use S3 '\ + 'server-side AES-256 encryption') + + # Set call-able inputs attributes def __setattr__(self, key, value): + import nipype.interfaces.traits_extension as nit + if key not in self.copyable_trait_names(): - if not isdefined(value): + if not nit.isdefined(value): super(DataSinkInputSpec, self).__setattr__(key, value) self._outputs[key] = value else: @@ -164,11 +218,19 @@ def __setattr__(self, key, value): super(DataSinkInputSpec, self).__setattr__(key, value) +# DataSink outputs class DataSinkOutputSpec(TraitedSpec): + ''' + ''' + + # Import packages + import traits.api as tapi - out_file = traits.Any(desc='datasink output') + # Init out file + out_file = tapi.Any(desc='datasink output') +# Custom DataSink class class DataSink(IOBase): """ Generic datasink module to store structured outputs @@ -230,9 +292,12 @@ class DataSink(IOBase): >>> ds.run() # doctest: +SKIP """ + + # Give obj .inputs and .outputs input_spec = DataSinkInputSpec output_spec = DataSinkOutputSpec + # Initialization method to set up datasink def __init__(self, infields=None, force_run=True, **kwargs): """ Parameters @@ -254,6 +319,7 @@ def __init__(self, infields=None, force_run=True, **kwargs): if force_run: self._always_run = True + # Get destination paths def _get_dst(self, src): # If path is directory with trailing os.path.sep, # then remove that for a more robust behavior @@ -277,6 +343,7 @@ def _get_dst(self, src): dst = dst[1:] return dst + # Substitute paths in substitutions dictionary parameter def _substitute(self, pathstr): pathstr_ = pathstr if isdefined(self.inputs.substitutions): @@ -297,17 +364,251 @@ def _substitute(self, pathstr): iflogger.info('sub: %s -> %s' % (pathstr_, pathstr)) return pathstr + # Check for s3 in base directory + def _check_s3_base_dir(self): + ''' + Method to see if the datasink's base directory specifies an + S3 bucket path; it it does, it parses the path for the bucket + name in the form 's3://bucket_name/...' and adds a bucket + attribute to the data sink instance, i.e. self.bucket + + Parameters + ---------- + + Returns + ------- + s3_flag : boolean + flag indicating whether the base_directory contained an + S3 bucket path + ''' + + # Import packages + import os + import sys + + # Init variables + s3_str = 's3://' + sep = os.path.sep + base_directory = self.inputs.base_directory + + # Check if 's3://' in base dir + if base_directory.startswith(s3_str): + try: + # Expects bucket name to be 's3://bucket_name/base_dir/..' + bucket_name = base_directory.split(s3_str)[1].split(sep)[0] + # Get the actual bucket object + self.bucket = self._fetch_bucket(bucket_name) + except Exception as exc: + err_msg = 'Unable to access S3 bucket. Error:\n%s. Exiting...'\ + % exc + print err_msg + sys.exit() + # Bucket access was a success, set flag + s3_flag = True + # Otherwise it's just a normal datasink + else: + s3_flag = False + + # Return s3_flag + return s3_flag + + # Function to return AWS secure environment variables + def _return_aws_keys(self, creds_path): + ''' + Method to return AWS access key id and secret access key using + credentials found in a local file. 
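+
+        As an illustration, the file is expected to contain just two such
+        lines (the key values below are the placeholder examples from the
+        AWS documentation, not real credentials):
+
+            AWSAccessKeyId=AKIAIOSFODNN7EXAMPLE
+            AWSSecretAccessKey=wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY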
+ + Parameters + ---------- + creds_path : string (filepath) + path to the csv file with 'AWSAccessKeyId=' followed by access + key in the first row and 'AWSSecretAccessKey=' followed by + secret access key in the second row + + Returns + ------- + aws_access_key_id : string + string of the AWS access key ID + aws_secret_access_key : string + string of the AWS secret access key + ''' + + # Import packages + import csv + + # Init variables + csv_reader = csv.reader(open(creds_path, 'r')) + + # Grab csv rows + row1 = csv_reader.next()[0] + row2 = csv_reader.next()[0] + + # And split out for keys + aws_access_key_id = row1.split('=')[1] + aws_secret_access_key = row2.split('=')[1] + + # Return keys + return aws_access_key_id, aws_secret_access_key + + # Fetch bucket object + def _fetch_bucket(self, bucket_name): + ''' + Method to a return a bucket object which can be used to interact + with an AWS S3 bucket using credentials found in a local file. + + Parameters + ---------- + bucket_name : string + string corresponding to the name of the bucket on S3 + + Returns + ------- + bucket : boto3.resources.factory.s3.Bucket + boto3 s3 Bucket object which is used to interact with files + in an S3 bucket on AWS + ''' + + # Import packages + import logging + + try: + import boto3 + import botocore + except ImportError as exc: + err_msg = 'Boto3 package is not installed - install boto3 and '\ + 'try again.' + raise Exception(err_msg) + + # Init variables + creds_path = self.inputs.creds_path + iflogger = logging.getLogger('interface') + + # Try and get AWS credentials if a creds_path is specified + if creds_path: + try: + aws_access_key_id, aws_secret_access_key = \ + self._return_aws_keys(creds_path) + except Exception as exc: + err_msg = 'There was a problem extracting the AWS credentials '\ + 'from the credentials file provided: %s. Error:\n%s'\ + % (creds_path, exc) + raise Exception(err_msg) + # Init connection + iflogger.info('Connecting to S3 bucket: %s with credentials from '\ + '%s ...' % (bucket_name, creds_path)) + # Use individual session for each instance of DataSink + # Better when datasinks are being used in multi-threading, see: + # http://boto3.readthedocs.org/en/latest/guide/resources.html#multithreading + session = boto3.session.Session(aws_access_key_id=aws_access_key_id, + aws_secret_access_key=aws_secret_access_key) + s3_resource = session.resource('s3', use_ssl=True) + + # Otherwise, connect anonymously + else: + iflogger.info('Connecting to AWS: %s anonymously...'\ + % bucket_name) + session = boto3.session.Session() + s3_resource = session.resource('s3', use_ssl=True) + s3_resource.meta.client.meta.events.register('choose-signer.s3.*', + botocore.handlers.disable_signing) + + # Explicitly declare a secure SSL connection for bucket object + bucket = s3_resource.Bucket(bucket_name) + + # And try fetch the bucket with the name argument + try: + s3_resource.meta.client.head_bucket(Bucket=bucket_name) + except botocore.exceptions.ClientError as exc: + error_code = int(exc.response['Error']['Code']) + if error_code == 403: + err_msg = 'Access to bucket: %s is denied; check credentials'\ + % bucket_name + raise Exception(err_msg) + elif error_code == 404: + err_msg = 'Bucket: %s does not exist; check spelling and try '\ + 'again' % bucket_name + raise Exception(err_msg) + else: + err_msg = 'Unable to connect to bucket: %s. Error message:\n%s'\ + % (bucket_name, exc) + except Exception as exc: + err_msg = 'Unable to connect to bucket: %s. 
Error message:\n%s'\ + % (bucket_name, exc) + raise Exception(err_msg) + + # Return the bucket + return bucket + + # Send up to S3 method + def _upload_to_s3(self, src, dst): + ''' + Method to upload outputs to S3 bucket instead of on local disk + ''' + + # Import packages + import logging + import os + + # Init variables + bucket = self.bucket + iflogger = logging.getLogger('interface') + s3_str = 's3://' + s3_prefix = os.path.join(s3_str, bucket.name) + + # If src is a directory, collect files (this assumes dst is a dir too) + if os.path.isdir(src): + src_files = [] + for root, dirs, files in os.walk(src): + src_files.extend([os.path.join(root, fil) for fil in files]) + # Make the dst files have the dst folder as base dir + dst_files = [os.path.join(dst, src_f.split(src)[1]) \ + for src_f in src_files] + else: + src_files = [src] + dst_files = [dst] + + # Iterate over src and copy to dst + for src_idx, src_f in enumerate(src_files): + # Get destination filename/keyname + dst_f = dst_files[src_idx] + dst_k = dst_f.replace(s3_prefix, '').lstrip('/') + + # Copy file up to S3 (either encrypted or not) + iflogger.info('Copying %s to S3 bucket, %s, as %s...'\ + % (src_f, bucket.name, dst_f)) + if self.inputs.encrypt_bucket_keys: + extra_args = {'ServerSideEncryption' : 'AES256'} + else: + extra_args = {} + bucket.upload_file(src_f, dst_k, ExtraArgs=extra_args, + Callback=ProgressPercentage(src_f)) + + # List outputs, main run routine def _list_outputs(self): """Execute this module. """ + + # Init variables + iflogger = logging.getLogger('interface') outputs = self.output_spec().get() out_files = [] outdir = self.inputs.base_directory + use_hardlink = str2bool(config.get('execution', + 'try_hard_link_datasink')) + + # If base directory isn't given, assume current directory if not isdefined(outdir): outdir = '.' 
- outdir = os.path.abspath(outdir) + + # Check if base directory reflects S3-bucket upload + s3_flag = self._check_s3_base_dir() + if not s3_flag: + outdir = os.path.abspath(outdir) + + # If container input is given, append that to outdir if isdefined(self.inputs.container): outdir = os.path.join(outdir, self.inputs.container) + # Create the directory if it doesn't exist if not os.path.exists(outdir): try: os.makedirs(outdir) @@ -316,8 +617,8 @@ def _list_outputs(self): pass else: raise(inst) - use_hardlink = str2bool(config.get('execution', - 'try_hard_link_datasink') ) + + # Iterate through outputs attributes {key : path(s)} for key, files in self.inputs._outputs.items(): if not isdefined(files): continue @@ -334,44 +635,49 @@ def _list_outputs(self): if isinstance(files[0], list): files = [item for sublist in files for item in sublist] + # Iterate through passed-in source files for src in filename_to_list(files): + # Format src and dst files src = os.path.abspath(src) - if os.path.isfile(src): - dst = self._get_dst(src) - dst = os.path.join(tempoutdir, dst) - dst = self._substitute(dst) - path, _ = os.path.split(dst) - if not os.path.exists(path): - try: - os.makedirs(path) - except OSError, inst: - if 'File exists' in inst: - pass - else: - raise(inst) - iflogger.debug("copyfile: %s %s" % (src, dst)) - copyfile(src, dst, copy=True, hashmethod='content', - use_hardlink=use_hardlink) - out_files.append(dst) - elif os.path.isdir(src): - dst = self._get_dst(os.path.join(src, '')) - dst = os.path.join(tempoutdir, dst) - dst = self._substitute(dst) - path, _ = os.path.split(dst) - if not os.path.exists(path): - try: - os.makedirs(path) - except OSError, inst: - if 'File exists' in inst: - pass - else: - raise(inst) - if os.path.exists(dst) and self.inputs.remove_dest_dir: - iflogger.debug("removing: %s" % dst) - shutil.rmtree(dst) - iflogger.debug("copydir: %s %s" % (src, dst)) - copytree(src, dst) + if not os.path.isfile(src): + src = os.path.join(src, '') + dst = self._get_dst(src) + dst = os.path.join(tempoutdir, dst) + dst = self._substitute(dst) + path, _ = os.path.split(dst) + + # Create output directory if it doesnt exist + if not os.path.exists(path): + try: + os.makedirs(path) + except OSError, inst: + if 'File exists' in inst: + pass + else: + raise(inst) + + # If we're uploading to S3 + if s3_flag: + self._upload_to_s3(src, dst) out_files.append(dst) + # Otherwise, copy locally src -> dst + else: + # If src is a file, copy it to dst + if os.path.isfile(src): + iflogger.debug('copyfile: %s %s' % (src, dst)) + copyfile(src, dst, copy=True, hashmethod='content', + use_hardlink=use_hardlink) + out_files.append(dst) + # If src is a directory, copy entire contents to dst dir + elif os.path.isdir(src): + if os.path.exists(dst) and self.inputs.remove_dest_dir: + iflogger.debug('removing: %s' % dst) + shutil.rmtree(dst) + iflogger.debug('copydir: %s %s' % (src, dst)) + copytree(src, dst) + out_files.append(dst) + + # Return outputs dictionary outputs['out_file'] = out_files return outputs From 4b0255815933a8e1c94b4b5a7de16a345bc48783 Mon Sep 17 00:00:00 2001 From: dclark87 Date: Tue, 6 Oct 2015 15:58:33 -0400 Subject: [PATCH 07/45] Removed redundant imports --- nipype/interfaces/io.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/nipype/interfaces/io.py b/nipype/interfaces/io.py index b35765c62e..71bce269f6 100644 --- a/nipype/interfaces/io.py +++ b/nipype/interfaces/io.py @@ -183,7 +183,7 @@ class DataSinkInputSpec(DynamicTraitedSpec, BaseInterfaceInputSpec): 
desc='Folder within base directory in which to store output') parameterization = traits.Bool(True, usedefault=True, desc='store output in parametrized structure') - strip_dir = traits.Directory(desc='path to strip out of filename') + strip_dir = Directory(desc='path to strip out of filename') substitutions = InputMultiPath(traits.Tuple(traits.Str, traits.Str), desc=('List of 2-tuples reflecting string ' 'to substitute and string to replace ' @@ -206,10 +206,9 @@ class DataSinkInputSpec(DynamicTraitedSpec, BaseInterfaceInputSpec): # Set call-able inputs attributes def __setattr__(self, key, value): - import nipype.interfaces.traits_extension as nit if key not in self.copyable_trait_names(): - if not nit.isdefined(value): + if not isdefined(value): super(DataSinkInputSpec, self).__setattr__(key, value) self._outputs[key] = value else: From 42f0b1b2e50673be3044fdc055c03d1ef181183d Mon Sep 17 00:00:00 2001 From: dclark87 Date: Tue, 6 Oct 2015 16:01:58 -0400 Subject: [PATCH 08/45] Quick cosmetic fix --- nipype/interfaces/io.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/nipype/interfaces/io.py b/nipype/interfaces/io.py index 71bce269f6..3137836642 100644 --- a/nipype/interfaces/io.py +++ b/nipype/interfaces/io.py @@ -592,8 +592,7 @@ def _list_outputs(self): outputs = self.output_spec().get() out_files = [] outdir = self.inputs.base_directory - use_hardlink = str2bool(config.get('execution', - 'try_hard_link_datasink')) + use_hardlink = str2bool(config.get('execution', 'try_hard_link_datasink')) # If base directory isn't given, assume current directory if not isdefined(outdir): From 872e7529e9bbc3daf8b38a9cfa68a284813db9fd Mon Sep 17 00:00:00 2001 From: carolFrohlich Date: Wed, 7 Oct 2015 12:52:34 -0400 Subject: [PATCH 09/45] scheduler does not sleep --- nipype/pipeline/plugins/base.py | 10 ++++++++- nipype/pipeline/plugins/multiproc.py | 22 +++++++++++++------ .../pipeline/plugins/semaphore_singleton.py | 3 +++ 3 files changed, 27 insertions(+), 8 deletions(-) create mode 100644 nipype/pipeline/plugins/semaphore_singleton.py diff --git a/nipype/pipeline/plugins/base.py b/nipype/pipeline/plugins/base.py index b987dcfc13..0ad681e1f7 100644 --- a/nipype/pipeline/plugins/base.py +++ b/nipype/pipeline/plugins/base.py @@ -16,6 +16,7 @@ import numpy as np import scipy.sparse as ssp +import semaphore_singleton from ..utils import (nx, dfs_preorder, topological_sort) @@ -261,9 +262,16 @@ def run(self, graph, config, updatehash=False): graph=graph) else: logger.debug('Not submitting') - sleep(float(self._config['execution']['poll_sleep_duration'])) + + print 'locking semaphore' + print 'pending tasks:', len(self.pending_tasks) + if len(self.pending_tasks) > 0: + semaphore_singleton.semaphore.acquire() + print 'semaphore was released' + #sleep(float(self._config['execution']['poll_sleep_duration'])) self._remove_node_dirs() report_nodes_not_run(notrun) + semaphore_singleton.semaphore.release() def _get_result(self, taskid): raise NotImplementedError diff --git a/nipype/pipeline/plugins/multiproc.py b/nipype/pipeline/plugins/multiproc.py index 88c843ddab..5f91ab9e19 100644 --- a/nipype/pipeline/plugins/multiproc.py +++ b/nipype/pipeline/plugins/multiproc.py @@ -11,6 +11,9 @@ import sys from .base import (DistributedPluginBase, report_crash) +import semaphore_singleton + + def run_node(node, updatehash): result = dict(result=None, traceback=None) @@ -22,6 +25,13 @@ def run_node(node, updatehash): result['result'] = node.result return result + + +def release_lock(args): + print 
'releasing semaphore' + semaphore_singleton.semaphore.release() + + class NonDaemonProcess(Process): """A non-daemon process to support internal multiprocessing. """ @@ -66,6 +76,7 @@ def __init__(self, plugin_args=None): else: self.pool = Pool(processes=n_procs) + def _get_result(self, taskid): if taskid not in self._taskresult: raise RuntimeError('Multiproc task %d not found'%taskid) @@ -80,9 +91,8 @@ def _submit_job(self, node, updatehash=False): node.inputs.terminal_output = 'allatonce' except: pass - self._taskresult[self._taskid] = self.pool.apply_async(run_node, - (node, - updatehash,)) + self._taskresult[self._taskid] = self.pool.apply_async(run_node, (node, + updatehash,), callback=release_lock) return self._taskid def _report_crash(self, node, result=None): @@ -114,17 +124,15 @@ class ResourceMultiProcPlugin(MultiProcPlugin): def __init__(self, plugin_args=None): super(ResourceMultiProcPlugin, self).__init__(plugin_args=plugin_args) self.plugin_args = plugin_args - self.current_time = datetime.datetime.now() - self.log_nodes = [] def _send_procs_to_workers(self, updatehash=False, graph=None): """ Sends jobs to workers when system resources are available. - Check memory and cores usage before running jobs. + Check memory (mb) and cores usage before running jobs. """ executing_now = [] processors = cpu_count() memory = psutil.virtual_memory() - memory = memory.total + memory = memory.total / (1024*1024) if self.plugin_args: if 'n_procs' in self.plugin_args: processors = self.plugin_args['n_procs'] diff --git a/nipype/pipeline/plugins/semaphore_singleton.py b/nipype/pipeline/plugins/semaphore_singleton.py new file mode 100644 index 0000000000..b5b3ca79b9 --- /dev/null +++ b/nipype/pipeline/plugins/semaphore_singleton.py @@ -0,0 +1,3 @@ +print 'calling semaphore' +import threading +semaphore = threading.Semaphore(1) \ No newline at end of file From e465c281783ed165da01b5d3fc10615b76eaab2b Mon Sep 17 00:00:00 2001 From: carolFrohlich Date: Thu, 8 Oct 2015 11:30:10 -0400 Subject: [PATCH 10/45] clean code --- nipype/pipeline/plugins/base.py | 16 +++---- nipype/pipeline/plugins/callback_log.py | 13 +++++- nipype/pipeline/plugins/multiproc.py | 56 +++++++++++++++++++------ 3 files changed, 60 insertions(+), 25 deletions(-) diff --git a/nipype/pipeline/plugins/base.py b/nipype/pipeline/plugins/base.py index 0ad681e1f7..cee2c7dad5 100644 --- a/nipype/pipeline/plugins/base.py +++ b/nipype/pipeline/plugins/base.py @@ -16,8 +16,6 @@ import numpy as np import scipy.sparse as ssp -import semaphore_singleton - from ..utils import (nx, dfs_preorder, topological_sort) from ..engine import (MapNode, str2bool) @@ -262,16 +260,14 @@ def run(self, graph, config, updatehash=False): graph=graph) else: logger.debug('Not submitting') - - print 'locking semaphore' - print 'pending tasks:', len(self.pending_tasks) - if len(self.pending_tasks) > 0: - semaphore_singleton.semaphore.acquire() - print 'semaphore was released' - #sleep(float(self._config['execution']['poll_sleep_duration'])) + self._wait() self._remove_node_dirs() report_nodes_not_run(notrun) - semaphore_singleton.semaphore.release() + + + + def _wait(self): + sleep(float(self._config['execution']['poll_sleep_duration'])) def _get_result(self, taskid): raise NotImplementedError diff --git a/nipype/pipeline/plugins/callback_log.py b/nipype/pipeline/plugins/callback_log.py index 52e2621f60..9ca38027c1 100644 --- a/nipype/pipeline/plugins/callback_log.py +++ b/nipype/pipeline/plugins/callback_log.py @@ -4,10 +4,18 @@ def log_nodes_cb(node, status): 
logger = logging.getLogger('callback') if status == 'start': - message = '{"name":' + '"' + node.name + '"' + ',"id":' + '"' + node._id + '"' + ',"start":' + '"' +str(datetime.datetime.now()) + '"' + ',"memory":' + str(node._interface.memory) + ',"num_threads":' + str(node._interface.num_threads) + '}' + message = '{"name":' + '"' + node.name + '"' + ',"id":' + '"' +\ + node._id + '"' + ',"start":' + '"' +str(datetime.datetime.now()) +\ + '"' + ',"memory":' + str(node._interface.memory) + ',"num_threads":' \ + + str(node._interface.num_threads) + '}' + logger.debug(message) else: - message = '{"name":' + '"' + node.name + '"' + ',"id":' + '"' + node._id + '"' + ',"finish":' + '"' + str(datetime.datetime.now()) + '"' + ',"memory":' + str(node._interface.memory) + ',"num_threads":' + str(node._interface.num_threads) + '}' + message = '{"name":' + '"' + node.name + '"' + ',"id":' + '"' + \ + node._id + '"' + ',"finish":' + '"' + str(datetime.datetime.now()) +\ + '"' + ',"memory":' + str(node._interface.memory) + ',"num_threads":' \ + + str(node._interface.num_threads) + '}' + logger.debug(message) @@ -34,6 +42,7 @@ def convert_logcb_to_json(filename): if end is not None: element['finish'] = end['finish'] + element['duration'] = (parser.parse(element['finish']) - parser.parse(element['start'])).total_seconds() else: element['finish'] = element['start'] diff --git a/nipype/pipeline/plugins/multiproc.py b/nipype/pipeline/plugins/multiproc.py index 5f91ab9e19..53272aafc4 100644 --- a/nipype/pipeline/plugins/multiproc.py +++ b/nipype/pipeline/plugins/multiproc.py @@ -11,8 +11,6 @@ import sys from .base import (DistributedPluginBase, report_crash) -import semaphore_singleton - def run_node(node, updatehash): @@ -26,12 +24,6 @@ def run_node(node, updatehash): return result - -def release_lock(args): - print 'releasing semaphore' - semaphore_singleton.semaphore.release() - - class NonDaemonProcess(Process): """A non-daemon process to support internal multiprocessing. """ @@ -92,7 +84,7 @@ def _submit_job(self, node, updatehash=False): except: pass self._taskresult[self._taskid] = self.pool.apply_async(run_node, (node, - updatehash,), callback=release_lock) + updatehash,)) return self._taskid def _report_crash(self, node, result=None): @@ -109,25 +101,63 @@ def _clear_task(self, taskid): - - import numpy as np from copy import deepcopy from ..engine import (MapNode, str2bool) import datetime import psutil from ... import logging +import semaphore_singleton logger = logging.getLogger('workflow') +def release_lock(args): + semaphore_singleton.semaphore.release() + class ResourceMultiProcPlugin(MultiProcPlugin): + """Execute workflow with multiprocessing not sending more jobs at once + than the system can support. + + The plugin_args input to run can be used to control the multiprocessing + execution and defining the maximum amount of memory and threads that + should be used. + System consuming nodes should be tagged: + memory_consuming_node.interface.memory = 8 #Gb + thread_consuming_node.interface.num_threads = 16 + + The default number of threads and memory for a node is 1. + + Currently supported options are: + + - num_thread: maximum number of threads to be executed in parallel + - memory: maximum memory that can be used at once. 
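+
+    A minimal usage sketch, assuming an already built workflow called
+    workflow (the limits below are illustrative values; 'n_procs' and
+    'memory' are the plugin_args keys this plugin reads, and
+    'status_callback' is optional and only needed for the callback log):
+
+        from nipype.pipeline.plugins.callback_log import log_nodes_cb
+
+        workflow.run(plugin='ResourceMultiProc',
+                     plugin_args={'n_procs': 8,
+                                  'memory': 12,
+                                  'status_callback': log_nodes_cb})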
+ + """ def __init__(self, plugin_args=None): super(ResourceMultiProcPlugin, self).__init__(plugin_args=plugin_args) self.plugin_args = plugin_args + def _wait(self): + if len(self.pending_tasks) > 0: + semaphore_singleton.semaphore.acquire() + else: + semaphore_singleton.semaphore.release() + + + def _submit_job(self, node, updatehash=False): + self._taskid += 1 + try: + if node.inputs.terminal_output == 'stream': + node.inputs.terminal_output = 'allatonce' + except: + pass + self._taskresult[self._taskid] = self.pool.apply_async(run_node, (node, + updatehash,), callback=release_lock) + return self._taskid + def _send_procs_to_workers(self, updatehash=False, graph=None): """ Sends jobs to workers when system resources are available. - Check memory (mb) and cores usage before running jobs. + Check memory (gb) and cores usage before running jobs. """ executing_now = [] processors = cpu_count() @@ -222,7 +252,7 @@ def _send_procs_to_workers(self, updatehash=False, graph=None): self._remove_node_dirs() else: - print('submitting', jobid) + logger.debug('submitting', jobid) tid = self._submit_job(deepcopy(self.procs[jobid]), updatehash=updatehash) if tid is None: self.proc_done[jobid] = False From e49965ca84b39441addfcec00ae9deaf900973e6 Mon Sep 17 00:00:00 2001 From: carolFrohlich Date: Thu, 8 Oct 2015 14:19:00 -0400 Subject: [PATCH 11/45] draw gant chart, small fixes --- nipype/pipeline/plugins/callback_log.py | 42 +----- nipype/pipeline/plugins/draw_gantt_chart.py | 148 ++++++++++++++++++++ nipype/pipeline/plugins/multiproc.py | 6 +- 3 files changed, 153 insertions(+), 43 deletions(-) create mode 100644 nipype/pipeline/plugins/draw_gantt_chart.py diff --git a/nipype/pipeline/plugins/callback_log.py b/nipype/pipeline/plugins/callback_log.py index 9ca38027c1..c78356081d 100644 --- a/nipype/pipeline/plugins/callback_log.py +++ b/nipype/pipeline/plugins/callback_log.py @@ -15,45 +15,5 @@ def log_nodes_cb(node, status): node._id + '"' + ',"finish":' + '"' + str(datetime.datetime.now()) +\ '"' + ',"memory":' + str(node._interface.memory) + ',"num_threads":' \ + str(node._interface.num_threads) + '}' - - logger.debug(message) - - - -import json -from dateutil import parser - -def convert_logcb_to_json(filename): - with open(filename, 'r') as content: - #read file separating each line - content = content.read() - lines = content.split('\n') - - #separate lines of starting nodes and ending nodes - starts = [ json.loads(x) for x in lines if '"start":' in x ] - ends = [json.loads(x) for x in lines if '"finish":' in x ] - - - - #foreach start, search its end. They have same name and id - #this line is O(n^2). 
refactor - for element in starts: - end = next((f for f in ends if (f['id'] == element['id'] and f['name'] == element['name'])), None) - - if end is not None: - element['finish'] = end['finish'] - element['duration'] = (parser.parse(element['finish']) - parser.parse(element['start'])).total_seconds() - else: - element['finish'] = element['start'] - - - first_node = starts[0]['start'] - last_node = ends[-1]['finish'] - - duration = parser.parse(last_node) - parser.parse(first_node) - #sorted(starts, key=lambda e: parser.parse(e['start'])) # sort by age - result = {'start': first_node, 'finish': last_node, 'duration':duration.total_seconds(), 'nodes': starts} - #finally, save the json file - with open(filename + '.json', 'w') as outfile: - json.dump(result, outfile) \ No newline at end of file + logger.debug(message) \ No newline at end of file diff --git a/nipype/pipeline/plugins/draw_gantt_chart.py b/nipype/pipeline/plugins/draw_gantt_chart.py new file mode 100644 index 0000000000..0478f88639 --- /dev/null +++ b/nipype/pipeline/plugins/draw_gantt_chart.py @@ -0,0 +1,148 @@ +import json +from dateutil import parser +import datetime +import random + + +def log_to_json(logfile): + result = [] + with open(logfile, 'r') as content: + + #read file separating each line + content = content.read() + lines = content.split('\n') + + lines = [ json.loads(x) for x in lines[:-1]] + + last_node = [ x for x in lines if x.has_key('finish')][-1] + + for i, line in enumerate(lines): + #get first start it finds + if not line.has_key('start'): + continue + + #fint the end node for that start + for j in range(i+1, len(lines)): + if lines[j].has_key('finish'): + if lines[j]['id'] == line['id'] and lines[j]['name'] == line['name']: + line['finish'] = lines[j]['finish'] + line['duration'] = (parser.parse(line['finish']) - parser.parse(line['start'])).total_seconds() + result.append(line) + break + + return result, last_node + + +#total duration in seconds +def draw_lines(start, total_duration, minute_scale, scale): + result = '' + next_line = 220 + next_time = start; + num_lines = int((total_duration/60) / minute_scale) +2; + + for i in range(num_lines): + new_line = "
" + result += new_line + + time = "

" + str(next_time.hour) + ':' + str(next_time.minute) + "

"; + result += time + + next_line += minute_scale * scale + next_time += datetime.timedelta(minutes=minute_scale) + return result + +def draw_nodes(start, nodes, cores, scale, colors): + result = '' + end_times = [datetime.datetime(start.year, start.month, start.day, start.hour, start.minute, start.second) for x in range(cores)] + + for node in nodes: + node_start = parser.parse(node['start']) + node_finish = parser.parse(node['finish']) + offset = ((node_start - start).total_seconds() / 60) * scale + 220 + scale_duration = (node['duration'] / 60) * scale + if scale_duration < 5: + scale_duration = 5 + + scale_duration -= 2 + left = 60 + for j in range(len(end_times)): + if end_times[j] < node_start: + left += j * 30 + end_times[j] = datetime.datetime(node_finish.year, node_finish.month, node_finish.day, node_finish.hour, node_finish.minute, node_finish.second) + #end_times[j]+= datetime.timedelta(microseconds=node_finish.microsecond) + break + + color = random.choice(colors) + new_node = "
"; + result += new_node + return result + + +''' +Generates a gantt chart in html showing the workflow execution based on a callback log file. +This script was intended to be used with the ResourceMultiprocPlugin. +The following code shows how to set up the workflow in order to generate the log file: + +# import logging +# import logging.handlers +# from nipype.pipeline.plugins.callback_log import log_nodes_cb + +# log_filename = 'callback.log' +# logger = logging.getLogger('callback') +# logger.setLevel(logging.DEBUG) +# handler = logging.FileHandler(log_filename) +# logger.addHandler(handler) + +# #create workflow +# workflow = ... + +# workflow.run(plugin='ResourceMultiProc', +# plugin_args={'num_threads':8, 'memory':12, 'status_callback': log_nodes_cb}) + +# generate_gantt_chart('callback.log', 8) +''' +def generate_gantt_chart(logfile, cores, minute_scale=10, space_between_minutes=50, colors=["#7070FF", "#4E4EB2", "#2D2D66", "#9B9BFF"]): + + result, last_node = log_to_json(logfile) + scale = space_between_minutes + + #add the html header + html_string = ''' + + + + + +
'''
+
+
+    #create the header of the report with useful information
+    start = parser.parse(result[0]['start'])
+    duration = int((parser.parse(last_node['finish']) - start).total_seconds())
+
+    html_string += '

Start: '+ result[0]['start'] +'

' + html_string += '

Finish: '+ last_node['finish'] +'

' + html_string += '

Duration: '+ str(duration/60) +' minutes

' + html_string += '

Nodes: '+str(len(result))+'

' + html_string += '

Cores: '+str(cores)+'

'
+
+
+    #draw lines
+    html_string += draw_lines(start, duration, minute_scale, scale)
+
+    #draw nodes
+    html_string += draw_nodes(start, result, cores, scale, colors)
+
+
+    #finish html
+    html_string+= '''
+ ''' + + #save file + html_file = open(logfile +'.html', 'wb') + html_file.write(html_string) + html_file.close() + + +generate_gantt_chart('/home/caroline/Desktop/callback.log', 8) \ No newline at end of file diff --git a/nipype/pipeline/plugins/multiproc.py b/nipype/pipeline/plugins/multiproc.py index 53272aafc4..8f14825ddd 100644 --- a/nipype/pipeline/plugins/multiproc.py +++ b/nipype/pipeline/plugins/multiproc.py @@ -119,7 +119,9 @@ class ResourceMultiProcPlugin(MultiProcPlugin): The plugin_args input to run can be used to control the multiprocessing execution and defining the maximum amount of memory and threads that - should be used. + should be used. When those parameters are not specified, + the number of threads and memory of the system is used. + System consuming nodes should be tagged: memory_consuming_node.interface.memory = 8 #Gb thread_consuming_node.interface.num_threads = 16 @@ -162,7 +164,7 @@ def _send_procs_to_workers(self, updatehash=False, graph=None): executing_now = [] processors = cpu_count() memory = psutil.virtual_memory() - memory = memory.total / (1024*1024) + memory = memory.total / (1024*1024*1024) if self.plugin_args: if 'n_procs' in self.plugin_args: processors = self.plugin_args['n_procs'] From 34acdf8f8a3285ec80c1aa9def49a30e1eaa9702 Mon Sep 17 00:00:00 2001 From: carolFrohlich Date: Thu, 8 Oct 2015 17:06:41 -0400 Subject: [PATCH 12/45] add memory and thread to gantt chart, callback handles errors --- nipype/pipeline/plugins/callback_log.py | 12 +++++++++- nipype/pipeline/plugins/multiproc.py | 23 +++++++++---------- .../plugins => utils}/draw_gantt_chart.py | 5 +--- 3 files changed, 23 insertions(+), 17 deletions(-) rename nipype/{pipeline/plugins => utils}/draw_gantt_chart.py (98%) diff --git a/nipype/pipeline/plugins/callback_log.py b/nipype/pipeline/plugins/callback_log.py index c78356081d..34952864b7 100644 --- a/nipype/pipeline/plugins/callback_log.py +++ b/nipype/pipeline/plugins/callback_log.py @@ -2,6 +2,7 @@ import logging def log_nodes_cb(node, status): + print 'status', status logger = logging.getLogger('callback') if status == 'start': message = '{"name":' + '"' + node.name + '"' + ',"id":' + '"' +\ @@ -10,10 +11,19 @@ def log_nodes_cb(node, status): + str(node._interface.num_threads) + '}' logger.debug(message) - else: + + elif status == 'end': message = '{"name":' + '"' + node.name + '"' + ',"id":' + '"' + \ node._id + '"' + ',"finish":' + '"' + str(datetime.datetime.now()) +\ '"' + ',"memory":' + str(node._interface.memory) + ',"num_threads":' \ + str(node._interface.num_threads) + '}' + logger.debug(message) + + else: + message = '{"name":' + '"' + node.name + '"' + ',"id":' + '"' + \ + node._id + '"' + ',"finish":' + '"' + str(datetime.datetime.now()) +\ + '"' + ',"memory":' + str(node._interface.memory) + ',"num_threads":' \ + + str(node._interface.num_threads) + ',"error":"True"}' + logger.debug(message) \ No newline at end of file diff --git a/nipype/pipeline/plugins/multiproc.py b/nipype/pipeline/plugins/multiproc.py index 8f14825ddd..8d66be6999 100644 --- a/nipype/pipeline/plugins/multiproc.py +++ b/nipype/pipeline/plugins/multiproc.py @@ -138,12 +138,19 @@ class ResourceMultiProcPlugin(MultiProcPlugin): def __init__(self, plugin_args=None): super(ResourceMultiProcPlugin, self).__init__(plugin_args=plugin_args) self.plugin_args = plugin_args + self.processors = cpu_count() + memory = psutil.virtual_memory() + self.memory = memory.total / (1024*1024*1024) + if self.plugin_args: + if 'n_procs' in self.plugin_args: + self.processors = 
self.plugin_args['n_procs'] + if 'memory' in self.plugin_args: + self.memory = self.plugin_args['memory'] def _wait(self): if len(self.pending_tasks) > 0: semaphore_singleton.semaphore.acquire() - else: - semaphore_singleton.semaphore.release() + semaphore_singleton.semaphore.release() def _submit_job(self, node, updatehash=False): @@ -162,14 +169,6 @@ def _send_procs_to_workers(self, updatehash=False, graph=None): Check memory (gb) and cores usage before running jobs. """ executing_now = [] - processors = cpu_count() - memory = psutil.virtual_memory() - memory = memory.total / (1024*1024*1024) - if self.plugin_args: - if 'n_procs' in self.plugin_args: - processors = self.plugin_args['n_procs'] - if 'memory' in self.plugin_args: - memory = self.plugin_args['memory'] # Check to see if a job is available jobids = np.flatnonzero((self.proc_pending == True) & (self.depidx.sum(axis=0) == 0).__array__()) @@ -181,8 +180,8 @@ def _send_procs_to_workers(self, updatehash=False, graph=None): busy_memory+= self.procs[jobid]._interface.memory busy_processors+= self.procs[jobid]._interface.num_threads - free_memory = memory - busy_memory - free_processors = processors - busy_processors + free_memory = self.memory - busy_memory + free_processors = self.processors - busy_processors #check all jobs without dependency not run diff --git a/nipype/pipeline/plugins/draw_gantt_chart.py b/nipype/utils/draw_gantt_chart.py similarity index 98% rename from nipype/pipeline/plugins/draw_gantt_chart.py rename to nipype/utils/draw_gantt_chart.py index 0478f88639..85ae66ddb8 100644 --- a/nipype/pipeline/plugins/draw_gantt_chart.py +++ b/nipype/utils/draw_gantt_chart.py @@ -142,7 +142,4 @@ def generate_gantt_chart(logfile, cores, minute_scale=10, space_between_minutes= #save file html_file = open(logfile +'.html', 'wb') html_file.write(html_string) - html_file.close() - - -generate_gantt_chart('/home/caroline/Desktop/callback.log', 8) \ No newline at end of file + html_file.close() \ No newline at end of file From c9c92ef9181ae285178d4dd949af3cf360e17235 Mon Sep 17 00:00:00 2001 From: dclark87 Date: Thu, 8 Oct 2015 18:09:47 -0400 Subject: [PATCH 13/45] Added handling of DataSink to save to a local directory if it cant access S3 --- nipype/interfaces/io.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/nipype/interfaces/io.py b/nipype/interfaces/io.py index 3137836642..97812373b2 100644 --- a/nipype/interfaces/io.py +++ b/nipype/interfaces/io.py @@ -400,8 +400,7 @@ def _check_s3_base_dir(self): except Exception as exc: err_msg = 'Unable to access S3 bucket. Error:\n%s. Exiting...'\ % exc - print err_msg - sys.exit() + raise Exception(err_msg) # Bucket access was a success, set flag s3_flag = True # Otherwise it's just a normal datasink @@ -599,7 +598,18 @@ def _list_outputs(self): outdir = '.' # Check if base directory reflects S3-bucket upload - s3_flag = self._check_s3_base_dir() + try: + s3_flag = self._check_s3_base_dir() + # If encountering an exception during bucket access, set output + # base directory to a local folder + except Exception as exc: + local_out_exception = os.path.join(os.path.expanduser('~'), + 'data_output') + iflogger.info('Access to S3 failed! 
Storing outputs locally at: '\ + '%s\nError: %s' %(local_out_exception, exc)) + self.inputs.base_directory = local_out_exception + + # If not accessing S3, just set outdir to local absolute path if not s3_flag: outdir = os.path.abspath(outdir) From cb07b5ab4d1079ff759989d7c6443d852874954a Mon Sep 17 00:00:00 2001 From: carolFrohlich Date: Fri, 9 Oct 2015 11:01:54 -0400 Subject: [PATCH 14/45] add tests --- .../pipeline/plugins/semaphore_singleton.py | 1 - .../pipeline/plugins/tests/test_multiproc.py | 52 ++++---- nipype/utils/draw_gantt_chart.py | 118 +++++++++++++++++- 3 files changed, 144 insertions(+), 27 deletions(-) diff --git a/nipype/pipeline/plugins/semaphore_singleton.py b/nipype/pipeline/plugins/semaphore_singleton.py index b5b3ca79b9..8894615a14 100644 --- a/nipype/pipeline/plugins/semaphore_singleton.py +++ b/nipype/pipeline/plugins/semaphore_singleton.py @@ -1,3 +1,2 @@ -print 'calling semaphore' import threading semaphore = threading.Semaphore(1) \ No newline at end of file diff --git a/nipype/pipeline/plugins/tests/test_multiproc.py b/nipype/pipeline/plugins/tests/test_multiproc.py index 645d3bc567..d7e6b1661a 100644 --- a/nipype/pipeline/plugins/tests/test_multiproc.py +++ b/nipype/pipeline/plugins/tests/test_multiproc.py @@ -26,7 +26,7 @@ def _list_outputs(self): outputs['output1'] = [1, self.inputs.input1] return outputs -def test_run_multiproc(): +def run_multiproc(): cur_dir = os.getcwd() temp_dir = mkdtemp(prefix='test_engine_') os.chdir(temp_dir) @@ -49,8 +49,7 @@ def test_run_multiproc(): rmtree(temp_dir) - -################################# +################################ class InputSpecSingleNode(nib.TraitedSpec): @@ -75,15 +74,15 @@ def _list_outputs(self): return outputs -def parse_log(filename, measure): +def find_metrics(nodes, last_node): import json from dateutil.parser import parse from datetime import datetime import datetime as d - json_data = open(filename).read() - data = json.loads(json_data) - total_duration = int(float(data['duration'])) #total duration in seconds + + start = parse(nodes[0]['start']) + total_duration = int((parse(last_node['finish']) - start).total_seconds()) total_memory = [] total_threads = [] @@ -91,7 +90,7 @@ def parse_log(filename, measure): total_memory.append(0) total_threads.append(0) - now = parse(data['start']) + now = start for i in range(total_duration): start_index = 0 node_start = None @@ -99,13 +98,13 @@ def parse_log(filename, measure): x = now - for j in range(start_index, len(data['nodes'])): - node_start = parse(data['nodes'][j]['start']) - node_finish = parse(data['nodes'][j]['finish']) + for j in range(start_index, len(nodes)): + node_start = parse(nodes[j]['start']) + node_finish = parse(nodes[j]['finish']) if node_start < x and node_finish > x: - total_memory[i] += data['nodes'][j]['memory'] - total_threads[i] += data['nodes'][j]['num_threads'] + total_memory[i] += nodes[j]['memory'] + total_threads[i] += nodes[j]['num_threads'] start_index = j if node_start > x: @@ -117,11 +116,14 @@ def parse_log(filename, measure): import os -from nipype.pipeline.plugins.callback_log import log_nodes_cb, convert_logcb_to_json +from nipype.pipeline.plugins.callback_log import log_nodes_cb import logging import logging.handlers import psutil from multiprocessing import cpu_count + +from nipype.utils import draw_gantt_chart + def test_do_not_use_more_memory_then_specified(): LOG_FILENAME = 'callback.log' my_logger = logging.getLogger('callback') @@ -148,12 +150,14 @@ def test_do_not_use_more_memory_then_specified(): 
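For reference, a minimal end-to-end use of the resource-aware scheduling added in this series might look like the following sketch. The node names and limits are hypothetical, it reuses the toy TestInterfaceSingleNode defined in this test module, and it assumes the interface-level memory/num_threads attributes, the 'ResourceMultiProc' plugin name and the log_nodes_cb callback introduced in these patches.

import logging
import nipype.pipeline.engine as pe
from nipype.pipeline.plugins.callback_log import log_nodes_cb

# route the 'callback' logger to a file so log_nodes_cb output is captured
logger = logging.getLogger('callback')
logger.setLevel(logging.DEBUG)
logger.addHandler(logging.FileHandler('callback.log'))

wf = pe.Workflow(name='resource_demo')
heavy = pe.Node(interface=TestInterfaceSingleNode(), name='heavy')
light = pe.Node(interface=TestInterfaceSingleNode(), name='light')
heavy.interface.memory = 8        # Gb estimate (renamed to estimated_memory later in the series)
heavy.interface.num_threads = 4
light.interface.memory = 1
heavy.inputs.input1 = 1
wf.connect(heavy, 'output1', light, 'input1')

# cap the scheduler at 12 Gb / 8 cores and log every node start/finish
wf.run(plugin='ResourceMultiProc',
       plugin_args={'memory': 12,
                    'n_procs': 8,
                    'status_callback': log_nodes_cb})
# afterwards the log can be rendered with
# nipype.utils.draw_gantt_chart.generate_gantt_chart('callback.log', 8)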
pipe.connect(n2, 'output1', n4, 'input1') pipe.connect(n3, 'output1', n4, 'input2') n1.inputs.input1 = 10 - pipe.config['execution']['poll_sleep_duration'] = 1 - pipe.run(plugin='ResourceMultiProc', plugin_args={'memory': max_memory, 'status_callback': log_nodes_cb}) - convert_logcb_to_json(LOG_FILENAME) - #memory usage in every second - memory, threads = parse_log(LOG_FILENAME + '.json' , 'memory') + pipe.run(plugin='ResourceMultiProc', plugin_args={'memory': max_memory, + 'status_callback': log_nodes_cb}) + + + nodes, last_node = draw_gantt_chart.log_to_json(LOG_FILENAME) + #usage in every second + memory, threads = find_metrics(nodes, last_node) result = True for m in memory: @@ -174,7 +178,6 @@ def test_do_not_use_more_memory_then_specified(): yield assert_equal, result, True, "using more threads than system has (threads is not specified by user)" os.remove(LOG_FILENAME) - os.remove(LOG_FILENAME + '.json') def test_do_not_use_more_threads_then_specified(): @@ -206,9 +209,9 @@ def test_do_not_use_more_threads_then_specified(): pipe.config['execution']['poll_sleep_duration'] = 1 pipe.run(plugin='ResourceMultiProc', plugin_args={'n_procs': max_threads, 'status_callback': log_nodes_cb}) - convert_logcb_to_json(LOG_FILENAME) - #threads usage in every second - memory, threads = parse_log(LOG_FILENAME + '.json' , 'num_threads') + nodes, last_node = draw_gantt_chart.log_to_json(LOG_FILENAME) + #usage in every second + memory, threads = find_metrics(nodes, last_node) result = True for t in threads: @@ -226,5 +229,4 @@ def test_do_not_use_more_threads_then_specified(): break yield assert_equal, result, True, "using more memory than system has (memory is not specified by user)" - os.remove(LOG_FILENAME) - os.remove(LOG_FILENAME + '.json') \ No newline at end of file + os.remove(LOG_FILENAME) \ No newline at end of file diff --git a/nipype/utils/draw_gantt_chart.py b/nipype/utils/draw_gantt_chart.py index 85ae66ddb8..5adff16c3d 100644 --- a/nipype/utils/draw_gantt_chart.py +++ b/nipype/utils/draw_gantt_chart.py @@ -78,6 +78,79 @@ def draw_nodes(start, nodes, cores, scale, colors): return result +def draw_thread_bar(start, total_duration, nodes, space_between_minutes, minute_scale): + result = "

Threads

" + + total = total_duration/60 + thread = [0 for x in range(total)] + + now = start + + #calculate nuber of threads in every second + for i in range(total): + node_start = None + node_finish = None + + for j in range(i, len(nodes)): + node_start = parser.parse(nodes[j]['start']) + node_finish = parser.parse(nodes[j]['finish']) + + if node_start <= now and node_finish >= now: + thread[i] += nodes[j]['num_threads'] + if node_start > now: + break + now += datetime.timedelta(minutes=1) + + + #draw thread bar + scale = float(space_between_minutes/float(minute_scale)) + + for i in range(len(thread)): + width = thread[i] * 10 + t = (i*scale*minute_scale) + 220 + bar = "
" + result += bar + + return result + + + +def draw_memory_bar(start, total_duration, nodes, space_between_minutes, minute_scale): + result = "

Memory

" + + total = total_duration/60 + memory = [0 for x in range(total)] + + now = start + + #calculate nuber of threads in every second + for i in range(total): + node_start = None + node_finish = None + + for j in range(i, len(nodes)): + node_start = parser.parse(nodes[j]['start']) + node_finish = parser.parse(nodes[j]['finish']) + + if node_start <= now and node_finish >= now: + memory[i] += nodes[j]['memory'] + if node_start > now: + break + now += datetime.timedelta(minutes=1) + + + #draw thread bar + scale = float(space_between_minutes/float(minute_scale)) + + for i in range(len(memory)): + width = memory[i] * 10 + t = (i*scale*minute_scale) + 220 + bar = "
" + result += bar + + return result + + ''' Generates a gantt chart in html showing the workflow execution based on a callback log file. This script was intended to be used with the ResourceMultiprocPlugin. @@ -109,7 +182,48 @@ def generate_gantt_chart(logfile, cores, minute_scale=10, space_between_minutes= #add the html header html_string = ''' - + @@ -133,6 +247,8 @@ def generate_gantt_chart(logfile, cores, minute_scale=10, space_between_minutes= #draw nodes html_string += draw_nodes(start, result, cores, scale, colors) + html_string += draw_thread_bar(start, duration, result, space_between_minutes, minute_scale) + html_string += draw_memory_bar(start, duration, result, space_between_minutes, minute_scale) #finish html html_string+= ''' From 827d2c2e6fd038f1c3104fc8e33af83c4e527e1d Mon Sep 17 00:00:00 2001 From: carolFrohlich Date: Fri, 9 Oct 2015 11:15:45 -0400 Subject: [PATCH 15/45] fix method name --- nipype/pipeline/plugins/tests/test_multiproc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nipype/pipeline/plugins/tests/test_multiproc.py b/nipype/pipeline/plugins/tests/test_multiproc.py index d7e6b1661a..5e841b78a3 100644 --- a/nipype/pipeline/plugins/tests/test_multiproc.py +++ b/nipype/pipeline/plugins/tests/test_multiproc.py @@ -26,7 +26,7 @@ def _list_outputs(self): outputs['output1'] = [1, self.inputs.input1] return outputs -def run_multiproc(): +def test_run_multiproc(): cur_dir = os.getcwd() temp_dir = mkdtemp(prefix='test_engine_') os.chdir(temp_dir) From a8f8006d9dee7c94185b017bccefebbf72a63f77 Mon Sep 17 00:00:00 2001 From: carolFrohlich Date: Fri, 9 Oct 2015 15:36:11 -0400 Subject: [PATCH 16/45] fix typos --- nipype/interfaces/io.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/nipype/interfaces/io.py b/nipype/interfaces/io.py index 97812373b2..b979297f66 100644 --- a/nipype/interfaces/io.py +++ b/nipype/interfaces/io.py @@ -134,7 +134,7 @@ def _add_output_traits(self, base): # Class to track percentage of S3 file upload class ProgressPercentage(object): ''' - Call-able class instsance (via __call__ method) that displays + Callable class instsance (via __call__ method) that displays upload percentage of a file to S3 ''' @@ -367,7 +367,7 @@ def _substitute(self, pathstr): def _check_s3_base_dir(self): ''' Method to see if the datasink's base directory specifies an - S3 bucket path; it it does, it parses the path for the bucket + S3 bucket path; if it does, it parses the path for the bucket name in the form 's3://bucket_name/...' and adds a bucket attribute to the data sink instance, i.e. self.bucket @@ -451,7 +451,7 @@ def _return_aws_keys(self, creds_path): # Fetch bucket object def _fetch_bucket(self, bucket_name): ''' - Method to a return a bucket object which can be used to interact + Method to return a bucket object which can be used to interact with an AWS S3 bucket using credentials found in a local file. 
Parameters From 300d20c1a2b3ca7d3c0599349f9823c93907cf3e Mon Sep 17 00:00:00 2001 From: dclark87 Date: Thu, 15 Oct 2015 16:48:50 -0400 Subject: [PATCH 17/45] Update io.py --- nipype/interfaces/io.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nipype/interfaces/io.py b/nipype/interfaces/io.py index b979297f66..1d50062eb0 100644 --- a/nipype/interfaces/io.py +++ b/nipype/interfaces/io.py @@ -572,7 +572,7 @@ def _upload_to_s3(self, src, dst): dst_k = dst_f.replace(s3_prefix, '').lstrip('/') # Copy file up to S3 (either encrypted or not) - iflogger.info('Copying %s to S3 bucket, %s, as %s...'\ + iflogger.info('Uploading %s to S3 bucket, %s, as %s...'\ % (src_f, bucket.name, dst_f)) if self.inputs.encrypt_bucket_keys: extra_args = {'ServerSideEncryption' : 'AES256'} From 0503c23912d9d90b0010cf83b8594b1594f0d32b Mon Sep 17 00:00:00 2001 From: dclark87 Date: Thu, 15 Oct 2015 18:02:49 -0400 Subject: [PATCH 18/45] Added md5 checking for s3 --- nipype/interfaces/io.py | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/nipype/interfaces/io.py b/nipype/interfaces/io.py index 97812373b2..ef4cc5aab0 100644 --- a/nipype/interfaces/io.py +++ b/nipype/interfaces/io.py @@ -544,9 +544,12 @@ def _upload_to_s3(self, src, dst): ''' # Import packages + import hashlib import logging import os + from botocore.exceptions import ClientError + # Init variables bucket = self.bucket iflogger = logging.getLogger('interface') @@ -571,8 +574,25 @@ def _upload_to_s3(self, src, dst): dst_f = dst_files[src_idx] dst_k = dst_f.replace(s3_prefix, '').lstrip('/') + # See if same file is already up there + try: + dst_obj = bucket.Object(key=dst_k) + dst_md5 = dst_obj.e_tag.strip('"') + + # See if same file is already there + src_read = open(src_f, 'rb').read() + src_md5 = hashlib.md5(src_read).hexdigest() + # Move to next loop iteration + if dst_md5 == src_md5: + continue + else: + iflogger.info('Overwriting previous S3 file...') + + except ClientError as exc: + iflogger.info('New file to S3') + # Copy file up to S3 (either encrypted or not) - iflogger.info('Copying %s to S3 bucket, %s, as %s...'\ + iflogger.info('Uploading %s to S3 bucket, %s, as %s...'\ % (src_f, bucket.name, dst_f)) if self.inputs.encrypt_bucket_keys: extra_args = {'ServerSideEncryption' : 'AES256'} From f6cfad76205bebbb8f96db88b790f3c5f6ddb158 Mon Sep 17 00:00:00 2001 From: dclark87 Date: Thu, 15 Oct 2015 18:39:28 -0400 Subject: [PATCH 19/45] Added message about file already existsing --- nipype/interfaces/io.py | 1 + 1 file changed, 1 insertion(+) diff --git a/nipype/interfaces/io.py b/nipype/interfaces/io.py index 1f1396ae45..b5c2e9d1f5 100644 --- a/nipype/interfaces/io.py +++ b/nipype/interfaces/io.py @@ -584,6 +584,7 @@ def _upload_to_s3(self, src, dst): src_md5 = hashlib.md5(src_read).hexdigest() # Move to next loop iteration if dst_md5 == src_md5: + iflogger.info('File %s already exists on S3, skipping...' 
% dst_f) continue else: iflogger.info('Overwriting previous S3 file...') From 186d00a14abc7c4ff3cf3934d1f1763308ed0edd Mon Sep 17 00:00:00 2001 From: dclark87 Date: Wed, 21 Oct 2015 16:02:31 -0400 Subject: [PATCH 20/45] Fixed dive by 0 bug --- nipype/interfaces/io.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/nipype/interfaces/io.py b/nipype/interfaces/io.py index b5c2e9d1f5..85f53b1cd1 100644 --- a/nipype/interfaces/io.py +++ b/nipype/interfaces/io.py @@ -162,7 +162,10 @@ def __call__(self, bytes_amount): # With the lock on, print upload status with self._lock: self._seen_so_far += bytes_amount - percentage = (self._seen_so_far / self._size) * 100 + if self._size != 0: + percentage = (self._seen_so_far / self._size) * 100 + else: + percentage = 0 progress_str = '%d / %d (%.2f%%)\r'\ % (self._seen_so_far, self._size, percentage) From f77371b9649ecc2aba5581c893677a4302f2a5a5 Mon Sep 17 00:00:00 2001 From: dclark87 Date: Fri, 30 Oct 2015 16:19:00 -0400 Subject: [PATCH 21/45] Added upper/lower case support for S3 prefix --- nipype/interfaces/io.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/nipype/interfaces/io.py b/nipype/interfaces/io.py index 85f53b1cd1..89d1bf9bb0 100644 --- a/nipype/interfaces/io.py +++ b/nipype/interfaces/io.py @@ -393,6 +393,12 @@ def _check_s3_base_dir(self): sep = os.path.sep base_directory = self.inputs.base_directory + # Explicitly lower-case the "s3" + if base_directory.lower().startswith(s3_str): + base_dir_sp = base_directory.split('/') + base_dir_sp[0] = base_dir_sp[0].lower() + base_directory = '/'.join(base_dir_sp) + # Check if 's3://' in base dir if base_directory.startswith(s3_str): try: @@ -559,6 +565,12 @@ def _upload_to_s3(self, src, dst): s3_str = 's3://' s3_prefix = os.path.join(s3_str, bucket.name) + # Explicitly lower-case the "s3" + if dst.lower().startswith(s3_str): + dst_sp = dst.split('/') + dst_sp[0] = dst_sp[0].lower() + dst = '/'.join(dst_sp) + # If src is a directory, collect files (this assumes dst is a dir too) if os.path.isdir(src): src_files = [] From e2f51f61edf26b88907eea9f46af3f9bf8420235 Mon Sep 17 00:00:00 2001 From: dclark87 Date: Tue, 3 Nov 2015 16:59:28 -0500 Subject: [PATCH 22/45] Added support for both non-root and root AWS creds in DataSink --- nipype/interfaces/io.py | 41 ++++++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/nipype/interfaces/io.py b/nipype/interfaces/io.py index 89d1bf9bb0..f8b4f16f60 100644 --- a/nipype/interfaces/io.py +++ b/nipype/interfaces/io.py @@ -144,7 +144,6 @@ def __init__(self, filename): # Import packages import threading - import os # Initialize data attributes self._filename = filename @@ -384,10 +383,6 @@ def _check_s3_base_dir(self): S3 bucket path ''' - # Import packages - import os - import sys - # Init variables s3_str = 's3://' sep = os.path.sep @@ -428,9 +423,8 @@ def _return_aws_keys(self, creds_path): Parameters ---------- creds_path : string (filepath) - path to the csv file with 'AWSAccessKeyId=' followed by access - key in the first row and 'AWSSecretAccessKey=' followed by - secret access key in the second row + path to the csv file downloaded from AWS; can either be root + or user credentials Returns ------- @@ -440,19 +434,28 @@ def _return_aws_keys(self, creds_path): string of the AWS secret access key ''' - # Import packages - import csv - # Init variables - csv_reader = csv.reader(open(creds_path, 'r')) - - # Grab csv rows - row1 = csv_reader.next()[0] - row2 = csv_reader.next()[0] 
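The skip-if-identical logic a few hunks above compares a local MD5 digest with the object's ETag; one caveat worth noting (an assumption, not stated in the patch) is that S3 ETags only equal a plain MD5 for single-part uploads, so a standalone version of that check might look like:

import hashlib

def already_uploaded_sketch(local_path, bucket, key):
    # Return True when the S3 object exists and its ETag matches the local
    # file's MD5; multipart ETags (containing '-') are treated as a mismatch
    with open(local_path, 'rb') as local_file:
        local_md5 = hashlib.md5(local_file.read()).hexdigest()
    try:
        etag = bucket.Object(key=key).e_tag.strip('"')
    except Exception:
        return False
    if '-' in etag:
        return False
    return etag == local_md5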
+ with open(creds_path, 'r') as creds_in: + # Grab csv rows + row1 = creds_in.readline() + row2 = creds_in.readline() + + # Are they root or user keys + if 'User Name' in row1: + # And split out for keys + aws_access_key_id = row2.split(',')[1] + aws_secret_access_key = row2.split(',')[2] + elif 'AWSAccessKeyId' in row1: + # And split out for keys + aws_access_key_id = row1.split('=')[1] + aws_secret_access_key = row2.split('=')[1] + else: + err_msg = 'Credentials file not recognized, check file is correct' + raise Exception(err_msg) - # And split out for keys - aws_access_key_id = row1.split('=')[1] - aws_secret_access_key = row2.split('=')[1] + # Strip any carriage return/line feeds + aws_access_key_id = aws_access_key_id.replace('\r', '').replace('\n', '') + aws_secret_access_key = aws_secret_access_key.replace('\r', '').replace('\n', '') # Return keys return aws_access_key_id, aws_secret_access_key From 350fd4a96300c87796183cd7c4bcccd72cc8cfe3 Mon Sep 17 00:00:00 2001 From: carolFrohlich Date: Wed, 25 Nov 2015 09:40:53 -0500 Subject: [PATCH 23/45] add attribute real_memory to interface, change attr memory to estimated_memory --- nipype/interfaces/base.py | 3 ++- nipype/pipeline/plugins/callback_log.py | 6 +++--- nipype/pipeline/plugins/multiproc.py | 10 +++++----- nipype/pipeline/plugins/tests/test_multiproc.py | 10 +++++----- nipype/utils/draw_gantt_chart.py | 2 +- 5 files changed, 16 insertions(+), 15 deletions(-) diff --git a/nipype/interfaces/base.py b/nipype/interfaces/base.py index 854fb44fe1..694f858b11 100644 --- a/nipype/interfaces/base.py +++ b/nipype/interfaces/base.py @@ -750,7 +750,8 @@ def __init__(self, **inputs): raise Exception('No input_spec in class: %s' % self.__class__.__name__) self.inputs = self.input_spec(**inputs) - self.memory = 1 + self.estimated_memory = 1 + self.real_memory = 0 self.num_threads = 1 @classmethod diff --git a/nipype/pipeline/plugins/callback_log.py b/nipype/pipeline/plugins/callback_log.py index 34952864b7..48a4f28637 100644 --- a/nipype/pipeline/plugins/callback_log.py +++ b/nipype/pipeline/plugins/callback_log.py @@ -7,7 +7,7 @@ def log_nodes_cb(node, status): if status == 'start': message = '{"name":' + '"' + node.name + '"' + ',"id":' + '"' +\ node._id + '"' + ',"start":' + '"' +str(datetime.datetime.now()) +\ - '"' + ',"memory":' + str(node._interface.memory) + ',"num_threads":' \ + '"' + ',"memory":' + str(node._interface.estimated_memory) + ',"num_threads":' \ + str(node._interface.num_threads) + '}' logger.debug(message) @@ -15,7 +15,7 @@ def log_nodes_cb(node, status): elif status == 'end': message = '{"name":' + '"' + node.name + '"' + ',"id":' + '"' + \ node._id + '"' + ',"finish":' + '"' + str(datetime.datetime.now()) +\ - '"' + ',"memory":' + str(node._interface.memory) + ',"num_threads":' \ + '"' + ',"memory":' + str(node._interface.estimated_memory) + ',"num_threads":' \ + str(node._interface.num_threads) + '}' logger.debug(message) @@ -23,7 +23,7 @@ def log_nodes_cb(node, status): else: message = '{"name":' + '"' + node.name + '"' + ',"id":' + '"' + \ node._id + '"' + ',"finish":' + '"' + str(datetime.datetime.now()) +\ - '"' + ',"memory":' + str(node._interface.memory) + ',"num_threads":' \ + '"' + ',"memory":' + str(node._interface.estimated_memory) + ',"num_threads":' \ + str(node._interface.num_threads) + ',"error":"True"}' logger.debug(message) \ No newline at end of file diff --git a/nipype/pipeline/plugins/multiproc.py b/nipype/pipeline/plugins/multiproc.py index 8d66be6999..a8a99325a9 100644 --- 
a/nipype/pipeline/plugins/multiproc.py +++ b/nipype/pipeline/plugins/multiproc.py @@ -177,7 +177,7 @@ def _send_procs_to_workers(self, updatehash=False, graph=None): busy_memory = 0 busy_processors = 0 for jobid in jobids: - busy_memory+= self.procs[jobid]._interface.memory + busy_memory+= self.procs[jobid]._interface.estimated_memory busy_processors+= self.procs[jobid]._interface.num_threads free_memory = self.memory - busy_memory @@ -190,7 +190,7 @@ def _send_procs_to_workers(self, updatehash=False, graph=None): #sort jobs ready to run first by memory and then by number of threads #The most resource consuming jobs run first - jobids = sorted(jobids, key=lambda item: (self.procs[item]._interface.memory, self.procs[item]._interface.num_threads)) + jobids = sorted(jobids, key=lambda item: (self.procs[item]._interface.estimated_memory, self.procs[item]._interface.num_threads)) logger.debug('Free memory: %d, Free processors: %d', free_memory, free_processors) @@ -198,9 +198,9 @@ def _send_procs_to_workers(self, updatehash=False, graph=None): #while have enough memory and processors for first job #submit first job on the list for jobid in jobids: - logger.debug('Next Job: %d, memory: %d, threads: %d' %(jobid, self.procs[jobid]._interface.memory, self.procs[jobid]._interface.num_threads)) + logger.debug('Next Job: %d, memory: %d, threads: %d' %(jobid, self.procs[jobid]._interface.estimated_memory, self.procs[jobid]._interface.num_threads)) - if self.procs[jobid]._interface.memory <= free_memory and self.procs[jobid]._interface.num_threads <= free_processors: + if self.procs[jobid]._interface.estimated_memory <= free_memory and self.procs[jobid]._interface.num_threads <= free_processors: logger.info('Executing: %s ID: %d' %(self.procs[jobid]._id, jobid)) executing_now.append(self.procs[jobid]) @@ -220,7 +220,7 @@ def _send_procs_to_workers(self, updatehash=False, graph=None): self.proc_done[jobid] = True self.proc_pending[jobid] = True - free_memory -= self.procs[jobid]._interface.memory + free_memory -= self.procs[jobid]._interface.estimated_memory free_processors -= self.procs[jobid]._interface.num_threads # Send job to task manager and add to pending tasks diff --git a/nipype/pipeline/plugins/tests/test_multiproc.py b/nipype/pipeline/plugins/tests/test_multiproc.py index 5e841b78a3..d2f281eadd 100644 --- a/nipype/pipeline/plugins/tests/test_multiproc.py +++ b/nipype/pipeline/plugins/tests/test_multiproc.py @@ -103,7 +103,7 @@ def find_metrics(nodes, last_node): node_finish = parse(nodes[j]['finish']) if node_start < x and node_finish > x: - total_memory[i] += nodes[j]['memory'] + total_memory[i] += nodes[j]['estimated_memory'] total_threads[i] += nodes[j]['num_threads'] start_index = j @@ -140,10 +140,10 @@ def test_do_not_use_more_memory_then_specified(): n3 = pe.Node(interface=TestInterfaceSingleNode(), name='n3') n4 = pe.Node(interface=TestInterfaceSingleNode(), name='n4') - n1.interface.memory = 1 - n2.interface.memory = 1 - n3.interface.memory = 10 - n4.interface.memory = 1 + n1.interface.estimated_memory = 1 + n2.interface.estimated_memory = 1 + n3.interface.estimated_memory = 10 + n4.interface.estimated_memory = 1 pipe.connect(n1, 'output1', n2, 'input1') pipe.connect(n1, 'output1', n3, 'input1') diff --git a/nipype/utils/draw_gantt_chart.py b/nipype/utils/draw_gantt_chart.py index 5adff16c3d..84bbc033a0 100644 --- a/nipype/utils/draw_gantt_chart.py +++ b/nipype/utils/draw_gantt_chart.py @@ -133,7 +133,7 @@ def draw_memory_bar(start, total_duration, nodes, space_between_minutes, 
minute_ node_finish = parser.parse(nodes[j]['finish']) if node_start <= now and node_finish >= now: - memory[i] += nodes[j]['memory'] + memory[i] += nodes[j]['estimated_memory'] if node_start > now: break now += datetime.timedelta(minutes=1) From f74fe25835c49749c85c8834d9249220646fea2e Mon Sep 17 00:00:00 2001 From: Cameron Craddock Date: Wed, 25 Nov 2015 14:55:32 +0000 Subject: [PATCH 24/45] Added real memory recording to plugn --- nipype/pipeline/plugins/callback_log.py | 8 ++++---- nipype/pipeline/plugins/multiproc.py | 19 +++++++++++++++---- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/nipype/pipeline/plugins/callback_log.py b/nipype/pipeline/plugins/callback_log.py index 48a4f28637..951a6f8291 100644 --- a/nipype/pipeline/plugins/callback_log.py +++ b/nipype/pipeline/plugins/callback_log.py @@ -7,8 +7,8 @@ def log_nodes_cb(node, status): if status == 'start': message = '{"name":' + '"' + node.name + '"' + ',"id":' + '"' +\ node._id + '"' + ',"start":' + '"' +str(datetime.datetime.now()) +\ - '"' + ',"memory":' + str(node._interface.estimated_memory) + ',"num_threads":' \ - + str(node._interface.num_threads) + '}' + '"' + ',"estimate memory":' + str(node._interface.estimated_memory) + ',"num_threads":' \ + + str(node._interface.num_threads) + '}' logger.debug(message) @@ -16,7 +16,7 @@ def log_nodes_cb(node, status): message = '{"name":' + '"' + node.name + '"' + ',"id":' + '"' + \ node._id + '"' + ',"finish":' + '"' + str(datetime.datetime.now()) +\ '"' + ',"memory":' + str(node._interface.estimated_memory) + ',"num_threads":' \ - + str(node._interface.num_threads) + '}' + + str(node._interface.num_threads) + ',"real memory":' str(node._interface.real_memory) + '}' logger.debug(message) @@ -26,4 +26,4 @@ def log_nodes_cb(node, status): '"' + ',"memory":' + str(node._interface.estimated_memory) + ',"num_threads":' \ + str(node._interface.num_threads) + ',"error":"True"}' - logger.debug(message) \ No newline at end of file + logger.debug(message) diff --git a/nipype/pipeline/plugins/multiproc.py b/nipype/pipeline/plugins/multiproc.py index a8a99325a9..b9fc5c9d20 100644 --- a/nipype/pipeline/plugins/multiproc.py +++ b/nipype/pipeline/plugins/multiproc.py @@ -13,8 +13,18 @@ from .base import (DistributedPluginBase, report_crash) -def run_node(node, updatehash): +def run_node(node, updatehash, plugin_args=None): result = dict(result=None, traceback=None) + try: + run_memory = plugin_args['memory_profile'] + except Exception: + run_memory = False + if run_memory: + import memory_profiler + proc = (node.run(), (), {'updatehash' : updatehash}) + mem_mb, retval = memory_profiler.memory_usage(proc, max_usage=True, retval=True) + result['result'] = retval + node._interface.real_memory = mem_mb[0]/1024.0 try: result['result'] = node.run(updatehash=updatehash) except: @@ -160,8 +170,9 @@ def _submit_job(self, node, updatehash=False): node.inputs.terminal_output = 'allatonce' except: pass - self._taskresult[self._taskid] = self.pool.apply_async(run_node, (node, - updatehash,), callback=release_lock) + self._taskresult[self._taskid] = self.pool.apply_async(run_node, + (node, updatehash, self.plugin_args), + callback=release_lock) return self._taskid def _send_procs_to_workers(self, updatehash=False, graph=None): @@ -263,4 +274,4 @@ def _send_procs_to_workers(self, updatehash=False, graph=None): else: break - logger.debug('No jobs waiting to execute') \ No newline at end of file + logger.debug('No jobs waiting to execute') From 1e66b864285513c3785b67e209476f6a9663d1ab Mon Sep 17 
00:00:00 2001 From: Cameron Craddock Date: Wed, 25 Nov 2015 19:01:22 +0000 Subject: [PATCH 25/45] Added initial code for getting used memory of node --- nipype/interfaces/base.py | 10 +++++++++- nipype/pipeline/plugins/base.py | 5 ++++- nipype/pipeline/plugins/callback_log.py | 4 ++-- nipype/pipeline/plugins/multiproc.py | 23 ++++++++++++++--------- 4 files changed, 29 insertions(+), 13 deletions(-) diff --git a/nipype/interfaces/base.py b/nipype/interfaces/base.py index 694f858b11..a0b1110098 100644 --- a/nipype/interfaces/base.py +++ b/nipype/interfaces/base.py @@ -751,9 +751,17 @@ def __init__(self, **inputs): self.__class__.__name__) self.inputs = self.input_spec(**inputs) self.estimated_memory = 1 - self.real_memory = 0 + self._real_memory = 0 self.num_threads = 1 + @property + def real_memory(self): + return self._real_memory + + @real_memory.setter + def real_memory(self, value): + self._real_memory = value + @classmethod def help(cls, returnhelp=False): """ Prints class help diff --git a/nipype/pipeline/plugins/base.py b/nipype/pipeline/plugins/base.py index cee2c7dad5..bda811354d 100644 --- a/nipype/pipeline/plugins/base.py +++ b/nipype/pipeline/plugins/base.py @@ -416,7 +416,10 @@ def _task_finished_cb(self, jobid): logger.info('[Job finished] jobname: %s jobid: %d' % (self.procs[jobid]._id, jobid)) if self._status_callback: - self._status_callback(self.procs[jobid], 'end') + print '!!!!!!!!!!!!!!!!!!!' + print self._taskresult + print self._taskresult.keys() + self._status_callback(self.procs[jobid], 'end', self._taskresult[self.taskresultid]) # Update job and worker queues self.proc_pending[jobid] = False # update the job dependency structure diff --git a/nipype/pipeline/plugins/callback_log.py b/nipype/pipeline/plugins/callback_log.py index 951a6f8291..6fad0eee44 100644 --- a/nipype/pipeline/plugins/callback_log.py +++ b/nipype/pipeline/plugins/callback_log.py @@ -1,7 +1,7 @@ import datetime import logging -def log_nodes_cb(node, status): +def log_nodes_cb(node, status, result=None): print 'status', status logger = logging.getLogger('callback') if status == 'start': @@ -16,7 +16,7 @@ def log_nodes_cb(node, status): message = '{"name":' + '"' + node.name + '"' + ',"id":' + '"' + \ node._id + '"' + ',"finish":' + '"' + str(datetime.datetime.now()) +\ '"' + ',"memory":' + str(node._interface.estimated_memory) + ',"num_threads":' \ - + str(node._interface.num_threads) + ',"real memory":' str(node._interface.real_memory) + '}' + + str(node._interface.num_threads) + ',"real memory":' + str(result['real_memory']) + '}' logger.debug(message) diff --git a/nipype/pipeline/plugins/multiproc.py b/nipype/pipeline/plugins/multiproc.py index b9fc5c9d20..5091d25b27 100644 --- a/nipype/pipeline/plugins/multiproc.py +++ b/nipype/pipeline/plugins/multiproc.py @@ -21,16 +21,20 @@ def run_node(node, updatehash, plugin_args=None): run_memory = False if run_memory: import memory_profiler - proc = (node.run(), (), {'updatehash' : updatehash}) + proc = (node.run, (), {'updatehash' : updatehash}) mem_mb, retval = memory_profiler.memory_usage(proc, max_usage=True, retval=True) result['result'] = retval - node._interface.real_memory = mem_mb[0]/1024.0 - try: - result['result'] = node.run(updatehash=updatehash) - except: - etype, eval, etr = sys.exc_info() - result['traceback'] = format_exception(etype,eval,etr) - result['result'] = node.result + result['real_memory'] = 100 + print 'Just populated task result!!!!!!!!!!!!!!!!!!!' 
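For orientation, the profiling pattern used by run_node above can be exercised on its own roughly as follows; this is a sketch that assumes the memory_profiler API of the time, where max_usage=True returns a one-element list (hence the mem_mb[0] indexing in the patch):

import memory_profiler

def profiled_call_sketch(func, *args, **kwargs):
    # Run func(*args, **kwargs) while sampling peak memory of the process and
    # its children; return (peak_in_gb, return_value)
    mem_mb, retval = memory_profiler.memory_usage(
        (func, args, kwargs), max_usage=True, retval=True, include_children=True)
    peak_mb = mem_mb[0] if isinstance(mem_mb, (list, tuple)) else mem_mb
    return peak_mb / 1024.0, retval

# peak_gb, result = profiled_call_sketch(node.run, updatehash=False)  # hypothetical node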
+ print result + #node._interface.real_memory = mem_mb[0]/1024.0 + else: + try: + result['result'] = node.run(updatehash=updatehash) + except: + etype, eval, etr = sys.exc_info() + result['traceback'] = format_exception(etype,eval,etr) + result['result'] = node.result return result @@ -173,6 +177,8 @@ def _submit_job(self, node, updatehash=False): self._taskresult[self._taskid] = self.pool.apply_async(run_node, (node, updatehash, self.plugin_args), callback=release_lock) + print 'Printing on output!!!!!!!!!!' + print self._taskresult, self._taskid return self._taskid def _send_procs_to_workers(self, updatehash=False, graph=None): @@ -237,7 +243,6 @@ def _send_procs_to_workers(self, updatehash=False, graph=None): # Send job to task manager and add to pending tasks if self._status_callback: self._status_callback(self.procs[jobid], 'start') - if str2bool(self.procs[jobid].config['execution']['local_hash_check']): logger.debug('checking hash locally') try: From 716f92336add029661abc29b8eb27ce0cc494930 Mon Sep 17 00:00:00 2001 From: Cameron Craddock Date: Wed, 2 Dec 2015 03:12:07 +0000 Subject: [PATCH 26/45] Fixed logging of real memory --- nipype/interfaces/base.py | 38 +++++++++++++++++++++++-- nipype/interfaces/utility.py | 7 ++++- nipype/pipeline/plugins/base.py | 20 +++++++++---- nipype/pipeline/plugins/callback_log.py | 8 ++++-- nipype/pipeline/plugins/multiproc.py | 10 ++----- 5 files changed, 65 insertions(+), 18 deletions(-) diff --git a/nipype/interfaces/base.py b/nipype/interfaces/base.py index a0b1110098..2112cdc739 100644 --- a/nipype/interfaces/base.py +++ b/nipype/interfaces/base.py @@ -1206,9 +1206,18 @@ def run_command(runtime, output=None, timeout=0.01, redirect_x=False): The returned runtime contains a merged stdout+stderr log with timestamps """ - PIPE = subprocess.PIPE + # Import packages + try: + from memory_profiler import _get_memory + mem_prof = True + except: + mem_prof = False + + # Init variables + PIPE = subprocess.PIPE cmdline = runtime.cmdline + if redirect_x: exist_xvfb, _ = _exists_in_path('xvfb-run', runtime.environ) if not exist_xvfb: @@ -1237,6 +1246,11 @@ def run_command(runtime, output=None, timeout=0.01, redirect_x=False): result = {} errfile = os.path.join(runtime.cwd, 'stderr.nipype') outfile = os.path.join(runtime.cwd, 'stdout.nipype') + + # Init variables for memory profiling + ret = -1 + interval = 0.1 + if output == 'stream': streams = [Stream('stdout', proc.stdout), Stream('stderr', proc.stderr)] @@ -1252,8 +1266,10 @@ def _process(drain=0): else: for stream in res[0]: stream.read(drain) - while proc.returncode is None: + if mem_prof: + ret = max([ret, _get_memory(proc.pid, include_children=True)]) + time.sleep(interval) proc.poll() _process() _process(drain=1) @@ -1267,12 +1283,23 @@ def _process(drain=0): result[stream._name] = [r[2] for r in rows] temp.sort() result['merged'] = [r[1] for r in temp] + if output == 'allatonce': + if mem_prof: + while proc.returncode is None: + ret = max([ret, _get_memory(proc.pid, include_children=True)]) + time.sleep(interval) + proc.poll() stdout, stderr = proc.communicate() result['stdout'] = stdout.split('\n') result['stderr'] = stderr.split('\n') result['merged'] = '' if output == 'file': + if mem_prof: + while proc.returncode is None: + ret = max([ret, _get_memory(proc.pid, include_children=True)]) + time.sleep(interval) + proc.poll() ret_code = proc.wait() stderr.flush() stdout.flush() @@ -1280,10 +1307,17 @@ def _process(drain=0): result['stderr'] = [line.strip() for line in open(errfile).readlines()] 
result['merged'] = '' if output == 'none': + if mem_prof: + while proc.returncode is None: + ret = max([ret, _get_memory(proc.pid, include_children=True)]) + time.sleep(interval) + proc.poll() proc.communicate() result['stdout'] = [] result['stderr'] = [] result['merged'] = '' + + setattr(runtime, 'real_memory2', ret/1024.0) runtime.stderr = '\n'.join(result['stderr']) runtime.stdout = '\n'.join(result['stdout']) runtime.merged = result['merged'] diff --git a/nipype/interfaces/utility.py b/nipype/interfaces/utility.py index ca2bb5ba69..10effaa548 100644 --- a/nipype/interfaces/utility.py +++ b/nipype/interfaces/utility.py @@ -442,7 +442,12 @@ def _run_interface(self, runtime): if isdefined(value): args[name] = value - out = function_handle(**args) + # mem stuff + import memory_profiler + proc = (function_handle, (), args) + mem_mb, out = memory_profiler.memory_usage(proc=proc, retval=True, include_children=True, max_usage=True) + setattr(runtime, 'real_memory2', mem_mb[0]/1024.0) + #out = function_handle(**args) if len(self._output_names) == 1: self._out[self._output_names[0]] = out diff --git a/nipype/pipeline/plugins/base.py b/nipype/pipeline/plugins/base.py index bda811354d..2299bf4b23 100644 --- a/nipype/pipeline/plugins/base.py +++ b/nipype/pipeline/plugins/base.py @@ -241,7 +241,8 @@ def run(self, graph, config, updatehash=False): notrun.append(self._clean_queue(jobid, graph, result=result)) else: - self._task_finished_cb(jobid) + print "DJC: Calling task finished for %s cb from DistributedPluginBase.run"%(str(taskid)) + self._task_finished_cb(jobid, result) self._remove_node_dirs() self._clear_task(taskid) else: @@ -379,6 +380,7 @@ def _send_procs_to_workers(self, updatehash=False, graph=None): ) ): continue_with_submission = False + print "DJC: Calling task finised cb from DistributedPluginBase._send_procs_to_workers hash==true" self._task_finished_cb(jobid) self._remove_node_dirs() except Exception: @@ -395,6 +397,7 @@ def _send_procs_to_workers(self, updatehash=False, graph=None): self.procs[jobid].run() except Exception: self._clean_queue(jobid, graph) + print "DJC: Calling task finised cb from DistributedPluginBase._send_procs_to_workers continue_with_submission==true" self._task_finished_cb(jobid) self._remove_node_dirs() else: @@ -408,7 +411,7 @@ def _send_procs_to_workers(self, updatehash=False, graph=None): else: break - def _task_finished_cb(self, jobid): + def _task_finished_cb(self, jobid, result=None): """ Extract outputs and assign to inputs of dependent tasks This is called when a job is completed. @@ -416,10 +419,15 @@ def _task_finished_cb(self, jobid): logger.info('[Job finished] jobname: %s jobid: %d' % (self.procs[jobid]._id, jobid)) if self._status_callback: - print '!!!!!!!!!!!!!!!!!!!' 
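The polling loops added to run_command in this patch rely on memory_profiler's private _get_memory helper; a rough equivalent using only psutil (an assumption for illustration, not the code in the patch) would be:

import time
import psutil

def peak_rss_while_running_sketch(proc, interval=0.1):
    # Poll a subprocess.Popen object until it exits, tracking the peak resident
    # memory (parent plus children) in MB
    peak = 0.0
    parent = psutil.Process(proc.pid)
    while proc.poll() is None:
        try:
            rss = parent.memory_info().rss
            for child in parent.children(recursive=True):
                rss += child.memory_info().rss
        except psutil.NoSuchProcess:
            break
        peak = max(peak, rss / (1024.0 * 1024.0))
        time.sleep(interval)
    return peak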
- print self._taskresult - print self._taskresult.keys() - self._status_callback(self.procs[jobid], 'end', self._taskresult[self.taskresultid]) + if result == None: + if self._taskresult.has_key(jobid): + result = self._taskresult[jobid].get() + print 'MMMM' + print result['real_memory'], result['real_memory2'] + else: + print "DJC: %s not found, taskresult keys are: %s"%(str(jobid),":".join([str(k) for k in self._taskresult.keys()])) + result = {'real_memory' : 'nokey'} + self._status_callback(self.procs[jobid], 'end', result) # Update job and worker queues self.proc_pending[jobid] = False # update the job dependency structure diff --git a/nipype/pipeline/plugins/callback_log.py b/nipype/pipeline/plugins/callback_log.py index 6fad0eee44..d6795048df 100644 --- a/nipype/pipeline/plugins/callback_log.py +++ b/nipype/pipeline/plugins/callback_log.py @@ -2,8 +2,12 @@ import logging def log_nodes_cb(node, status, result=None): - print 'status', status logger = logging.getLogger('callback') + try: + real_mem1 = result['real_memory'] + real_mem2 = result['result'].runtime.get('real_memory2') + except Exception as exc: + real_mem1 = real_mem2 = 'N/A' if status == 'start': message = '{"name":' + '"' + node.name + '"' + ',"id":' + '"' +\ node._id + '"' + ',"start":' + '"' +str(datetime.datetime.now()) +\ @@ -16,7 +20,7 @@ def log_nodes_cb(node, status, result=None): message = '{"name":' + '"' + node.name + '"' + ',"id":' + '"' + \ node._id + '"' + ',"finish":' + '"' + str(datetime.datetime.now()) +\ '"' + ',"memory":' + str(node._interface.estimated_memory) + ',"num_threads":' \ - + str(node._interface.num_threads) + ',"real memory":' + str(result['real_memory']) + '}' + + str(node._interface.num_threads) + ',"real memory1":' + str(real_mem1) + ',"real memory2":' + str(real_mem2) + '}' logger.debug(message) diff --git a/nipype/pipeline/plugins/multiproc.py b/nipype/pipeline/plugins/multiproc.py index 5091d25b27..c4b6be1af9 100644 --- a/nipype/pipeline/plugins/multiproc.py +++ b/nipype/pipeline/plugins/multiproc.py @@ -22,12 +22,10 @@ def run_node(node, updatehash, plugin_args=None): if run_memory: import memory_profiler proc = (node.run, (), {'updatehash' : updatehash}) - mem_mb, retval = memory_profiler.memory_usage(proc, max_usage=True, retval=True) + mem_mb, retval = memory_profiler.memory_usage(proc=proc, retval=True, include_children=True, max_usage=True) result['result'] = retval - result['real_memory'] = 100 - print 'Just populated task result!!!!!!!!!!!!!!!!!!!' - print result - #node._interface.real_memory = mem_mb[0]/1024.0 + result['real_memory'] = mem_mb[0]/1024.0 + result['real_memory2'] = retval.runtime.get('real_memory2') else: try: result['result'] = node.run(updatehash=updatehash) @@ -177,8 +175,6 @@ def _submit_job(self, node, updatehash=False): self._taskresult[self._taskid] = self.pool.apply_async(run_node, (node, updatehash, self.plugin_args), callback=release_lock) - print 'Printing on output!!!!!!!!!!' 
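A side note on the log format: log_nodes_cb assembles its JSON records by string concatenation, which is easy to break when values need quoting; an equivalent formulation with the json module (an alternative sketch, not the patch's implementation) would be:

import json
import datetime

def format_node_event_sketch(node, status):
    # Build one JSON record per node event; keys roughly mirror those emitted
    # by log_nodes_cb in this series
    event = {'name': node.name,
             'id': node._id,
             'memory': getattr(node._interface, 'estimated_memory', None),
             'num_threads': getattr(node._interface, 'num_threads', None)}
    event['start' if status == 'start' else 'finish'] = str(datetime.datetime.now())
    if status not in ('start', 'end'):
        event['error'] = 'True'
    return json.dumps(event)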
- print self._taskresult, self._taskid return self._taskid def _send_procs_to_workers(self, updatehash=False, graph=None): From ff7959ac4af9766da324635bca44f5b9aa21f227 Mon Sep 17 00:00:00 2001 From: Cameron Craddock Date: Wed, 2 Dec 2015 03:51:37 +0000 Subject: [PATCH 27/45] Added per node runtime logging --- nipype/pipeline/plugins/callback_log.py | 7 ++++--- nipype/pipeline/plugins/multiproc.py | 4 ++++ 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/nipype/pipeline/plugins/callback_log.py b/nipype/pipeline/plugins/callback_log.py index d6795048df..a20242df95 100644 --- a/nipype/pipeline/plugins/callback_log.py +++ b/nipype/pipeline/plugins/callback_log.py @@ -5,9 +5,10 @@ def log_nodes_cb(node, status, result=None): logger = logging.getLogger('callback') try: real_mem1 = result['real_memory'] - real_mem2 = result['result'].runtime.get('real_memory2') + real_mem2 = result['real_memory2'] + run_seconds = result['run_seconds'] except Exception as exc: - real_mem1 = real_mem2 = 'N/A' + real_mem1 = real_mem2 = run_seconds = 'N/A' if status == 'start': message = '{"name":' + '"' + node.name + '"' + ',"id":' + '"' +\ node._id + '"' + ',"start":' + '"' +str(datetime.datetime.now()) +\ @@ -20,7 +21,7 @@ def log_nodes_cb(node, status, result=None): message = '{"name":' + '"' + node.name + '"' + ',"id":' + '"' + \ node._id + '"' + ',"finish":' + '"' + str(datetime.datetime.now()) +\ '"' + ',"memory":' + str(node._interface.estimated_memory) + ',"num_threads":' \ - + str(node._interface.num_threads) + ',"real memory1":' + str(real_mem1) + ',"real memory2":' + str(real_mem2) + '}' + + str(node._interface.num_threads) + ',"real_memory1":' + str(real_mem1) + ',"real_memory2":' + str(real_mem2) + ',"run_seconds":' + str(run_seconds) + '}' logger.debug(message) diff --git a/nipype/pipeline/plugins/multiproc.py b/nipype/pipeline/plugins/multiproc.py index c4b6be1af9..3a5c63df35 100644 --- a/nipype/pipeline/plugins/multiproc.py +++ b/nipype/pipeline/plugins/multiproc.py @@ -21,11 +21,15 @@ def run_node(node, updatehash, plugin_args=None): run_memory = False if run_memory: import memory_profiler + import datetime proc = (node.run, (), {'updatehash' : updatehash}) + start = datetime.datetime.now() mem_mb, retval = memory_profiler.memory_usage(proc=proc, retval=True, include_children=True, max_usage=True) + runtime = (datetime.datetime.now() - start).total_seconds() result['result'] = retval result['real_memory'] = mem_mb[0]/1024.0 result['real_memory2'] = retval.runtime.get('real_memory2') + result['run_seconds'] = runtime else: try: result['result'] = node.run(updatehash=updatehash) From d25afb5b4a6668dcb6ddf06eda9a5f6702dbd598 Mon Sep 17 00:00:00 2001 From: dclark87 Date: Thu, 10 Dec 2015 10:10:52 -0500 Subject: [PATCH 28/45] Removed debugging print statements --- nipype/pipeline/plugins/base.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/nipype/pipeline/plugins/base.py b/nipype/pipeline/plugins/base.py index 2299bf4b23..ab76520844 100644 --- a/nipype/pipeline/plugins/base.py +++ b/nipype/pipeline/plugins/base.py @@ -241,7 +241,6 @@ def run(self, graph, config, updatehash=False): notrun.append(self._clean_queue(jobid, graph, result=result)) else: - print "DJC: Calling task finished for %s cb from DistributedPluginBase.run"%(str(taskid)) self._task_finished_cb(jobid, result) self._remove_node_dirs() self._clear_task(taskid) @@ -380,7 +379,6 @@ def _send_procs_to_workers(self, updatehash=False, graph=None): ) ): continue_with_submission = False - print "DJC: Calling task finised cb 
from DistributedPluginBase._send_procs_to_workers hash==true" self._task_finished_cb(jobid) self._remove_node_dirs() except Exception: @@ -397,7 +395,6 @@ def _send_procs_to_workers(self, updatehash=False, graph=None): self.procs[jobid].run() except Exception: self._clean_queue(jobid, graph) - print "DJC: Calling task finised cb from DistributedPluginBase._send_procs_to_workers continue_with_submission==true" self._task_finished_cb(jobid) self._remove_node_dirs() else: @@ -422,10 +419,7 @@ def _task_finished_cb(self, jobid, result=None): if result == None: if self._taskresult.has_key(jobid): result = self._taskresult[jobid].get() - print 'MMMM' - print result['real_memory'], result['real_memory2'] else: - print "DJC: %s not found, taskresult keys are: %s"%(str(jobid),":".join([str(k) for k in self._taskresult.keys()])) result = {'real_memory' : 'nokey'} self._status_callback(self.procs[jobid], 'end', result) # Update job and worker queues From 00a470bc845c6410bd885e7d77326ab42983c995 Mon Sep 17 00:00:00 2001 From: carolFrohlich Date: Wed, 30 Dec 2015 13:05:57 -0500 Subject: [PATCH 29/45] sync with master --- nipype/interfaces/fsl/model.py | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/nipype/interfaces/fsl/model.py b/nipype/interfaces/fsl/model.py index 7cebdbb629..369ecb28f3 100644 --- a/nipype/interfaces/fsl/model.py +++ b/nipype/interfaces/fsl/model.py @@ -250,13 +250,22 @@ def _create_ev_files( element=count, ctype=ctype, val=val) ev_txt += "\n" - if con[0] in con_map.keys(): - for fconidx in con_map[con[0]]: - ev_txt += contrast_ftest_element.substitute( - cnum=ftest_idx.index(fconidx) + 1, - element=tidx, - ctype=ctype, - val=1) + # if con[0] in con_map.keys(): + # for fconidx in con_map[con[0]]: + # ev_txt += contrast_ftest_element.substitute( + # cnum=ftest_idx.index(fconidx) + 1, + # element=tidx, + # ctype=ctype, + # val=1) + for fconidx in ftest_idx: + fval=0 + if con[0] in con_map.keys() and fconidx in con_map[con[0]]: + fval=1 + ev_txt += contrast_ftest_element.substitute( + cnum=ftest_idx.index(fconidx) + 1, + element=tidx, + ctype=ctype, + val=fval) ev_txt += "\n" # add contrast mask info From 89d7e9c418b6fa9e684508127fb07b00bce4dc30 Mon Sep 17 00:00:00 2001 From: dclark87 Date: Thu, 7 Jan 2016 18:27:01 -0500 Subject: [PATCH 30/45] Added fakes3 integration with datasink and started adding a local_copy flag to the output generation logic --- nipype/interfaces/io.py | 98 +++++++++++++++++---------- nipype/interfaces/tests/test_io.py | 104 +++++++++++++++++++++++++++++ 2 files changed, 166 insertions(+), 36 deletions(-) diff --git a/nipype/interfaces/io.py b/nipype/interfaces/io.py index f8b4f16f60..f944114b8d 100644 --- a/nipype/interfaces/io.py +++ b/nipype/interfaces/io.py @@ -205,6 +205,11 @@ class DataSinkInputSpec(DynamicTraitedSpec, BaseInterfaceInputSpec): 'access') encrypt_bucket_keys = traits.Bool(desc='Flag indicating whether to use S3 '\ 'server-side AES-256 encryption') + # Set this if user wishes to override the bucket with their own + bucket = traits.Generic(mandatory=False, + desc='Boto3 S3 bucket for manual override of bucket') + # Set this if user wishes to have local copy of files as well + local_dir = traits.Str(desc='Copy files locally as well as to S3 bucket') # Set call-able inputs attributes def __setattr__(self, key, value): @@ -385,7 +390,6 @@ def _check_s3_base_dir(self): # Init variables s3_str = 's3://' - sep = os.path.sep base_directory = self.inputs.base_directory # Explicitly lower-case the "s3" @@ 
-396,11 +400,16 @@ def _check_s3_base_dir(self): # Check if 's3://' in base dir if base_directory.startswith(s3_str): + # Attempt to access bucket try: # Expects bucket name to be 's3://bucket_name/base_dir/..' - bucket_name = base_directory.split(s3_str)[1].split(sep)[0] + bucket_name = base_directory.split(s3_str)[1].split('/')[0] # Get the actual bucket object - self.bucket = self._fetch_bucket(bucket_name) + if self.inputs.bucket: + self.bucket = self.inputs.bucket + else: + self.bucket = self._fetch_bucket(bucket_name) + # Report error in case of exception except Exception as exc: err_msg = 'Unable to access S3 bucket. Error:\n%s. Exiting...'\ % exc @@ -566,7 +575,7 @@ def _upload_to_s3(self, src, dst): bucket = self.bucket iflogger = logging.getLogger('interface') s3_str = 's3://' - s3_prefix = os.path.join(s3_str, bucket.name) + s3_prefix = s3_str + bucket.name # Explicitly lower-case the "s3" if dst.lower().startswith(s3_str): @@ -629,41 +638,53 @@ def _list_outputs(self): iflogger = logging.getLogger('interface') outputs = self.output_spec().get() out_files = [] - outdir = self.inputs.base_directory + # Use hardlink use_hardlink = str2bool(config.get('execution', 'try_hard_link_datasink')) - # If base directory isn't given, assume current directory - if not isdefined(outdir): - outdir = '.' + # Set local output directory if specified + if isdefined(self.inputs.local_copy): + outdir = self.inputs.local_copy + else: + outdir = self.inputs.base_directory + # If base directory isn't given, assume current directory + if not isdefined(outdir): + outdir = '.' - # Check if base directory reflects S3-bucket upload + # Check if base directory reflects S3 bucket upload try: s3_flag = self._check_s3_base_dir() + s3dir = self.inputs.base_directory + if isdefined(self.inputs.container): + s3dir = os.path.join(s3dir, self.inputs.container) # If encountering an exception during bucket access, set output # base directory to a local folder except Exception as exc: - local_out_exception = os.path.join(os.path.expanduser('~'), - 'data_output') + if not isdefined(self.inputs.local_copy): + local_out_exception = os.path.join(os.path.expanduser('~'), + 's3_datasink_' + self.bucket.name) + outdir = local_out_exception + else: + outdir = self.inputs.local_copy + # Log local copying directory iflogger.info('Access to S3 failed! 
Storing outputs locally at: '\ - '%s\nError: %s' %(local_out_exception, exc)) - self.inputs.base_directory = local_out_exception - - # If not accessing S3, just set outdir to local absolute path - if not s3_flag: - outdir = os.path.abspath(outdir) + '%s\nError: %s' %(outdir, exc)) # If container input is given, append that to outdir if isdefined(self.inputs.container): outdir = os.path.join(outdir, self.inputs.container) - # Create the directory if it doesn't exist - if not os.path.exists(outdir): - try: - os.makedirs(outdir) - except OSError, inst: - if 'File exists' in inst: - pass - else: - raise(inst) + + # If doing a localy output + if not outdir.lower().startswith('s3://'): + outdir = os.path.abspath(outdir) + # Create the directory if it doesn't exist + if not os.path.exists(outdir): + try: + os.makedirs(outdir) + except OSError, inst: + if 'File exists' in inst: + pass + else: + raise(inst) # Iterate through outputs attributes {key : path(s)} for key, files in self.inputs._outputs.items(): @@ -672,10 +693,14 @@ def _list_outputs(self): iflogger.debug("key: %s files: %s" % (key, str(files))) files = filename_to_list(files) tempoutdir = outdir + if s3_flag: + s3tempoutdir = s3dir for d in key.split('.'): if d[0] == '@': continue tempoutdir = os.path.join(tempoutdir, d) + if s3_flag: + s3tempoutdir = os.path.join(s3tempoutdir, d) # flattening list if isinstance(files, list): @@ -690,25 +715,26 @@ def _list_outputs(self): src = os.path.join(src, '') dst = self._get_dst(src) dst = os.path.join(tempoutdir, dst) + s3dst = os.path.join(s3tempoutdir, dst) dst = self._substitute(dst) path, _ = os.path.split(dst) - # Create output directory if it doesnt exist - if not os.path.exists(path): - try: - os.makedirs(path) - except OSError, inst: - if 'File exists' in inst: - pass - else: - raise(inst) - # If we're uploading to S3 if s3_flag: + dst = dst.replace(outdir, self.inputs.base_directory) self._upload_to_s3(src, dst) out_files.append(dst) # Otherwise, copy locally src -> dst else: + # Create output directory if it doesnt exist + if not os.path.exists(path): + try: + os.makedirs(path) + except OSError, inst: + if 'File exists' in inst: + pass + else: + raise(inst) # If src is a file, copy it to dst if os.path.isfile(src): iflogger.debug('copyfile: %s %s' % (src, dst)) diff --git a/nipype/interfaces/tests/test_io.py b/nipype/interfaces/tests/test_io.py index 8c19ea0503..25ac843d0e 100644 --- a/nipype/interfaces/tests/test_io.py +++ b/nipype/interfaces/tests/test_io.py @@ -13,6 +13,7 @@ import nipype.interfaces.io as nio from nipype.interfaces.base import Undefined +# Check for boto noboto = False try: import boto @@ -20,6 +21,13 @@ except: noboto = True +# Check for boto3 +noboto3 = False +try: + import boto3 + from botocore.utils import fix_s3_host +except: + noboto3 = True def test_datagrabber(): dg = nio.DataGrabber() @@ -155,6 +163,102 @@ def test_datasink(): ds = nio.DataSink(infields=['test']) yield assert_true, 'test' in ds.inputs.copyable_trait_names() +# Function to check for fakes3 +def _check_for_fakes3(): + ''' + Function used internally to check for fakes3 installation + ''' + + # Import packages + import subprocess + + # Init variables + fakes3_found = False + + # Check for fakes3 + try: + ret_code = subprocess.check_call(['which', 'fakes3']) + if ret_code == 0: + fakes3_found = True + except subprocess.CalledProcessError as exc: + print 'fakes3 not found, install via \'gem install fakes3\', skipping test...' 
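For context, the DataSink inputs added in this patch (an 's3://' base_directory, an optional boto3 bucket override, and a local copy directory, named local_copy from the follow-up patch onward) end up being driven much like the S3 test added below. A minimal usage sketch; the bucket, folder and file names here are placeholders, not values from the patch:

    import nipype.interfaces.io as nio

    ds = nio.DataSink()
    ds.inputs.base_directory = 's3://mybucket/derivatives'   # placeholder bucket/prefix
    ds.inputs.container = 'sub001'                           # placeholder container folder
    ds.inputs.local_copy = '/tmp/derivatives'                # also keep a local copy of the outputs
    ds.inputs.anat = '/path/to/T1.nii.gz'                    # placeholder output key and source file
    ds.run()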
+ except: + print 'Unable to check for fakes3 installation, skipping test...' + + # Return if found + return fakes3_found + +@skipif(noboto3) +# Test datasink writes to s3 properly +def test_datasink_to_s3(): + ''' + This function tests to see if the S3 functionality of a DataSink + works properly + ''' + + # Import packages + import hashlib + import tempfile + + # Init variables + ds = nio.DataSink() + bucket_name = 'test' + container = 'outputs' + attr_folder = 'text_file' + output_dir = 's3://' + bucket_name + # Local temporary filepaths for testing + fakes3_dir = tempfile.mkdtemp() + input_dir = tempfile.mkdtemp() + input_path = os.path.join(input_dir, 'datasink_test_s3.txt') + + # Check for fakes3 + fakes3_found = _check_for_fakes3() + if not fakes3_found: + return + + # Start up fake-S3 server + proc = Popen(['fakes3', '-r', fakes3_dir, '-p', '4567'], stdout=open(os.devnull, 'wb')) + + # Init boto3 s3 resource to talk with fakes3 + resource = boto3.resource(aws_access_key_id='mykey', + aws_secret_access_key='mysecret', + service_name='s3', + endpoint_url='http://localhost:4567', + use_ssl=False) + resource.meta.client.meta.events.unregister('before-sign.s3', fix_s3_host) + + # Create bucket + bucket = resource.create_bucket(Bucket=bucket_name) + + # Create input file + with open(input_path, 'wb') as f: + f.write('ABCD1234') + + # Prep datasink + ds.inputs.base_directory = output_dir + ds.inputs.container = container + ds.inputs.bucket = bucket + setattr(ds.inputs, attr_folder, input_path) + + # Run datasink + ds.run() + + # Get MD5sums and compare + key = '/'.join([container, attr_folder, os.path.basename(input_path)]) + obj = bucket.Object(key=key) + dst_md5 = obj.e_tag.replace('"', '') + src_md5 = hashlib.md5(open(input_path, 'rb').read()).hexdigest() + + # Make sure md5sums match + yield assert_equal, src_md5, dst_md5 + + # Kill fakes3 + proc.kill() + + # Delete fakes3 folder and input file + shutil.rmtree(fakes3_dir) + shutil.rmtree(input_dir) + @skipif(noboto) def test_s3datasink(): ds = nio.S3DataSink() From a70c81e9d01a8717a39ef1d171e60943b10f5db5 Mon Sep 17 00:00:00 2001 From: dclark87 Date: Fri, 8 Jan 2016 14:32:33 -0500 Subject: [PATCH 31/45] Finished adding local_copy logic and passed all unit tests --- nipype/interfaces/io.py | 38 +++++++----- nipype/interfaces/tests/test_io.py | 92 ++++++++++++++++++++++++------ 2 files changed, 98 insertions(+), 32 deletions(-) diff --git a/nipype/interfaces/io.py b/nipype/interfaces/io.py index f944114b8d..86359756f6 100644 --- a/nipype/interfaces/io.py +++ b/nipype/interfaces/io.py @@ -209,7 +209,7 @@ class DataSinkInputSpec(DynamicTraitedSpec, BaseInterfaceInputSpec): bucket = traits.Generic(mandatory=False, desc='Boto3 S3 bucket for manual override of bucket') # Set this if user wishes to have local copy of files as well - local_dir = traits.Str(desc='Copy files locally as well as to S3 bucket') + local_copy = traits.Str(desc='Copy files locally as well as to S3 bucket') # Set call-able inputs attributes def __setattr__(self, key, value): @@ -392,6 +392,10 @@ def _check_s3_base_dir(self): s3_str = 's3://' base_directory = self.inputs.base_directory + if not isdefined(base_directory): + s3_flag = False + return s3_flag + # Explicitly lower-case the "s3" if base_directory.lower().startswith(s3_str): base_dir_sp = base_directory.split('/') @@ -616,7 +620,7 @@ def _upload_to_s3(self, src, dst): else: iflogger.info('Overwriting previous S3 file...') - except ClientError as exc: + except ClientError: iflogger.info('New file to S3') # Copy 
file up to S3 (either encrypted or not) @@ -653,18 +657,21 @@ def _list_outputs(self): # Check if base directory reflects S3 bucket upload try: s3_flag = self._check_s3_base_dir() - s3dir = self.inputs.base_directory - if isdefined(self.inputs.container): - s3dir = os.path.join(s3dir, self.inputs.container) + if s3_flag: + s3dir = self.inputs.base_directory + if isdefined(self.inputs.container): + s3dir = os.path.join(s3dir, self.inputs.container) + else: + s3dir = '' # If encountering an exception during bucket access, set output # base directory to a local folder except Exception as exc: + s3dir = '' + s3_flag = False if not isdefined(self.inputs.local_copy): local_out_exception = os.path.join(os.path.expanduser('~'), 's3_datasink_' + self.bucket.name) outdir = local_out_exception - else: - outdir = self.inputs.local_copy # Log local copying directory iflogger.info('Access to S3 failed! Storing outputs locally at: '\ '%s\nError: %s' %(outdir, exc)) @@ -673,8 +680,8 @@ def _list_outputs(self): if isdefined(self.inputs.container): outdir = os.path.join(outdir, self.inputs.container) - # If doing a localy output - if not outdir.lower().startswith('s3://'): + # If sinking to local folder + if outdir != s3dir: outdir = os.path.abspath(outdir) # Create the directory if it doesn't exist if not os.path.exists(outdir): @@ -714,18 +721,19 @@ def _list_outputs(self): if not os.path.isfile(src): src = os.path.join(src, '') dst = self._get_dst(src) + if s3_flag: + s3dst = os.path.join(s3tempoutdir, dst) + s3dst = self._substitute(s3dst) dst = os.path.join(tempoutdir, dst) - s3dst = os.path.join(s3tempoutdir, dst) dst = self._substitute(dst) path, _ = os.path.split(dst) # If we're uploading to S3 if s3_flag: - dst = dst.replace(outdir, self.inputs.base_directory) - self._upload_to_s3(src, dst) - out_files.append(dst) + self._upload_to_s3(src, s3dst) + out_files.append(s3dst) # Otherwise, copy locally src -> dst - else: + if not s3_flag or isdefined(self.inputs.local_copy): # Create output directory if it doesnt exist if not os.path.exists(path): try: @@ -787,6 +795,8 @@ class S3DataSinkInputSpec(DynamicTraitedSpec, BaseInterfaceInputSpec): _outputs = traits.Dict(traits.Str, value={}, usedefault=True) remove_dest_dir = traits.Bool(False, usedefault=True, desc='remove dest directory when copying dirs') + # Set this if user wishes to have local copy of files as well + local_copy = traits.Str(desc='Copy files locally as well as to S3 bucket') def __setattr__(self, key, value): if key not in self.copyable_trait_names(): diff --git a/nipype/interfaces/tests/test_io.py b/nipype/interfaces/tests/test_io.py index 25ac843d0e..d5abeab223 100644 --- a/nipype/interfaces/tests/test_io.py +++ b/nipype/interfaces/tests/test_io.py @@ -177,7 +177,7 @@ def _check_for_fakes3(): # Check for fakes3 try: - ret_code = subprocess.check_call(['which', 'fakes3']) + ret_code = subprocess.check_call(['which', 'fakes3'], stdout=open(os.devnull, 'wb')) if ret_code == 0: fakes3_found = True except subprocess.CalledProcessError as exc: @@ -188,7 +188,29 @@ def _check_for_fakes3(): # Return if found return fakes3_found -@skipif(noboto3) +def _make_dummy_input(): + ''' + ''' + + # Import packages + import tempfile + + # Init variables + input_dir = tempfile.mkdtemp() + input_path = os.path.join(input_dir, 'datasink_test_s3.txt') + + # Create input file + with open(input_path, 'wb') as f: + f.write('ABCD1234') + + # Return path + return input_path + +# Check for fakes3 +fakes3 = _check_for_fakes3() + + +@skipif(noboto3 or not fakes3) # 
Test datasink writes to s3 properly def test_datasink_to_s3(): ''' @@ -208,13 +230,7 @@ def test_datasink_to_s3(): output_dir = 's3://' + bucket_name # Local temporary filepaths for testing fakes3_dir = tempfile.mkdtemp() - input_dir = tempfile.mkdtemp() - input_path = os.path.join(input_dir, 'datasink_test_s3.txt') - - # Check for fakes3 - fakes3_found = _check_for_fakes3() - if not fakes3_found: - return + input_path = _make_dummy_input() # Start up fake-S3 server proc = Popen(['fakes3', '-r', fakes3_dir, '-p', '4567'], stdout=open(os.devnull, 'wb')) @@ -230,10 +246,6 @@ def test_datasink_to_s3(): # Create bucket bucket = resource.create_bucket(Bucket=bucket_name) - # Create input file - with open(input_path, 'wb') as f: - f.write('ABCD1234') - # Prep datasink ds.inputs.base_directory = output_dir ds.inputs.container = container @@ -249,15 +261,59 @@ def test_datasink_to_s3(): dst_md5 = obj.e_tag.replace('"', '') src_md5 = hashlib.md5(open(input_path, 'rb').read()).hexdigest() - # Make sure md5sums match - yield assert_equal, src_md5, dst_md5 - # Kill fakes3 proc.kill() # Delete fakes3 folder and input file shutil.rmtree(fakes3_dir) - shutil.rmtree(input_dir) + shutil.rmtree(os.path.dirname(input_path)) + + # Make sure md5sums match + yield assert_equal, src_md5, dst_md5 + +# Test the local copy attribute +def test_datasink_localcopy(): + ''' + Function to validate DataSink will make local copy via local_copy + attribute + ''' + + # Import packages + import hashlib + import tempfile + + # Init variables + local_dir = tempfile.mkdtemp() + container = 'outputs' + attr_folder = 'text_file' + + # Make dummy input file and datasink + input_path = _make_dummy_input() + ds = nio.DataSink() + + # Set up datasink + ds.inputs.container = container + ds.inputs.local_copy = local_dir + setattr(ds.inputs, attr_folder, input_path) + + # Expected local copy path + local_copy = os.path.join(local_dir, container, attr_folder, + os.path.basename(input_path)) + + # Run the datasink + ds.run() + + # Check md5sums of both + src_md5 = hashlib.md5(open(input_path, 'rb').read()).hexdigest() + dst_md5 = hashlib.md5(open(local_copy, 'rb').read()).hexdigest() + + # Delete temp diretories + shutil.rmtree(os.path.dirname(input_path)) + shutil.rmtree(local_dir) + + # Perform test + yield assert_equal, src_md5, dst_md5 + @skipif(noboto) def test_s3datasink(): @@ -300,7 +356,7 @@ def test_datasink_substitutions(): shutil.rmtree(indir) shutil.rmtree(outdir) -@skipif(noboto) +@skipif(noboto or not fakes3) def test_s3datasink_substitutions(): indir = mkdtemp(prefix='-Tmp-nipype_ds_subs_in') outdir = mkdtemp(prefix='-Tmp-nipype_ds_subs_out') From 2af5c1d4de916f6cd7fd143b7eadf3796489891e Mon Sep 17 00:00:00 2001 From: dclark87 Date: Fri, 8 Jan 2016 14:36:43 -0500 Subject: [PATCH 32/45] Removed memory profiler stuff for now --- nipype/interfaces/base.py | 7 ------- nipype/interfaces/utility.py | 7 +------ nipype/pipeline/plugins/multiproc.py | 26 +++++--------------------- 3 files changed, 6 insertions(+), 34 deletions(-) diff --git a/nipype/interfaces/base.py b/nipype/interfaces/base.py index 2112cdc739..414f36932c 100644 --- a/nipype/interfaces/base.py +++ b/nipype/interfaces/base.py @@ -1207,13 +1207,6 @@ def run_command(runtime, output=None, timeout=0.01, redirect_x=False): The returned runtime contains a merged stdout+stderr log with timestamps """ - # Import packages - try: - from memory_profiler import _get_memory - mem_prof = True - except: - mem_prof = False - # Init variables PIPE = subprocess.PIPE cmdline = 
runtime.cmdline diff --git a/nipype/interfaces/utility.py b/nipype/interfaces/utility.py index 10effaa548..ca2bb5ba69 100644 --- a/nipype/interfaces/utility.py +++ b/nipype/interfaces/utility.py @@ -442,12 +442,7 @@ def _run_interface(self, runtime): if isdefined(value): args[name] = value - # mem stuff - import memory_profiler - proc = (function_handle, (), args) - mem_mb, out = memory_profiler.memory_usage(proc=proc, retval=True, include_children=True, max_usage=True) - setattr(runtime, 'real_memory2', mem_mb[0]/1024.0) - #out = function_handle(**args) + out = function_handle(**args) if len(self._output_names) == 1: self._out[self._output_names[0]] = out diff --git a/nipype/pipeline/plugins/multiproc.py b/nipype/pipeline/plugins/multiproc.py index 3a5c63df35..1bca2d1922 100644 --- a/nipype/pipeline/plugins/multiproc.py +++ b/nipype/pipeline/plugins/multiproc.py @@ -16,27 +16,11 @@ def run_node(node, updatehash, plugin_args=None): result = dict(result=None, traceback=None) try: - run_memory = plugin_args['memory_profile'] - except Exception: - run_memory = False - if run_memory: - import memory_profiler - import datetime - proc = (node.run, (), {'updatehash' : updatehash}) - start = datetime.datetime.now() - mem_mb, retval = memory_profiler.memory_usage(proc=proc, retval=True, include_children=True, max_usage=True) - runtime = (datetime.datetime.now() - start).total_seconds() - result['result'] = retval - result['real_memory'] = mem_mb[0]/1024.0 - result['real_memory2'] = retval.runtime.get('real_memory2') - result['run_seconds'] = runtime - else: - try: - result['result'] = node.run(updatehash=updatehash) - except: - etype, eval, etr = sys.exc_info() - result['traceback'] = format_exception(etype,eval,etr) - result['result'] = node.result + result['result'] = node.run(updatehash=updatehash) + except: + etype, eval, etr = sys.exc_info() + result['traceback'] = format_exception(etype,eval,etr) + result['result'] = node.result return result From b7e930937041bbee47b096f70ae4093c412a28be Mon Sep 17 00:00:00 2001 From: dclark87 Date: Fri, 8 Jan 2016 15:33:43 -0500 Subject: [PATCH 33/45] Removed the memory profiler code to just pull in s3 datasink code --- nipype/interfaces/base.py | 31 ------------------------- nipype/pipeline/plugins/base.py | 11 +++------ nipype/pipeline/plugins/callback_log.py | 14 ++++------- nipype/pipeline/plugins/multiproc.py | 6 ++--- 4 files changed, 10 insertions(+), 52 deletions(-) diff --git a/nipype/interfaces/base.py b/nipype/interfaces/base.py index 414f36932c..b202453c4f 100644 --- a/nipype/interfaces/base.py +++ b/nipype/interfaces/base.py @@ -751,17 +751,8 @@ def __init__(self, **inputs): self.__class__.__name__) self.inputs = self.input_spec(**inputs) self.estimated_memory = 1 - self._real_memory = 0 self.num_threads = 1 - @property - def real_memory(self): - return self._real_memory - - @real_memory.setter - def real_memory(self, value): - self._real_memory = value - @classmethod def help(cls, returnhelp=False): """ Prints class help @@ -1240,9 +1231,6 @@ def run_command(runtime, output=None, timeout=0.01, redirect_x=False): errfile = os.path.join(runtime.cwd, 'stderr.nipype') outfile = os.path.join(runtime.cwd, 'stdout.nipype') - # Init variables for memory profiling - ret = -1 - interval = 0.1 if output == 'stream': streams = [Stream('stdout', proc.stdout), Stream('stderr', proc.stderr)] @@ -1260,9 +1248,6 @@ def _process(drain=0): for stream in res[0]: stream.read(drain) while proc.returncode is None: - if mem_prof: - ret = max([ret, 
_get_memory(proc.pid, include_children=True)]) - time.sleep(interval) proc.poll() _process() _process(drain=1) @@ -1278,21 +1263,11 @@ def _process(drain=0): result['merged'] = [r[1] for r in temp] if output == 'allatonce': - if mem_prof: - while proc.returncode is None: - ret = max([ret, _get_memory(proc.pid, include_children=True)]) - time.sleep(interval) - proc.poll() stdout, stderr = proc.communicate() result['stdout'] = stdout.split('\n') result['stderr'] = stderr.split('\n') result['merged'] = '' if output == 'file': - if mem_prof: - while proc.returncode is None: - ret = max([ret, _get_memory(proc.pid, include_children=True)]) - time.sleep(interval) - proc.poll() ret_code = proc.wait() stderr.flush() stdout.flush() @@ -1300,17 +1275,11 @@ def _process(drain=0): result['stderr'] = [line.strip() for line in open(errfile).readlines()] result['merged'] = '' if output == 'none': - if mem_prof: - while proc.returncode is None: - ret = max([ret, _get_memory(proc.pid, include_children=True)]) - time.sleep(interval) - proc.poll() proc.communicate() result['stdout'] = [] result['stderr'] = [] result['merged'] = '' - setattr(runtime, 'real_memory2', ret/1024.0) runtime.stderr = '\n'.join(result['stderr']) runtime.stdout = '\n'.join(result['stdout']) runtime.merged = result['merged'] diff --git a/nipype/pipeline/plugins/base.py b/nipype/pipeline/plugins/base.py index ab76520844..cee2c7dad5 100644 --- a/nipype/pipeline/plugins/base.py +++ b/nipype/pipeline/plugins/base.py @@ -241,7 +241,7 @@ def run(self, graph, config, updatehash=False): notrun.append(self._clean_queue(jobid, graph, result=result)) else: - self._task_finished_cb(jobid, result) + self._task_finished_cb(jobid) self._remove_node_dirs() self._clear_task(taskid) else: @@ -408,7 +408,7 @@ def _send_procs_to_workers(self, updatehash=False, graph=None): else: break - def _task_finished_cb(self, jobid, result=None): + def _task_finished_cb(self, jobid): """ Extract outputs and assign to inputs of dependent tasks This is called when a job is completed. 
@@ -416,12 +416,7 @@ def _task_finished_cb(self, jobid, result=None): logger.info('[Job finished] jobname: %s jobid: %d' % (self.procs[jobid]._id, jobid)) if self._status_callback: - if result == None: - if self._taskresult.has_key(jobid): - result = self._taskresult[jobid].get() - else: - result = {'real_memory' : 'nokey'} - self._status_callback(self.procs[jobid], 'end', result) + self._status_callback(self.procs[jobid], 'end') # Update job and worker queues self.proc_pending[jobid] = False # update the job dependency structure diff --git a/nipype/pipeline/plugins/callback_log.py b/nipype/pipeline/plugins/callback_log.py index a20242df95..9d73b7b51a 100644 --- a/nipype/pipeline/plugins/callback_log.py +++ b/nipype/pipeline/plugins/callback_log.py @@ -1,14 +1,8 @@ import datetime import logging -def log_nodes_cb(node, status, result=None): +def log_nodes_cb(node, status): logger = logging.getLogger('callback') - try: - real_mem1 = result['real_memory'] - real_mem2 = result['real_memory2'] - run_seconds = result['run_seconds'] - except Exception as exc: - real_mem1 = real_mem2 = run_seconds = 'N/A' if status == 'start': message = '{"name":' + '"' + node.name + '"' + ',"id":' + '"' +\ node._id + '"' + ',"start":' + '"' +str(datetime.datetime.now()) +\ @@ -20,15 +14,15 @@ def log_nodes_cb(node, status, result=None): elif status == 'end': message = '{"name":' + '"' + node.name + '"' + ',"id":' + '"' + \ node._id + '"' + ',"finish":' + '"' + str(datetime.datetime.now()) +\ - '"' + ',"memory":' + str(node._interface.estimated_memory) + ',"num_threads":' \ - + str(node._interface.num_threads) + ',"real_memory1":' + str(real_mem1) + ',"real_memory2":' + str(real_mem2) + ',"run_seconds":' + str(run_seconds) + '}' + '"' + ',"estimate memory":' + str(node._interface.estimated_memory) + ',"num_threads":' \ + + str(node._interface.num_threads) + '}' logger.debug(message) else: message = '{"name":' + '"' + node.name + '"' + ',"id":' + '"' + \ node._id + '"' + ',"finish":' + '"' + str(datetime.datetime.now()) +\ - '"' + ',"memory":' + str(node._interface.estimated_memory) + ',"num_threads":' \ + '"' + ',"estimate memory":' + str(node._interface.estimated_memory) + ',"num_threads":' \ + str(node._interface.num_threads) + ',"error":"True"}' logger.debug(message) diff --git a/nipype/pipeline/plugins/multiproc.py b/nipype/pipeline/plugins/multiproc.py index 1bca2d1922..ec9a65905e 100644 --- a/nipype/pipeline/plugins/multiproc.py +++ b/nipype/pipeline/plugins/multiproc.py @@ -83,8 +83,8 @@ def _submit_job(self, node, updatehash=False): node.inputs.terminal_output = 'allatonce' except: pass - self._taskresult[self._taskid] = self.pool.apply_async(run_node, (node, - updatehash,)) + self._taskresult[self._taskid] = self.pool.apply_async(run_node, + (node, updatehash,)) return self._taskid def _report_crash(self, node, result=None): @@ -161,7 +161,7 @@ def _submit_job(self, node, updatehash=False): except: pass self._taskresult[self._taskid] = self.pool.apply_async(run_node, - (node, updatehash, self.plugin_args), + (node, updatehash,), callback=release_lock) return self._taskid From 0e5e0e9b2c94de19fd43f5a25ab68917e5056fa6 Mon Sep 17 00:00:00 2001 From: dclark87 Date: Fri, 8 Jan 2016 15:37:00 -0500 Subject: [PATCH 34/45] Removed unneccessary import --- nipype/interfaces/io.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/nipype/interfaces/io.py b/nipype/interfaces/io.py index 86359756f6..a5ddb41211 100644 --- a/nipype/interfaces/io.py +++ b/nipype/interfaces/io.py @@ -226,14 +226,9 @@ 
def __setattr__(self, key, value): # DataSink outputs class DataSinkOutputSpec(TraitedSpec): - ''' - ''' - - # Import packages - import traits.api as tapi # Init out file - out_file = tapi.Any(desc='datasink output') + out_file = traits.Any(desc='datasink output') # Custom DataSink class From 0f78025b64f16e137f4cb6cc1928e2cac8156478 Mon Sep 17 00:00:00 2001 From: dclark87 Date: Fri, 8 Jan 2016 15:43:47 -0500 Subject: [PATCH 35/45] Removed unncessary function argument --- nipype/pipeline/plugins/multiproc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nipype/pipeline/plugins/multiproc.py b/nipype/pipeline/plugins/multiproc.py index ec9a65905e..b42213f200 100644 --- a/nipype/pipeline/plugins/multiproc.py +++ b/nipype/pipeline/plugins/multiproc.py @@ -13,7 +13,7 @@ from .base import (DistributedPluginBase, report_crash) -def run_node(node, updatehash, plugin_args=None): +def run_node(node, updatehash): result = dict(result=None, traceback=None) try: result['result'] = node.run(updatehash=updatehash) From 15f3cedb22a2a4570271f124e3d8125af3ff9d52 Mon Sep 17 00:00:00 2001 From: dclark87 Date: Fri, 8 Jan 2016 15:56:31 -0500 Subject: [PATCH 36/45] Corrected Carol's in fsl interface code --- nipype/interfaces/fsl/model.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/nipype/interfaces/fsl/model.py b/nipype/interfaces/fsl/model.py index 369ecb28f3..d37f8db111 100644 --- a/nipype/interfaces/fsl/model.py +++ b/nipype/interfaces/fsl/model.py @@ -250,13 +250,7 @@ def _create_ev_files( element=count, ctype=ctype, val=val) ev_txt += "\n" - # if con[0] in con_map.keys(): - # for fconidx in con_map[con[0]]: - # ev_txt += contrast_ftest_element.substitute( - # cnum=ftest_idx.index(fconidx) + 1, - # element=tidx, - # ctype=ctype, - # val=1) + for fconidx in ftest_idx: fval=0 if con[0] in con_map.keys() and fconidx in con_map[con[0]]: @@ -266,7 +260,7 @@ def _create_ev_files( element=tidx, ctype=ctype, val=fval) - ev_txt += "\n" + ev_txt += "\n" # add contrast mask info ev_txt += contrastmask_header.substitute() From ca4bed5a8a2dcd208345a2feb6093a1737f49813 Mon Sep 17 00:00:00 2001 From: dclark87 Date: Mon, 11 Jan 2016 14:48:22 -0500 Subject: [PATCH 37/45] Removed all of the ResourceMultiProc plugin so the S3 datasink --- nipype/interfaces/base.py | 11 +- nipype/interfaces/fsl/model.py | 20 +- nipype/pipeline/plugins/__init__.py | 3 - nipype/pipeline/plugins/base.py | 7 +- nipype/pipeline/plugins/callback_log.py | 28 -- nipype/pipeline/plugins/multiproc.py | 172 +----------- .../pipeline/plugins/semaphore_singleton.py | 2 - .../pipeline/plugins/tests/test_multiproc.py | 185 +------------ nipype/utils/draw_gantt_chart.py | 261 ------------------ 9 files changed, 14 insertions(+), 675 deletions(-) delete mode 100644 nipype/pipeline/plugins/callback_log.py delete mode 100644 nipype/pipeline/plugins/semaphore_singleton.py delete mode 100644 nipype/utils/draw_gantt_chart.py diff --git a/nipype/interfaces/base.py b/nipype/interfaces/base.py index b202453c4f..ac6b7b8af4 100644 --- a/nipype/interfaces/base.py +++ b/nipype/interfaces/base.py @@ -750,8 +750,6 @@ def __init__(self, **inputs): raise Exception('No input_spec in class: %s' % self.__class__.__name__) self.inputs = self.input_spec(**inputs) - self.estimated_memory = 1 - self.num_threads = 1 @classmethod def help(cls, returnhelp=False): @@ -1197,11 +1195,9 @@ def run_command(runtime, output=None, timeout=0.01, redirect_x=False): The returned runtime contains a merged stdout+stderr log with timestamps """ - - 
# Init variables PIPE = subprocess.PIPE - cmdline = runtime.cmdline + cmdline = runtime.cmdline if redirect_x: exist_xvfb, _ = _exists_in_path('xvfb-run', runtime.environ) if not exist_xvfb: @@ -1230,8 +1226,6 @@ def run_command(runtime, output=None, timeout=0.01, redirect_x=False): result = {} errfile = os.path.join(runtime.cwd, 'stderr.nipype') outfile = os.path.join(runtime.cwd, 'stdout.nipype') - - if output == 'stream': streams = [Stream('stdout', proc.stdout), Stream('stderr', proc.stderr)] @@ -1247,6 +1241,7 @@ def _process(drain=0): else: for stream in res[0]: stream.read(drain) + while proc.returncode is None: proc.poll() _process() @@ -1261,7 +1256,6 @@ def _process(drain=0): result[stream._name] = [r[2] for r in rows] temp.sort() result['merged'] = [r[1] for r in temp] - if output == 'allatonce': stdout, stderr = proc.communicate() result['stdout'] = stdout.split('\n') @@ -1279,7 +1273,6 @@ def _process(drain=0): result['stdout'] = [] result['stderr'] = [] result['merged'] = '' - runtime.stderr = '\n'.join(result['stderr']) runtime.stdout = '\n'.join(result['stdout']) runtime.merged = result['merged'] diff --git a/nipype/interfaces/fsl/model.py b/nipype/interfaces/fsl/model.py index d37f8db111..c2d1c960b4 100644 --- a/nipype/interfaces/fsl/model.py +++ b/nipype/interfaces/fsl/model.py @@ -250,17 +250,14 @@ def _create_ev_files( element=count, ctype=ctype, val=val) ev_txt += "\n" - - for fconidx in ftest_idx: - fval=0 - if con[0] in con_map.keys() and fconidx in con_map[con[0]]: - fval=1 - ev_txt += contrast_ftest_element.substitute( - cnum=ftest_idx.index(fconidx) + 1, - element=tidx, - ctype=ctype, - val=fval) - ev_txt += "\n" + if con[0] in con_map.keys(): + for fconidx in con_map[con[0]]: + ev_txt += contrast_ftest_element.substitute( + cnum=ftest_idx.index(fconidx) + 1, + element=tidx, + ctype=ctype, + val=1) + ev_txt += "\n" # add contrast mask info ev_txt += contrastmask_header.substitute() @@ -1959,4 +1956,3 @@ def _list_outputs(self): self.inputs.out_vnscales_name) return outputs - diff --git a/nipype/pipeline/plugins/__init__.py b/nipype/pipeline/plugins/__init__.py index cf392f0f77..dac14301b2 100644 --- a/nipype/pipeline/plugins/__init__.py +++ b/nipype/pipeline/plugins/__init__.py @@ -9,7 +9,6 @@ from .condor import CondorPlugin from .dagman import CondorDAGManPlugin from .multiproc import MultiProcPlugin -from .multiproc import ResourceMultiProcPlugin from .ipython import IPythonPlugin from .somaflow import SomaFlowPlugin from .pbsgraph import PBSGraphPlugin @@ -17,5 +16,3 @@ from .lsf import LSFPlugin from .slurm import SLURMPlugin from .slurmgraph import SLURMGraphPlugin - -from .callback_log import log_nodes_cb diff --git a/nipype/pipeline/plugins/base.py b/nipype/pipeline/plugins/base.py index cee2c7dad5..bb8bd91aef 100644 --- a/nipype/pipeline/plugins/base.py +++ b/nipype/pipeline/plugins/base.py @@ -260,15 +260,10 @@ def run(self, graph, config, updatehash=False): graph=graph) else: logger.debug('Not submitting') - self._wait() + sleep(float(self._config['execution']['poll_sleep_duration'])) self._remove_node_dirs() report_nodes_not_run(notrun) - - - def _wait(self): - sleep(float(self._config['execution']['poll_sleep_duration'])) - def _get_result(self, taskid): raise NotImplementedError diff --git a/nipype/pipeline/plugins/callback_log.py b/nipype/pipeline/plugins/callback_log.py deleted file mode 100644 index 9d73b7b51a..0000000000 --- a/nipype/pipeline/plugins/callback_log.py +++ /dev/null @@ -1,28 +0,0 @@ -import datetime -import logging - -def 
log_nodes_cb(node, status): - logger = logging.getLogger('callback') - if status == 'start': - message = '{"name":' + '"' + node.name + '"' + ',"id":' + '"' +\ - node._id + '"' + ',"start":' + '"' +str(datetime.datetime.now()) +\ - '"' + ',"estimate memory":' + str(node._interface.estimated_memory) + ',"num_threads":' \ - + str(node._interface.num_threads) + '}' - - logger.debug(message) - - elif status == 'end': - message = '{"name":' + '"' + node.name + '"' + ',"id":' + '"' + \ - node._id + '"' + ',"finish":' + '"' + str(datetime.datetime.now()) +\ - '"' + ',"estimate memory":' + str(node._interface.estimated_memory) + ',"num_threads":' \ - + str(node._interface.num_threads) + '}' - - logger.debug(message) - - else: - message = '{"name":' + '"' + node.name + '"' + ',"id":' + '"' + \ - node._id + '"' + ',"finish":' + '"' + str(datetime.datetime.now()) +\ - '"' + ',"estimate memory":' + str(node._interface.estimated_memory) + ',"num_threads":' \ - + str(node._interface.num_threads) + ',"error":"True"}' - - logger.debug(message) diff --git a/nipype/pipeline/plugins/multiproc.py b/nipype/pipeline/plugins/multiproc.py index b42213f200..0f6b11c30a 100644 --- a/nipype/pipeline/plugins/multiproc.py +++ b/nipype/pipeline/plugins/multiproc.py @@ -12,7 +12,6 @@ from .base import (DistributedPluginBase, report_crash) - def run_node(node, updatehash): result = dict(result=None, traceback=None) try: @@ -23,7 +22,6 @@ def run_node(node, updatehash): result['result'] = node.result return result - class NonDaemonProcess(Process): """A non-daemon process to support internal multiprocessing. """ @@ -68,7 +66,6 @@ def __init__(self, plugin_args=None): else: self.pool = Pool(processes=n_procs) - def _get_result(self, taskid): if taskid not in self._taskresult: raise RuntimeError('Multiproc task %d not found'%taskid) @@ -84,7 +81,8 @@ def _submit_job(self, node, updatehash=False): except: pass self._taskresult[self._taskid] = self.pool.apply_async(run_node, - (node, updatehash,)) + (node, + updatehash,)) return self._taskid def _report_crash(self, node, result=None): @@ -98,169 +96,3 @@ def _report_crash(self, node, result=None): def _clear_task(self, taskid): del self._taskresult[taskid] - - - -import numpy as np -from copy import deepcopy -from ..engine import (MapNode, str2bool) -import datetime -import psutil -from ... import logging -import semaphore_singleton -logger = logging.getLogger('workflow') - -def release_lock(args): - semaphore_singleton.semaphore.release() - -class ResourceMultiProcPlugin(MultiProcPlugin): - """Execute workflow with multiprocessing not sending more jobs at once - than the system can support. - - The plugin_args input to run can be used to control the multiprocessing - execution and defining the maximum amount of memory and threads that - should be used. When those parameters are not specified, - the number of threads and memory of the system is used. - - System consuming nodes should be tagged: - memory_consuming_node.interface.memory = 8 #Gb - thread_consuming_node.interface.num_threads = 16 - - The default number of threads and memory for a node is 1. - - Currently supported options are: - - - num_thread: maximum number of threads to be executed in parallel - - memory: maximum memory that can be used at once. 
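As a usage sketch of what this docstring describes (note that in this revision the per-node attribute is interface.estimated_memory rather than interface.memory, matching the unit tests later in the series), with workflow and node names that are purely illustrative:

    from nipype.pipeline.plugins.callback_log import log_nodes_cb

    # 'workflow' is an existing nipype Workflow and 'big_node' one of its nodes;
    # both names are placeholders for this sketch
    big_node.interface.estimated_memory = 8   # GB; defaults to 1
    big_node.interface.num_threads = 4        # defaults to 1

    workflow.run(plugin='ResourceMultiProc',
                 plugin_args={'n_procs': 8,
                              'memory': 12,
                              'status_callback': log_nodes_cb})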
- - """ - - def __init__(self, plugin_args=None): - super(ResourceMultiProcPlugin, self).__init__(plugin_args=plugin_args) - self.plugin_args = plugin_args - self.processors = cpu_count() - memory = psutil.virtual_memory() - self.memory = memory.total / (1024*1024*1024) - if self.plugin_args: - if 'n_procs' in self.plugin_args: - self.processors = self.plugin_args['n_procs'] - if 'memory' in self.plugin_args: - self.memory = self.plugin_args['memory'] - - def _wait(self): - if len(self.pending_tasks) > 0: - semaphore_singleton.semaphore.acquire() - semaphore_singleton.semaphore.release() - - - def _submit_job(self, node, updatehash=False): - self._taskid += 1 - try: - if node.inputs.terminal_output == 'stream': - node.inputs.terminal_output = 'allatonce' - except: - pass - self._taskresult[self._taskid] = self.pool.apply_async(run_node, - (node, updatehash,), - callback=release_lock) - return self._taskid - - def _send_procs_to_workers(self, updatehash=False, graph=None): - """ Sends jobs to workers when system resources are available. - Check memory (gb) and cores usage before running jobs. - """ - executing_now = [] - - # Check to see if a job is available - jobids = np.flatnonzero((self.proc_pending == True) & (self.depidx.sum(axis=0) == 0).__array__()) - - #check available system resources by summing all threads and memory used - busy_memory = 0 - busy_processors = 0 - for jobid in jobids: - busy_memory+= self.procs[jobid]._interface.estimated_memory - busy_processors+= self.procs[jobid]._interface.num_threads - - free_memory = self.memory - busy_memory - free_processors = self.processors - busy_processors - - - #check all jobs without dependency not run - jobids = np.flatnonzero((self.proc_done == False) & (self.depidx.sum(axis=0) == 0).__array__()) - - - #sort jobs ready to run first by memory and then by number of threads - #The most resource consuming jobs run first - jobids = sorted(jobids, key=lambda item: (self.procs[item]._interface.estimated_memory, self.procs[item]._interface.num_threads)) - - logger.debug('Free memory: %d, Free processors: %d', free_memory, free_processors) - - - #while have enough memory and processors for first job - #submit first job on the list - for jobid in jobids: - logger.debug('Next Job: %d, memory: %d, threads: %d' %(jobid, self.procs[jobid]._interface.estimated_memory, self.procs[jobid]._interface.num_threads)) - - if self.procs[jobid]._interface.estimated_memory <= free_memory and self.procs[jobid]._interface.num_threads <= free_processors: - logger.info('Executing: %s ID: %d' %(self.procs[jobid]._id, jobid)) - executing_now.append(self.procs[jobid]) - - if isinstance(self.procs[jobid], MapNode): - try: - num_subnodes = self.procs[jobid].num_subnodes() - except Exception: - self._clean_queue(jobid, graph) - self.proc_pending[jobid] = False - continue - if num_subnodes > 1: - submit = self._submit_mapnode(jobid) - if not submit: - continue - - # change job status in appropriate queues - self.proc_done[jobid] = True - self.proc_pending[jobid] = True - - free_memory -= self.procs[jobid]._interface.estimated_memory - free_processors -= self.procs[jobid]._interface.num_threads - - # Send job to task manager and add to pending tasks - if self._status_callback: - self._status_callback(self.procs[jobid], 'start') - if str2bool(self.procs[jobid].config['execution']['local_hash_check']): - logger.debug('checking hash locally') - try: - hash_exists, _, _, _ = self.procs[ - jobid].hash_exists() - logger.debug('Hash exists %s' % str(hash_exists)) - if 
(hash_exists and (self.procs[jobid].overwrite == False or (self.procs[jobid].overwrite == None and not self.procs[jobid]._interface.always_run))): - self._task_finished_cb(jobid) - self._remove_node_dirs() - continue - except Exception: - self._clean_queue(jobid, graph) - self.proc_pending[jobid] = False - continue - logger.debug('Finished checking hash') - - if self.procs[jobid].run_without_submitting: - logger.debug('Running node %s on master thread' %self.procs[jobid]) - try: - self.procs[jobid].run() - except Exception: - self._clean_queue(jobid, graph) - self._task_finished_cb(jobid) - self._remove_node_dirs() - - else: - logger.debug('submitting', jobid) - tid = self._submit_job(deepcopy(self.procs[jobid]), updatehash=updatehash) - if tid is None: - self.proc_done[jobid] = False - self.proc_pending[jobid] = False - else: - self.pending_tasks.insert(0, (tid, jobid)) - else: - break - - logger.debug('No jobs waiting to execute') diff --git a/nipype/pipeline/plugins/semaphore_singleton.py b/nipype/pipeline/plugins/semaphore_singleton.py deleted file mode 100644 index 8894615a14..0000000000 --- a/nipype/pipeline/plugins/semaphore_singleton.py +++ /dev/null @@ -1,2 +0,0 @@ -import threading -semaphore = threading.Semaphore(1) \ No newline at end of file diff --git a/nipype/pipeline/plugins/tests/test_multiproc.py b/nipype/pipeline/plugins/tests/test_multiproc.py index d2f281eadd..8d9eac3e32 100644 --- a/nipype/pipeline/plugins/tests/test_multiproc.py +++ b/nipype/pipeline/plugins/tests/test_multiproc.py @@ -3,7 +3,7 @@ from tempfile import mkdtemp from shutil import rmtree -from nipype.testing import assert_equal, assert_less_equal +from nipype.testing import assert_equal import nipype.pipeline.engine as pe class InputSpec(nib.TraitedSpec): @@ -47,186 +47,3 @@ def test_run_multiproc(): yield assert_equal, result, [1, 1] os.chdir(cur_dir) rmtree(temp_dir) - - -################################ - - -class InputSpecSingleNode(nib.TraitedSpec): - input1 = nib.traits.Int(desc='a random int') - input2 = nib.traits.Int(desc='a random int') - -class OutputSpecSingleNode(nib.TraitedSpec): - output1 = nib.traits.Int(desc='a random int') - - -class TestInterfaceSingleNode(nib.BaseInterface): - input_spec = InputSpecSingleNode - output_spec = OutputSpecSingleNode - - def _run_interface(self, runtime): - runtime.returncode = 0 - return runtime - - def _list_outputs(self): - outputs = self._outputs().get() - outputs['output1'] = self.inputs.input1 - return outputs - - -def find_metrics(nodes, last_node): - import json - from dateutil.parser import parse - from datetime import datetime - import datetime as d - - - start = parse(nodes[0]['start']) - total_duration = int((parse(last_node['finish']) - start).total_seconds()) - - total_memory = [] - total_threads = [] - for i in range(total_duration): - total_memory.append(0) - total_threads.append(0) - - now = start - for i in range(total_duration): - start_index = 0 - node_start = None - node_finish = None - - x = now - - for j in range(start_index, len(nodes)): - node_start = parse(nodes[j]['start']) - node_finish = parse(nodes[j]['finish']) - - if node_start < x and node_finish > x: - total_memory[i] += nodes[j]['estimated_memory'] - total_threads[i] += nodes[j]['num_threads'] - start_index = j - - if node_start > x: - break - - now += d.timedelta(seconds=1) - - return total_memory, total_threads - - -import os -from nipype.pipeline.plugins.callback_log import log_nodes_cb -import logging -import logging.handlers -import psutil -from multiprocessing 
import cpu_count - -from nipype.utils import draw_gantt_chart - -def test_do_not_use_more_memory_then_specified(): - LOG_FILENAME = 'callback.log' - my_logger = logging.getLogger('callback') - my_logger.setLevel(logging.DEBUG) - - # Add the log message handler to the logger - handler = logging.FileHandler(LOG_FILENAME) - my_logger.addHandler(handler) - - max_memory = 10 - pipe = pe.Workflow(name='pipe') - n1 = pe.Node(interface=TestInterfaceSingleNode(), name='n1') - n2 = pe.Node(interface=TestInterfaceSingleNode(), name='n2') - n3 = pe.Node(interface=TestInterfaceSingleNode(), name='n3') - n4 = pe.Node(interface=TestInterfaceSingleNode(), name='n4') - - n1.interface.estimated_memory = 1 - n2.interface.estimated_memory = 1 - n3.interface.estimated_memory = 10 - n4.interface.estimated_memory = 1 - - pipe.connect(n1, 'output1', n2, 'input1') - pipe.connect(n1, 'output1', n3, 'input1') - pipe.connect(n2, 'output1', n4, 'input1') - pipe.connect(n3, 'output1', n4, 'input2') - n1.inputs.input1 = 10 - - pipe.run(plugin='ResourceMultiProc', plugin_args={'memory': max_memory, - 'status_callback': log_nodes_cb}) - - - nodes, last_node = draw_gantt_chart.log_to_json(LOG_FILENAME) - #usage in every second - memory, threads = find_metrics(nodes, last_node) - - result = True - for m in memory: - if m > max_memory: - result = False - break - - yield assert_equal, result, True - - max_threads = cpu_count() - - result = True - for t in threads: - if t > max_threads: - result = False - break - - yield assert_equal, result, True, "using more threads than system has (threads is not specified by user)" - - os.remove(LOG_FILENAME) - - -def test_do_not_use_more_threads_then_specified(): - LOG_FILENAME = 'callback.log' - my_logger = logging.getLogger('callback') - my_logger.setLevel(logging.DEBUG) - - # Add the log message handler to the logger - handler = logging.FileHandler(LOG_FILENAME) - my_logger.addHandler(handler) - - max_threads = 10 - pipe = pe.Workflow(name='pipe') - n1 = pe.Node(interface=TestInterfaceSingleNode(), name='n1') - n2 = pe.Node(interface=TestInterfaceSingleNode(), name='n2') - n3 = pe.Node(interface=TestInterfaceSingleNode(), name='n3') - n4 = pe.Node(interface=TestInterfaceSingleNode(), name='n4') - - n1.interface.num_threads = 1 - n2.interface.num_threads = 1 - n3.interface.num_threads = 10 - n4.interface.num_threads = 1 - - pipe.connect(n1, 'output1', n2, 'input1') - pipe.connect(n1, 'output1', n3, 'input1') - pipe.connect(n2, 'output1', n4, 'input1') - pipe.connect(n3, 'output1', n4, 'input2') - n1.inputs.input1 = 10 - pipe.config['execution']['poll_sleep_duration'] = 1 - pipe.run(plugin='ResourceMultiProc', plugin_args={'n_procs': max_threads, 'status_callback': log_nodes_cb}) - - nodes, last_node = draw_gantt_chart.log_to_json(LOG_FILENAME) - #usage in every second - memory, threads = find_metrics(nodes, last_node) - - result = True - for t in threads: - if t > max_threads: - result = False - break - - yield assert_equal, result, True, "using more threads than specified" - - max_memory = psutil.virtual_memory().total / (1024*1024) - result = True - for m in memory: - if m > max_memory: - result = False - break - yield assert_equal, result, True, "using more memory than system has (memory is not specified by user)" - - os.remove(LOG_FILENAME) \ No newline at end of file diff --git a/nipype/utils/draw_gantt_chart.py b/nipype/utils/draw_gantt_chart.py deleted file mode 100644 index 84bbc033a0..0000000000 --- a/nipype/utils/draw_gantt_chart.py +++ /dev/null @@ -1,261 +0,0 @@ -import json 
-from dateutil import parser -import datetime -import random - - -def log_to_json(logfile): - result = [] - with open(logfile, 'r') as content: - - #read file separating each line - content = content.read() - lines = content.split('\n') - - lines = [ json.loads(x) for x in lines[:-1]] - - last_node = [ x for x in lines if x.has_key('finish')][-1] - - for i, line in enumerate(lines): - #get first start it finds - if not line.has_key('start'): - continue - - #fint the end node for that start - for j in range(i+1, len(lines)): - if lines[j].has_key('finish'): - if lines[j]['id'] == line['id'] and lines[j]['name'] == line['name']: - line['finish'] = lines[j]['finish'] - line['duration'] = (parser.parse(line['finish']) - parser.parse(line['start'])).total_seconds() - result.append(line) - break - - return result, last_node - - -#total duration in seconds -def draw_lines(start, total_duration, minute_scale, scale): - result = '' - next_line = 220 - next_time = start; - num_lines = int((total_duration/60) / minute_scale) +2; - - for i in range(num_lines): - new_line = "
" - result += new_line - - time = "

" + str(next_time.hour) + ':' + str(next_time.minute) + "

"; - result += time - - next_line += minute_scale * scale - next_time += datetime.timedelta(minutes=minute_scale) - return result - -def draw_nodes(start, nodes, cores, scale, colors): - result = '' - end_times = [datetime.datetime(start.year, start.month, start.day, start.hour, start.minute, start.second) for x in range(cores)] - - for node in nodes: - node_start = parser.parse(node['start']) - node_finish = parser.parse(node['finish']) - offset = ((node_start - start).total_seconds() / 60) * scale + 220 - scale_duration = (node['duration'] / 60) * scale - if scale_duration < 5: - scale_duration = 5 - - scale_duration -= 2 - left = 60 - for j in range(len(end_times)): - if end_times[j] < node_start: - left += j * 30 - end_times[j] = datetime.datetime(node_finish.year, node_finish.month, node_finish.day, node_finish.hour, node_finish.minute, node_finish.second) - #end_times[j]+= datetime.timedelta(microseconds=node_finish.microsecond) - break - - color = random.choice(colors) - new_node = "
"; - result += new_node - return result - - -def draw_thread_bar(start, total_duration, nodes, space_between_minutes, minute_scale): - result = "

Threads

" - - total = total_duration/60 - thread = [0 for x in range(total)] - - now = start - - #calculate nuber of threads in every second - for i in range(total): - node_start = None - node_finish = None - - for j in range(i, len(nodes)): - node_start = parser.parse(nodes[j]['start']) - node_finish = parser.parse(nodes[j]['finish']) - - if node_start <= now and node_finish >= now: - thread[i] += nodes[j]['num_threads'] - if node_start > now: - break - now += datetime.timedelta(minutes=1) - - - #draw thread bar - scale = float(space_between_minutes/float(minute_scale)) - - for i in range(len(thread)): - width = thread[i] * 10 - t = (i*scale*minute_scale) + 220 - bar = "
" - result += bar - - return result - - - -def draw_memory_bar(start, total_duration, nodes, space_between_minutes, minute_scale): - result = "

Memory

" - - total = total_duration/60 - memory = [0 for x in range(total)] - - now = start - - #calculate nuber of threads in every second - for i in range(total): - node_start = None - node_finish = None - - for j in range(i, len(nodes)): - node_start = parser.parse(nodes[j]['start']) - node_finish = parser.parse(nodes[j]['finish']) - - if node_start <= now and node_finish >= now: - memory[i] += nodes[j]['estimated_memory'] - if node_start > now: - break - now += datetime.timedelta(minutes=1) - - - #draw thread bar - scale = float(space_between_minutes/float(minute_scale)) - - for i in range(len(memory)): - width = memory[i] * 10 - t = (i*scale*minute_scale) + 220 - bar = "
" - result += bar - - return result - - -''' -Generates a gantt chart in html showing the workflow execution based on a callback log file. -This script was intended to be used with the ResourceMultiprocPlugin. -The following code shows how to set up the workflow in order to generate the log file: - -# import logging -# import logging.handlers -# from nipype.pipeline.plugins.callback_log import log_nodes_cb - -# log_filename = 'callback.log' -# logger = logging.getLogger('callback') -# logger.setLevel(logging.DEBUG) -# handler = logging.FileHandler(log_filename) -# logger.addHandler(handler) - -# #create workflow -# workflow = ... - -# workflow.run(plugin='ResourceMultiProc', -# plugin_args={'num_threads':8, 'memory':12, 'status_callback': log_nodes_cb}) - -# generate_gantt_chart('callback.log', 8) -''' -def generate_gantt_chart(logfile, cores, minute_scale=10, space_between_minutes=50, colors=["#7070FF", "#4E4EB2", "#2D2D66", "#9B9BFF"]): - - result, last_node = log_to_json(logfile) - scale = space_between_minutes - - #add the html header - html_string = ''' - - - - - -
''' - - - #create the header of the report with useful information - start = parser.parse(result[0]['start']) - duration = int((parser.parse(last_node['finish']) - start).total_seconds()) - - html_string += '

Start: '+ result[0]['start'] +'

' - html_string += '

Finish: '+ last_node['finish'] +'

' - html_string += '

Duration: '+ str(duration/60) +' minutes

' - html_string += '

Nodes: '+str(len(result))+'

' - html_string += '

Cores: '+str(cores)+'

' - - - #draw lines - html_string += draw_lines(start, duration, minute_scale, scale) - - #draw nodes - html_string += draw_nodes(start, result, cores, scale, colors) - - html_string += draw_thread_bar(start, duration, result, space_between_minutes, minute_scale) - html_string += draw_memory_bar(start, duration, result, space_between_minutes, minute_scale) - - #finish html - html_string+= ''' -
- ''' - - #save file - html_file = open(logfile +'.html', 'wb') - html_file.write(html_string) - html_file.close() \ No newline at end of file From ecb05e2c8f87298dbfb4b8d9691e763975cf0c19 Mon Sep 17 00:00:00 2001 From: dclark87 Date: Tue, 12 Jan 2016 13:03:04 -0500 Subject: [PATCH 38/45] Found merge HEAD comment and removed --- nipype/pipeline/plugins/base.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/nipype/pipeline/plugins/base.py b/nipype/pipeline/plugins/base.py index 1e32af4f71..162ddd9df4 100644 --- a/nipype/pipeline/plugins/base.py +++ b/nipype/pipeline/plugins/base.py @@ -20,16 +20,11 @@ import numpy as np import scipy.sparse as ssp -<<<<<<< HEAD -from ..utils import (nx, dfs_preorder, topological_sort) -from ..engine import (MapNode, str2bool) -======= from ...utils.filemanip import savepkl, loadpkl from ...utils.misc import str2bool from ..engine.utils import (nx, dfs_preorder, topological_sort) from ..engine import MapNode ->>>>>>> 77ffab33003e8c69712bc3015c213c6979ef77ff from ... import logging From ee70359bf7e1a06eef329323c7c32f0d0b97e666 Mon Sep 17 00:00:00 2001 From: dclark87 Date: Tue, 12 Jan 2016 13:58:32 -0500 Subject: [PATCH 39/45] Removed print statements from fakes3 checker and made it a check at the beginning --- nipype/interfaces/tests/test_io.py | 39 +++++++++--------------------- 1 file changed, 11 insertions(+), 28 deletions(-) diff --git a/nipype/interfaces/tests/test_io.py b/nipype/interfaces/tests/test_io.py index 17906b3172..efdd1bb483 100644 --- a/nipype/interfaces/tests/test_io.py +++ b/nipype/interfaces/tests/test_io.py @@ -34,6 +34,17 @@ except: noboto3 = True +# Check for fakes3 +import subprocess +try: + ret_code = subprocess.check_call(['which', 'fakes3'], stdout=open(os.devnull, 'wb')) + if ret_code == 0: + fakes3_found = True + else: + fakes3_found = False +except: + fakes3_found = False + def test_datagrabber(): dg = nio.DataGrabber() yield assert_equal, dg.inputs.template, Undefined @@ -173,30 +184,6 @@ def test_datasink(): ds = nio.DataSink(infields=['test']) yield assert_true, 'test' in ds.inputs.copyable_trait_names() -# Function to check for fakes3 -def _check_for_fakes3(): - ''' - Function used internally to check for fakes3 installation - ''' - - # Import packages - import subprocess - - # Init variables - fakes3_found = False - - # Check for fakes3 - try: - ret_code = subprocess.check_call(['which', 'fakes3'], stdout=open(os.devnull, 'wb')) - if ret_code == 0: - fakes3_found = True - except subprocess.CalledProcessError as exc: - print 'fakes3 not found, install via \'gem install fakes3\', skipping test...' - except: - print 'Unable to check for fakes3 installation, skipping test...' 
- - # Return if found - return fakes3_found def _make_dummy_input(): ''' @@ -216,10 +203,6 @@ def _make_dummy_input(): # Return path return input_path -# Check for fakes3 -fakes3 = _check_for_fakes3() - - @skipif(noboto3 or not fakes3) # Test datasink writes to s3 properly def test_datasink_to_s3(): From 7ecaefd3ba446fa213d4b9b8ca0eff72ac720e3a Mon Sep 17 00:00:00 2001 From: dclark87 Date: Tue, 12 Jan 2016 14:00:02 -0500 Subject: [PATCH 40/45] Changed fakes3_found to fakes3 --- nipype/interfaces/tests/test_io.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/nipype/interfaces/tests/test_io.py b/nipype/interfaces/tests/test_io.py index efdd1bb483..94270cda73 100644 --- a/nipype/interfaces/tests/test_io.py +++ b/nipype/interfaces/tests/test_io.py @@ -39,11 +39,11 @@ try: ret_code = subprocess.check_call(['which', 'fakes3'], stdout=open(os.devnull, 'wb')) if ret_code == 0: - fakes3_found = True + fakes3 = True else: - fakes3_found = False + fakes3 = False except: - fakes3_found = False + fakes3 = False def test_datagrabber(): dg = nio.DataGrabber() From 818da998e3e02e21c9ac37c349adf918e3a0702f Mon Sep 17 00:00:00 2001 From: dclark87 Date: Wed, 13 Jan 2016 14:27:00 -0500 Subject: [PATCH 41/45] Fixed Python3 compatibility bug in exception raising --- nipype/interfaces/io.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nipype/interfaces/io.py b/nipype/interfaces/io.py index eb92ec967c..1290b56b32 100644 --- a/nipype/interfaces/io.py +++ b/nipype/interfaces/io.py @@ -685,7 +685,7 @@ def _list_outputs(self): if not os.path.exists(outdir): try: os.makedirs(outdir) - except OSError, inst: + except OSError as inst: if 'File exists' in inst: pass else: From 49c14f8c58f34dc2b61a00bf28b58802786b46ac Mon Sep 17 00:00:00 2001 From: dclark87 Date: Wed, 13 Jan 2016 14:30:57 -0500 Subject: [PATCH 42/45] Made exceptions more explicit --- nipype/interfaces/tests/test_io.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/nipype/interfaces/tests/test_io.py b/nipype/interfaces/tests/test_io.py index 94270cda73..ddd6ab7b3a 100644 --- a/nipype/interfaces/tests/test_io.py +++ b/nipype/interfaces/tests/test_io.py @@ -23,7 +23,7 @@ try: import boto from boto.s3.connection import S3Connection, OrdinaryCallingFormat -except: +except ImportError: noboto = True # Check for boto3 @@ -31,7 +31,7 @@ try: import boto3 from botocore.utils import fix_s3_host -except: +except ImportError: noboto3 = True # Check for fakes3 @@ -42,7 +42,7 @@ fakes3 = True else: fakes3 = False -except: +except subprocess.CalledProcessError: fakes3 = False def test_datagrabber(): From a9dd168c1e791866bb79333dcf72190365a20e3d Mon Sep 17 00:00:00 2001 From: dclark87 Date: Thu, 14 Jan 2016 13:42:06 -0500 Subject: [PATCH 43/45] Removed S3DataSink and changed dummy file writing to be Python2/3 compatible --- nipype/interfaces/io.py | 182 +++++------------- .../interfaces/tests/test_auto_S3DataSink.py | 44 ----- nipype/interfaces/tests/test_io.py | 129 ++++--------- 3 files changed, 87 insertions(+), 268 deletions(-) delete mode 100644 nipype/interfaces/tests/test_auto_S3DataSink.py diff --git a/nipype/interfaces/io.py b/nipype/interfaces/io.py index 1290b56b32..dc5decc779 100644 --- a/nipype/interfaces/io.py +++ b/nipype/interfaces/io.py @@ -205,7 +205,9 @@ class DataSinkInputSpec(DynamicTraitedSpec, BaseInterfaceInputSpec): # AWS S3 data attributes creds_path = traits.Str(desc='Filepath to AWS credentials file for S3 bucket '\ - 'access') + 'access; if not specified, the credentials 
will '\ + 'be taken from the AWS_ACCESS_KEY_ID and '\ + 'AWS_SECRET_ACCESS_KEY environment variables') encrypt_bucket_keys = traits.Bool(desc='Flag indicating whether to use S3 '\ 'server-side AES-256 encryption') # Set this if user wishes to override the bucket with their own @@ -426,16 +428,15 @@ def _check_s3_base_dir(self): return s3_flag # Function to return AWS secure environment variables - def _return_aws_keys(self, creds_path): + def _return_aws_keys(self): ''' Method to return AWS access key id and secret access key using credentials found in a local file. Parameters ---------- - creds_path : string (filepath) - path to the csv file downloaded from AWS; can either be root - or user credentials + self : nipype.interfaces.io.DataSink + self for instance method Returns ------- @@ -445,28 +446,38 @@ def _return_aws_keys(self, creds_path): string of the AWS secret access key ''' + # Import packages + import os + # Init variables - with open(creds_path, 'r') as creds_in: - # Grab csv rows - row1 = creds_in.readline() - row2 = creds_in.readline() - - # Are they root or user keys - if 'User Name' in row1: - # And split out for keys - aws_access_key_id = row2.split(',')[1] - aws_secret_access_key = row2.split(',')[2] - elif 'AWSAccessKeyId' in row1: - # And split out for keys - aws_access_key_id = row1.split('=')[1] - aws_secret_access_key = row2.split('=')[1] - else: - err_msg = 'Credentials file not recognized, check file is correct' - raise Exception(err_msg) + creds_path = self.inputs.creds_path + + # Check if creds exist + if creds_path and os.path.exists(creds_path): + with open(creds_path, 'r') as creds_in: + # Grab csv rows + row1 = creds_in.readline() + row2 = creds_in.readline() + + # Are they root or user keys + if 'User Name' in row1: + # And split out for keys + aws_access_key_id = row2.split(',')[1] + aws_secret_access_key = row2.split(',')[2] + elif 'AWSAccessKeyId' in row1: + # And split out for keys + aws_access_key_id = row1.split('=')[1] + aws_secret_access_key = row2.split('=')[1] + else: + err_msg = 'Credentials file not recognized, check file is correct' + raise Exception(err_msg) - # Strip any carriage return/line feeds - aws_access_key_id = aws_access_key_id.replace('\r', '').replace('\n', '') - aws_secret_access_key = aws_secret_access_key.replace('\r', '').replace('\n', '') + # Strip any carriage return/line feeds + aws_access_key_id = aws_access_key_id.replace('\r', '').replace('\n', '') + aws_secret_access_key = aws_secret_access_key.replace('\r', '').replace('\n', '') + else: + aws_access_key_id = os.getenv('AWS_ACCESS_KEY_ID') + aws_secret_access_key = os.getenv('AWS_SECRET_ACCESS_KEY') # Return keys return aws_access_key_id, aws_secret_access_key @@ -479,6 +490,8 @@ def _fetch_bucket(self, bucket_name): Parameters ---------- + self : nipype.interfaces.io.DataSink + self for instance method bucket_name : string string corresponding to the name of the bucket on S3 @@ -504,19 +517,21 @@ def _fetch_bucket(self, bucket_name): creds_path = self.inputs.creds_path iflogger = logging.getLogger('interface') + # Get AWS credentials + try: + aws_access_key_id, aws_secret_access_key = \ + self._return_aws_keys() + except Exception as exc: + err_msg = 'There was a problem extracting the AWS credentials '\ + 'from the credentials file provided: %s. 
Error:\n%s'\ + % (creds_path, exc) + raise Exception(err_msg) + # Try and get AWS credentials if a creds_path is specified - if creds_path: - try: - aws_access_key_id, aws_secret_access_key = \ - self._return_aws_keys(creds_path) - except Exception as exc: - err_msg = 'There was a problem extracting the AWS credentials '\ - 'from the credentials file provided: %s. Error:\n%s'\ - % (creds_path, exc) - raise Exception(err_msg) + if aws_access_key_id and aws_secret_access_key: # Init connection - iflogger.info('Connecting to S3 bucket: %s with credentials from '\ - '%s ...' % (bucket_name, creds_path)) + iflogger.info('Connecting to S3 bucket: %s with credentials...'\ + % bucket_name) # Use individual session for each instance of DataSink # Better when datasinks are being used in multi-threading, see: # http://boto3.readthedocs.org/en/latest/guide/resources.html#multithreading @@ -762,101 +777,6 @@ def _list_outputs(self): return outputs -class S3DataSinkInputSpec(DynamicTraitedSpec, BaseInterfaceInputSpec): - testing = traits.Bool(False, usedefault=True, - desc='Flag for using local fakes3 server.' - ' (for testing purposes only)') - anon = traits.Bool(False, usedefault=True, - desc='Use anonymous connection to s3') - bucket = traits.Str(mandatory=True, - desc='Amazon S3 bucket where your data is stored') - bucket_path = traits.Str('', usedefault=True, - desc='Location within your bucket to store ' - 'data.') - base_directory = Directory( - desc='Path to the base directory for storing data.') - container = traits.Str( - desc='Folder within base directory in which to store output') - parameterization = traits.Bool(True, usedefault=True, - desc='store output in parametrized structure') - strip_dir = Directory(desc='path to strip out of filename') - substitutions = InputMultiPath(traits.Tuple(traits.Str, traits.Str), - desc=('List of 2-tuples reflecting string ' - 'to substitute and string to replace ' - 'it with')) - regexp_substitutions = InputMultiPath(traits.Tuple(traits.Str, traits.Str), - desc=('List of 2-tuples reflecting a pair ' - 'of a Python regexp pattern and a ' - 'replacement string. Invoked after ' - 'string `substitutions`')) - - _outputs = traits.Dict(traits.Str, value={}, usedefault=True) - remove_dest_dir = traits.Bool(False, usedefault=True, - desc='remove dest directory when copying dirs') - # Set this if user wishes to have local copy of files as well - local_copy = traits.Str(desc='Copy files locally as well as to S3 bucket') - - def __setattr__(self, key, value): - if key not in self.copyable_trait_names(): - if not isdefined(value): - super(S3DataSinkInputSpec, self).__setattr__(key, value) - self._outputs[key] = value - else: - if key in self._outputs: - self._outputs[key] = value - super(S3DataSinkInputSpec, self).__setattr__(key, value) - - -class S3DataSink(DataSink): - """ Works exactly like DataSink, except the specified files will - also be uploaded to Amazon S3 storage in the specified bucket - and location. 'bucket_path' is the s3 analog for - 'base_directory'. - - """ - input_spec = S3DataSinkInputSpec - - def _list_outputs(self): - """Execute this module. 
- """ - outputs = super(S3DataSink, self)._list_outputs() - - self.localtos3(outputs['out_file']) - - return outputs - - def localtos3(self, paths): - if self.inputs.testing: - conn = S3Connection(anon=True, is_secure=False, port=4567, - host='localhost', - calling_format=OrdinaryCallingFormat()) - - else: - conn = S3Connection(anon=self.inputs.anon) - bkt = conn.get_bucket(self.inputs.bucket) - s3paths = [] - - for path in paths: - # convert local path to s3 path - bd_index = path.find(self.inputs.base_directory) - if bd_index != -1: # base_directory is in path, maintain directory structure - s3path = path[bd_index + len(self.inputs.base_directory):] # cut out base directory - if s3path[0] == os.path.sep: - s3path = s3path[1:] - else: # base_directory isn't in path, simply place all files in bucket_path folder - s3path = os.path.split(path)[1] # take filename from path - s3path = os.path.join(self.inputs.bucket_path, s3path) - if s3path[-1] == os.path.sep: - s3path = s3path[:-1] - s3paths.append(s3path) - - k = boto.s3.key.Key(bkt) - k.key = s3path - k.set_contents_from_filename(path) - - return s3paths - - class S3DataGrabberInputSpec(DynamicTraitedSpec, BaseInterfaceInputSpec): anon = traits.Bool(False, usedefault=True, desc='Use anonymous connection to s3. If this is set to True, boto may print' + diff --git a/nipype/interfaces/tests/test_auto_S3DataSink.py b/nipype/interfaces/tests/test_auto_S3DataSink.py deleted file mode 100644 index 9ef342defb..0000000000 --- a/nipype/interfaces/tests/test_auto_S3DataSink.py +++ /dev/null @@ -1,44 +0,0 @@ -# AUTO-GENERATED by tools/checkspecs.py - DO NOT EDIT -from ...testing import assert_equal -from ..io import S3DataSink - - -def test_S3DataSink_inputs(): - input_map = dict(_outputs=dict(usedefault=True, - ), - anon=dict(usedefault=True, - ), - base_directory=dict(), - bucket=dict(mandatory=True, - ), - bucket_path=dict(usedefault=True, - ), - container=dict(), - ignore_exception=dict(nohash=True, - usedefault=True, - ), - parameterization=dict(usedefault=True, - ), - regexp_substitutions=dict(), - remove_dest_dir=dict(usedefault=True, - ), - strip_dir=dict(), - substitutions=dict(), - testing=dict(usedefault=True, - ), - ) - inputs = S3DataSink.input_spec() - - for key, metadata in list(input_map.items()): - for metakey, value in list(metadata.items()): - yield assert_equal, getattr(inputs.traits()[key], metakey), value - - -def test_S3DataSink_outputs(): - output_map = dict(out_file=dict(), - ) - outputs = S3DataSink.output_spec() - - for key, metadata in list(output_map.items()): - for metakey, value in list(metadata.items()): - yield assert_equal, getattr(outputs.traits()[key], metakey), value diff --git a/nipype/interfaces/tests/test_io.py b/nipype/interfaces/tests/test_io.py index ddd6ab7b3a..c1f4ec35f5 100644 --- a/nipype/interfaces/tests/test_io.py +++ b/nipype/interfaces/tests/test_io.py @@ -185,26 +185,30 @@ def test_datasink(): yield assert_true, 'test' in ds.inputs.copyable_trait_names() +# Make dummy input file def _make_dummy_input(): ''' + Function to create a dummy file ''' # Import packages import tempfile + # Init variables input_dir = tempfile.mkdtemp() input_path = os.path.join(input_dir, 'datasink_test_s3.txt') # Create input file with open(input_path, 'wb') as f: - f.write('ABCD1234') + f.write(b'ABCD1234') # Return path return input_path -@skipif(noboto3 or not fakes3) + # Test datasink writes to s3 properly +@skipif(noboto3 or not fakes3) def test_datasink_to_s3(): ''' This function tests to see if the S3 functionality of 
a DataSink @@ -264,6 +268,36 @@ def test_datasink_to_s3(): # Make sure md5sums match yield assert_equal, src_md5, dst_md5 + +# Test AWS creds read from env vars +@skipif(noboto3 or not fakes3) +def test_aws_keys_from_env(): + ''' + Function to ensure the DataSink can successfully read in AWS + credentials from the environment variables + ''' + + # Import packages + import os + import nipype.interfaces.io as nio + + # Init variables + ds = nio.DataSink() + aws_access_key_id = 'ABCDACCESS' + aws_secret_access_key = 'DEFGSECRET' + + # Set env vars + os.environ['AWS_ACCESS_KEY_ID'] = aws_access_key_id + os.environ['AWS_SECRET_ACCESS_KEY'] = aws_secret_access_key + + # Call function to return creds + access_key_test, secret_key_test = ds._return_aws_keys() + + # Assert match + yield assert_equal, aws_access_key_id, access_key_test + yield assert_equal, aws_secret_access_key, secret_key_test + + # Test the local copy attribute def test_datasink_localcopy(): ''' @@ -308,19 +342,6 @@ def test_datasink_localcopy(): yield assert_equal, src_md5, dst_md5 -@skipif(noboto) -def test_s3datasink(): - ds = nio.S3DataSink() - yield assert_true, ds.inputs.parameterization - yield assert_equal, ds.inputs.base_directory, Undefined - yield assert_equal, ds.inputs.strip_dir, Undefined - yield assert_equal, ds.inputs._outputs, {} - ds = nio.S3DataSink(base_directory='foo') - yield assert_equal, ds.inputs.base_directory, 'foo' - ds = nio.S3DataSink(infields=['test']) - yield assert_true, 'test' in ds.inputs.copyable_trait_names() - - def test_datasink_substitutions(): indir = mkdtemp(prefix='-Tmp-nipype_ds_subs_in') outdir = mkdtemp(prefix='-Tmp-nipype_ds_subs_out') @@ -349,84 +370,6 @@ def test_datasink_substitutions(): shutil.rmtree(indir) shutil.rmtree(outdir) -@skipif(noboto or not fakes3) - -def test_s3datasink_substitutions(): - indir = mkdtemp(prefix='-Tmp-nipype_ds_subs_in') - outdir = mkdtemp(prefix='-Tmp-nipype_ds_subs_out') - files = [] - for n in ['ababab.n', 'xabababyz.n']: - f = os.path.join(indir, n) - files.append(f) - open(f, 'w') - - # run fakes3 server and set up bucket - fakes3dir = op.expanduser('~/fakes3') - try: - proc = Popen( - ['fakes3', '-r', fakes3dir, '-p', '4567'], stdout=open(os.devnull, 'wb')) - except OSError as ose: - if 'No such file or directory' in str(ose): - return # fakes3 not installed. OK! 
- raise ose - - conn = S3Connection(anon=True, is_secure=False, port=4567, - host='localhost', - calling_format=OrdinaryCallingFormat()) - conn.create_bucket('test') - - ds = nio.S3DataSink( - testing=True, - anon=True, - bucket='test', - bucket_path='output/', - parametrization=False, - base_directory=outdir, - substitutions=[('ababab', 'ABABAB')], - # end archoring ($) is used to assure operation on the filename - # instead of possible temporary directories names matches - # Patterns should be more comprehendable in the real-world usage - # cases since paths would be quite more sensible - regexp_substitutions=[(r'xABABAB(\w*)\.n$', r'a-\1-b.n'), - ('(.*%s)[-a]([^%s]*)$' % ((os.path.sep,) * 2), - r'\1!\2')]) - setattr(ds.inputs, '@outdir', files) - ds.run() - yield assert_equal, \ - sorted([os.path.basename(x) for - x in glob.glob(os.path.join(outdir, '*'))]), \ - ['!-yz-b.n', 'ABABAB.n'] # so we got re used 2nd and both patterns - - bkt = conn.get_bucket(ds.inputs.bucket) - bkt_files = list(k for k in bkt.list()) - - found = [False, False] - failed_deletes = 0 - for k in bkt_files: - if '!-yz-b.n' in k.key: - found[0] = True - try: - bkt.delete_key(k) - except: - failed_deletes += 1 - elif 'ABABAB.n' in k.key: - found[1] = True - try: - bkt.delete_key(k) - except: - failed_deletes += 1 - - # ensure delete requests were successful - yield assert_equal, failed_deletes, 0 - - # ensure both keys are found in bucket - yield assert_equal, found.count(True), 2 - - proc.kill() - shutil.rmtree(fakes3dir) - shutil.rmtree(indir) - shutil.rmtree(outdir) - def _temp_analyze_files(): """Generate temporary analyze file pair.""" From c2eedc7128f3e9553d75c2a84798a365b3fbec11 Mon Sep 17 00:00:00 2001 From: dclark87 Date: Tue, 2 Feb 2016 16:17:58 -0500 Subject: [PATCH 44/45] Added aws.rst file documenting use of new S3 capabilities in the DataSink class --- doc/users/aws.rst | 102 ++++++++++++++++++++++++++++++++++++++++++++ doc/users/index.rst | 1 + 2 files changed, 103 insertions(+) create mode 100644 doc/users/aws.rst diff --git a/doc/users/aws.rst b/doc/users/aws.rst new file mode 100644 index 0000000000..832072ba62 --- /dev/null +++ b/doc/users/aws.rst @@ -0,0 +1,102 @@ +.. _aws: + +============================================ +Using Nipype with Amazon Web Services (AWS) +============================================ +Several groups have been successfully using Nipype on AWS. This procedure +involves setting a temporary cluster using StarCluster and potentially +transferring files to/from S3. The latter is supported by Nipype through +DataSink and S3DataGrabber. + + +Using DataSink with S3 +====================== +The DataSink class now supports sending output data directly to an AWS S3 +bucket. It does this through the introduction of several input attributes to the +DataSink interface and by parsing the `base_directory` attribute. This class +uses the `boto3 `_ and +`botocore `_ Python packages to +interact with AWS. To configure the DataSink to write data to S3, the user must +set the ``base_directory`` property to an S3-style filepath. For example: + +:: + + import nipype.interfaces.io as nio + ds = nio.DataSink() + ds.inputs.base_directory = 's3://mybucket/path/to/output/dir' + +With the "s3://" prefix in the path, the DataSink knows that the output +directory to send files is on S3 in the bucket "mybucket". 
"path/to/output/dir" +is the relative directory path within the bucket "mybucket" where output data +will be uploaded to (NOTE: if the relative path specified contains folders that +don’t exist in the bucket, the DataSink will create them). The DataSink treats +the S3 base directory exactly as it would a local directory, maintaining support +for containers, substitutions, subfolders, "." notation, etc to route output +data appropriately. + +There are four new attributes introduced with S3-compatibility: ``creds_path``, +``encrypt_bucket_keys``, ``local_copy``, and ``bucket``. + +:: + + ds.inputs.creds_path = '/home/user/aws_creds/credentials.csv' + ds.inputs.encrypt_bucket_keys = True + ds.local_copy = '/home/user/workflow_outputs/local_backup' + +``creds_path`` is a file path where the user's AWS credentials file (typically +a csv) is stored. This credentials file should contain the AWS access key id and +secret access key and should be formatted as one of the following (these formats +are how Amazon provides the credentials file by default when first downloaded). + +Root-account user: + +:: + + AWSAccessKeyID=ABCDEFGHIJKLMNOP + AWSSecretKey=zyx123wvu456/ABC890+gHiJk + +IAM-user: + +:: + + User Name,Access Key Id,Secret Access Key + "username",ABCDEFGHIJKLMNOP,zyx123wvu456/ABC890+gHiJk + +The ``creds_path`` is necessary when writing files to a bucket that has +restricted access (almost no buckets are publicly writable). If ``creds_path`` +is not specified, the DataSink will check the ``AWS_ACCESS_KEY_ID`` and +``AWS_SECRET_ACCESS_KEY`` environment variables and use those values for bucket +access. + +``encrypt_bucket_keys`` is a boolean flag that indicates whether to encrypt the +output data on S3, using server-side AES-256 encryption. This is useful if the +data being output is sensitive and one desires an extra layer of security on the +data. By default, this is turned off. + +``local_copy`` is a string of the filepath where local copies of the output data +are stored in addition to those sent to S3. This is useful if one wants to keep +a backup version of the data stored on their local computer. By default, this is +turned off. + +``bucket`` is a boto3 Bucket object that the user can use to overwrite the +bucket specified in their ``base_directory``. This can be useful if one has to +manually create a bucket instance on their own using special credentials (or +using a mock server like `fakes3 `_). This is +typically used for developers unit-testing the DataSink class. Most users do not +need to use this attribute for actual workflows. This is an optional argument. + +Finally, the user needs only to specify the input attributes for any incoming +data to the node, and the outputs will be written to their S3 bucket. + +:: + + workflow.connect(inputnode, 'subject_id', ds, 'container') + workflow.connect(realigner, 'realigned_files', ds, 'motion') + +So, for example, outputs for sub001’s realigned_file1.nii.gz will be in: +s3://mybucket/path/to/output/dir/sub001/motion/realigned_file1.nii.gz + + +Using S3DataGrabber +====================== +Coming soon... 
\ No newline at end of file diff --git a/doc/users/index.rst b/doc/users/index.rst index 3a432135a6..13c1487ae0 100644 --- a/doc/users/index.rst +++ b/doc/users/index.rst @@ -38,6 +38,7 @@ spmmcr mipav nipypecmd + aws From c0d148aec7505e9c0df439c5b53d452ae478f352 Mon Sep 17 00:00:00 2001 From: dclark87 Date: Wed, 3 Feb 2016 13:15:22 -0500 Subject: [PATCH 45/45] Removed bucket from being an attribute of the DataSink and just made it a local variable; pickle is not able to pickle the Bucket object. Functionally, the DataSink is the same --- nipype/interfaces/io.py | 73 +++++++++++++++++++---------------------- 1 file changed, 33 insertions(+), 40 deletions(-) diff --git a/nipype/interfaces/io.py b/nipype/interfaces/io.py index dc5decc779..6f0ad3bc32 100644 --- a/nipype/interfaces/io.py +++ b/nipype/interfaces/io.py @@ -375,8 +375,7 @@ def _check_s3_base_dir(self): ''' Method to see if the datasink's base directory specifies an S3 bucket path; if it does, it parses the path for the bucket - name in the form 's3://bucket_name/...' and adds a bucket - attribute to the data sink instance, i.e. self.bucket + name in the form 's3://bucket_name/...' and returns it Parameters ---------- @@ -386,15 +385,19 @@ def _check_s3_base_dir(self): s3_flag : boolean flag indicating whether the base_directory contained an S3 bucket path + bucket_name : string + name of the S3 bucket to connect to; if the base directory + is not a valid S3 path, defaults to '' ''' # Init variables s3_str = 's3://' + bucket_name = '' base_directory = self.inputs.base_directory if not isdefined(base_directory): s3_flag = False - return s3_flag + return s3_flag, bucket_name # Explicitly lower-case the "s3" if base_directory.lower().startswith(s3_str): @@ -404,28 +407,15 @@ def _check_s3_base_dir(self): # Check if 's3://' in base dir if base_directory.startswith(s3_str): - # Attempt to access bucket - try: - # Expects bucket name to be 's3://bucket_name/base_dir/..' - bucket_name = base_directory.split(s3_str)[1].split('/')[0] - # Get the actual bucket object - if self.inputs.bucket: - self.bucket = self.inputs.bucket - else: - self.bucket = self._fetch_bucket(bucket_name) - # Report error in case of exception - except Exception as exc: - err_msg = 'Unable to access S3 bucket. Error:\n%s. Exiting...'\ - % exc - raise Exception(err_msg) - # Bucket access was a success, set flag + # Expects bucket name to be 's3://bucket_name/base_dir/..' + bucket_name = base_directory.split(s3_str)[1].split('/')[0] s3_flag = True # Otherwise it's just a normal datasink else: s3_flag = False # Return s3_flag - return s3_flag + return s3_flag, bucket_name # Function to return AWS secure environment variables def _return_aws_keys(self): @@ -576,7 +566,7 @@ def _fetch_bucket(self, bucket_name): return bucket # Send up to S3 method - def _upload_to_s3(self, src, dst): + def _upload_to_s3(self, bucket, src, dst): ''' Method to upload outputs to S3 bucket instead of on local disk ''' @@ -589,7 +579,6 @@ def _upload_to_s3(self, src, dst): from botocore.exceptions import ClientError # Init variables - bucket = self.bucket iflogger = logging.getLogger('interface') s3_str = 's3://' s3_prefix = s3_str + bucket.name @@ -668,30 +657,34 @@ def _list_outputs(self): outdir = '.' 
# Check if base directory reflects S3 bucket upload - try: - s3_flag = self._check_s3_base_dir() - if s3_flag: - s3dir = self.inputs.base_directory - if isdefined(self.inputs.container): - s3dir = os.path.join(s3dir, self.inputs.container) + s3_flag, bucket_name = self._check_s3_base_dir() + if s3_flag: + s3dir = self.inputs.base_directory + # If user overrides bucket object, use that + if self.inputs.bucket: + bucket = self.inputs.bucket + # Otherwise fetch bucket object using name else: - s3dir = '' - # If encountering an exception during bucket access, set output - # base directory to a local folder - except Exception as exc: + try: + bucket = self._fetch_bucket(bucket_name) + # If encountering an exception during bucket access, set output + # base directory to a local folder + except Exception as exc: + s3dir = '' + if not isdefined(self.inputs.local_copy): + local_out_exception = os.path.join(os.path.expanduser('~'), + 's3_datasink_' + bucket_name) + outdir = local_out_exception + # Log local copying directory + iflogger.info('Access to S3 failed! Storing outputs locally at: '\ + '%s\nError: %s' %(outdir, exc)) + else: s3dir = '' - s3_flag = False - if not isdefined(self.inputs.local_copy): - local_out_exception = os.path.join(os.path.expanduser('~'), - 's3_datasink_' + self.bucket.name) - outdir = local_out_exception - # Log local copying directory - iflogger.info('Access to S3 failed! Storing outputs locally at: '\ - '%s\nError: %s' %(outdir, exc)) # If container input is given, append that to outdir if isdefined(self.inputs.container): outdir = os.path.join(outdir, self.inputs.container) + s3dir = os.path.join(s3dir, self.inputs.container) # If sinking to local folder if outdir != s3dir: @@ -743,7 +736,7 @@ def _list_outputs(self): # If we're uploading to S3 if s3_flag: - self._upload_to_s3(src, s3dst) + self._upload_to_s3(bucket, src, s3dst) out_files.append(s3dst) # Otherwise, copy locally src -> dst if not s3_flag or isdefined(self.inputs.local_copy):