From b44ef0d600b50ca5ab4c38ab725e05bea452fb96 Mon Sep 17 00:00:00 2001 From: tyarkoni Date: Sun, 17 Jul 2016 14:03:48 -0500 Subject: [PATCH 01/10] first pass at working event transformer interface --- nipype/algorithms/events.py | 349 ++++++++++++++++++++++++++++++++++++ 1 file changed, 349 insertions(+) create mode 100644 nipype/algorithms/events.py diff --git a/nipype/algorithms/events.py b/nipype/algorithms/events.py new file mode 100644 index 0000000000..83941485bd --- /dev/null +++ b/nipype/algorithms/events.py @@ -0,0 +1,349 @@ +from __future__ import division +from builtins import range + +from copy import deepcopy +import os + +from nibabel import load +import numpy as np +from nipype.external.six import string_types +from nipype.interfaces.base import (BaseInterface, TraitedSpec, InputMultiPath, + traits, File, Bunch, BaseInterfaceInputSpec, + isdefined, OutputMultiPath) +from nipype import config, logging +import re +from glob import glob +from os.path import basename +iflogger = logging.getLogger('interface') + +# Eventually, move the following inside the interface, or wrap in an import check +import pandas as pd +import sklearn.preprocessing as preproc +from sklearn.linear_model import LinearRegression + + +def alias(target, append=False): + def decorator(func): + def wrapper(self, cols, groupby=None, names=None, *args, **kwargs): + + cols = self._select_cols(cols) + + # groupby can be either a single column, in which case it's + # interpreted as a categorical variable to groupby directly, or a + # list of column names, in which case it's interpreted as a set of + # dummy columns to reconstruct a categorical from. + if groupby is not None: + groupby = self._select_cols(groupby) + if len(groupby) > 1: + group_results = [] + names = ['%s_%s' % (cn, gn)for cn in cols for gn in groupby] + for i, col in enumerate(groupby): + _result = np.zeros((len(self.data), len(cols))) + inds = self.data[col].nonzero() + _result[inds] = target(self.data[cols].iloc[inds], *args, **kwargs) + group_results.extend(_result.T) + result = np.c_[group_results].squeeze().T + result = pd.DataFrame(result, columns=names) + else: + result = self.data.groupby(groupby).apply(target, *args, **kwargs) + else: + result = target(self.data[cols], *args, **kwargs) + + if append: + names = result.columns + + if names is not None: + cols = names + + self.data[cols] = result + return wrapper + return decorator + + +class EventTransformer(object): + + def __init__(self, events, orig_hz=1, target_hz=1000): + self.events = events + self.orig_hz = orig_hz + self.target_hz = target_hz + self._to_dense() + + def _select_cols(self, cols): + if isinstance(cols, string_types) and '*' in cols: + # if '*' in cols: + patt = re.compile(cols.replace('*', '.*')) + cols = [l for l in self.data.columns.tolist() for m in [patt.search(l)] if m] + return cols + + @alias(preproc.scale) + def standardize(): pass + + @alias(np.log) + def log(): pass + + @alias(preproc.binarize) + def binarize(): pass + + @alias(np.logical_or) + def or_(): pass + + @alias(np.logical_and) + def and_(): pass + + @alias(np.logical_not) + def not_(): pass + + @alias(pd.get_dummies, append=True) + def factor(): pass + + def orthogonalize(self, y_cols, x_cols): + y_cols, x_cols = self._select_cols(y_cols), listify(x_cols) + x = self.data[x_cols].values + y = self.data[y_cols].values + est = LinearRegression() + est.fit(x, y) + y -= est.predict(x) + if np.linalg.norm(y, 1) > np.exp(-32): + self.data[y_cols] = y + + def formula(self, f, target=None, replace=False, 
+                *args, **kwargs):
+        from patsy import dmatrix
+        result = dmatrix(f, self.data, return_type='dataframe', *args, **kwargs)
+        if target is not None:
+            self.data[target] = result
+        elif replace:
+            self.data[result.columns] = result
+        else:
+            raise ValueError("Either a target column must be passed or replace"
+                             " must be True.")
+
+    def multiply(self, y_cols, x_cols):
+        x_cols = self._select_cols(x_cols)
+        result = self.data[x_cols].apply(lambda x: np.multiply(x, self.data[y_cols]))
+        names = ['%s_%s' % (y_cols, x) for x in x_cols]
+        self.data[names] = result
+
+    def query(self, q, *args, **kwargs):
+        self.data = self.data.query(q)
+
+    def apply(self, func, *args, **kwargs):
+        self.data = func(self.data, *args, **kwargs)
+
+    def _to_dense(self):
+
+        end = int((self.events['onset'] + self.events['duration']).max())
+
+        targ_hz, orig_hz = self.target_hz, self.orig_hz
+        len_ts = end * targ_hz
+        conditions = self.events['condition'].unique().tolist()
+        n_conditions = len(conditions)
+        ts = np.zeros((len_ts, n_conditions))
+
+        _events = self.events.copy().reset_index()
+        _events[['onset', 'duration']] = _events[
+            ['onset', 'duration']] * targ_hz / orig_hz
+
+        cond_index = [conditions.index(c) for c in _events['condition']]
+        ev_end = np.round(_events['onset'] + _events['duration']).astype(int)
+        onsets = np.round(_events['onset']).astype(int)
+
+        for i, row in _events.iterrows():
+            ts[onsets[i]:ev_end[i], cond_index[i]] = row['amplitude']
+
+        self.data = pd.DataFrame(ts, columns=conditions)
+        onsets = np.arange(len(ts)) / self.target_hz
+        self.data.insert(0, 'onset', onsets)
+
+    def resample(self, sampling_rate):
+        sampling_rate = np.round(sampling_rate * 1000)
+        self.data.index = pd.to_datetime(self.data['onset'], unit='s')
+        self.data = self.data.resample('%dL' % sampling_rate).mean()
+        self.data['onset'] = self.data.index.astype(np.int64) / int(1e9)
+        self.data = self.data.reset_index(drop=True)
+
+
+class EventReader(object):
+
+    def __init__(self, columns=None, header='infer', sep=None,
+                 default_duration=0., default_amplitude=1.,
+                 condition_pattern=None, subject_pattern=None,
+                 run_pattern=None):
+        '''
+        Args:
+            columns (list): Optional list of column names to use. If passed,
+                number of elements must match number of columns in the text
+                files to be read. If omitted, column names are inferred by
+                pandas (depending on value of header).
+            header (str): passed to pandas; see pd.read_table docs for details.
+            sep (str): column separator; see pd.read_table docs for details.
+            default_duration (float): Optional default duration to set for all
+                events. Will be ignored if a column named 'duration' is found.
+            default_amplitude (float): Optional default amplitude to set for
+                all events. Will be ignored if an amplitude column is found.
+            condition_pattern (str): regex with which to capture condition
+                names from input text file filenames. Only the first captured
+                group will be used.
+            subject_pattern (str): regex with which to capture subject
+                names from input text file filenames. Only the first captured
+                group will be used.
+            run_pattern (str): regex with which to capture run names from input
+                text file filenames. Only the first captured group will be used.
+        '''
+
+        self.columns = columns
+        self.header = header
+        self.sep = sep
+        self.default_duration = default_duration
+        self.default_amplitude = default_amplitude
+        self.condition_pattern = condition_pattern
+        self.subject_pattern = subject_pattern
+        self.run_pattern = run_pattern
+
+    def read(self, path, condition=None, subject=None, run=None, rename=None):
+
+        dfs = []
+
+        if isinstance(path, string_types):
+            path = glob(path)
+
+        for f in path:
+            _data = pd.read_table(f, names=self.columns, header=self.header,
+                                  sep=self.sep)
+
+            if rename is not None:
+                _data = _data.rename(columns=rename)
+
+            # Validate and set CODA (condition/onset/duration/amplitude) columns
+            cols = _data.columns
+
+            if 'onset' not in cols:
+                raise ValueError(
+                    "DataFrame is missing mandatory 'onset' column.")
+
+            if 'duration' not in cols:
+                if self.default_duration is None:
+                    raise ValueError(
+                        'Event file "%s" is missing \'duration\''
+                        ' column, and no default_duration was provided.' % f)
+                else:
+                    _data['duration'] = self.default_duration
+
+            if 'amplitude' not in cols:
+                _data['amplitude'] = self.default_amplitude
+
+            if condition is not None:
+                _data['condition'] = condition
+            elif 'condition' not in cols:
+                cp = self.condition_pattern
+                if cp is None:
+                    cp = r'(.*)\.[a-zA-Z0-9]{3,4}'
+                m = re.search(cp, basename(f))
+                if m is None:
+                    raise ValueError(
+                        "No condition column found in event file, no "
+                        "condition argument passed, and attempt to "
+                        "automatically extract condition from filename failed."
+                        " Please make sure a condition is specified.")
+                _data['condition'] = m.group(1)
+
+            if subject is not None:
+                _data['subject'] = subject
+            elif self.subject_pattern is not None:
+                m = re.search(self.subject_pattern, f)
+                if m is None:
+                    raise ValueError(
+                        "Subject pattern '%s' failed to match any part of "
+                        "filename '%s'." % (self.subject_pattern, f))
+                _data['subject'] = m.group(1)
+
+            if run is not None:
+                _data['run'] = run
+            elif self.run_pattern is not None:
+                m = re.search(self.run_pattern, f)
+                if m is None:
+                    raise ValueError(
+                        "Run pattern '%s' failed to match any part of "
+                        "filename '%s'." % (self.run_pattern, f))
+                _data['run'] = m.group(1)
+
+            dfs.append(_data)
+
+        return pd.concat(dfs, axis=0)
+
+
+class TransformEventsInputSpec(BaseInterfaceInputSpec):
+    subject_info = InputMultiPath(Bunch, mandatory=True,
+                xor=['subject_info', 'event_files'],
+                desc=("Bunch or List(Bunch) subject specific condition information. "
+                      "see :ref:`SpecifyModel` or SpecifyModel.__doc__ for details"))
+    event_files = InputMultiPath(traits.List(File(exists=True)), mandatory=True,
+                xor=['subject_info', 'event_files'],
+                desc=('list of event description files in 1, 2, 3, or 4 column format '
+                      'corresponding to onsets, durations, amplitudes, and names'))
+    input_units = traits.Enum('secs', 'scans', mandatory=True,
+                desc=("Units of event onsets and durations (secs or scans). Output "
+                      "units are always in secs"))
+    time_repetition = traits.Float(mandatory=True,
+                desc=("Time between the start of one volume to the start of "
+                      "the next image volume."))
+    # transformations = InputMultiPath(traits.List(File(exists=True)), mandatory=True,
+    #                                  desc=("JSON specification of the transformations to perform."))
+
+
+class TransformEventsOutputSpec(TraitedSpec):
+    subject_info = OutputMultiPath(Bunch, mandatory=True,
+                desc=("Bunch or List(Bunch) subject specific condition information. "
+                      "see :ref:`SpecifyModel` or SpecifyModel.__doc__ for details"))
+
+
+class TransformEvents(BaseInterface):
+
+    input_spec = TransformEventsInputSpec
+    output_spec = TransformEventsOutputSpec
+
+    def _get_event_data(self):
+        if isdefined(self.inputs.subject_info):
+            info = self.inputs.subject_info
+            return pd.DataFrame.from_records(info)
+        else:
+            info = self.inputs.event_files
+            reader = EventReader(columns=['onset', 'duration', 'amplitude'])
+            return reader.read(info[0])
+
+    def _transform_events(self):
+        events = self._get_event_data()
+        self.transformer = EventTransformer(events)
+        print(self.transformer.data.shape)
+        # Transformation application logic goes here later
+        # ...
+        self.transformer.resample(self.inputs.time_repetition)
+        print(self.transformer.data.shape)
+
+    def _run_interface(self, runtime):
+        self._transform_events()
+        return runtime
+
+    def _df_to_bunch(self):
+
+        if not hasattr(self, 'transformer'):
+            self._transform_events()
+
+        _data = self.transformer.data
+
+        info = Bunch(conditions=[], onsets=[], durations=[], amplitudes=[])
+        cols = [c for c in _data.columns if c not in {'onset'}]
+        onsets = _data['onset'].values.tolist()
+        info.conditions = cols
+
+        for col in cols:
+            info.onsets.append(onsets)
+            info.durations.append(self.inputs.time_repetition)
+            info.amplitudes.append(_data[col].values.tolist())
+
+        return info
+
+    def _list_outputs(self):
+        outputs = self._outputs().get()
+        outputs['subject_info'] = self._df_to_bunch()
+        return outputs

From 772d280237a3ca1e6a2d9b39b9a5cbf13dbfa81c Mon Sep 17 00:00:00 2001
From: tyarkoni
Date: Sun, 17 Jul 2016 19:06:15 -0500
Subject: [PATCH 02/10] adds Transformation class to represent one or more
 chained operations on variables

---
 nipype/algorithms/events.py | 31 ++++++++++++++++++++++++++++---
 1 file changed, 28 insertions(+), 3 deletions(-)

diff --git a/nipype/algorithms/events.py b/nipype/algorithms/events.py
index 83941485bd..74f68cda12 100644
--- a/nipype/algorithms/events.py
+++ b/nipype/algorithms/events.py
@@ -130,7 +130,9 @@ def apply(self, func, *args, **kwargs):
         self.data = func(self.data, *args, **kwargs)
 
     def _to_dense(self):
-
+        """ Convert the sparse [onset, duration, amplitude] representation
+        typical of event files to a dense matrix where each row represents
+        a fixed unit of time. """
         end = int((self.events['onset'] + self.events['duration']).max())
 
         targ_hz, orig_hz = self.target_hz, self.orig_hz
@@ -155,6 +157,11 @@ def _to_dense(self):
         self.data.insert(0, 'onset', onsets)
 
     def resample(self, sampling_rate):
+        """
+        Resample the design matrix to the specified sampling rate. Primarily
+        useful for downsampling to match the TR, so as to export the design as
+        a n(TR) x n(conds) matrix.
+ """ sampling_rate = np.round(sampling_rate * 1000) self.data.index = pd.to_datetime(self.data['onset'], unit='s') self.data = self.data.resample('%dL' % sampling_rate).mean() @@ -296,6 +303,25 @@ class TransformEventsOutputSpec(TraitedSpec): "see :ref:`SpecifyModel` or SpecifyModel.__doc__ for details")) +class Transformation(object): + + def __init__(self, transformer, inputs, steps, outputs=None): + + self.transformer = transformer + self.inputs = inputs + self.steps = steps + self.outputs = outputs + self._validate() + + def _validate(self): + missing = set(transform['input'] - set(self.transformer.data.columns)): + if missing: + raise ValueError("Invalid column(s): %s" % missing) + + def run(self): + pass + + class TransformEvents(BaseInterface): input_spec = TransformEventsInputSpec @@ -313,11 +339,10 @@ def _get_event_data(self): def _transform_events(self): events = self._get_event_data() self.transformer = EventTransformer(events) - print(self.transformer.data.shape) + # Transformation application logic goes here later # ... self.transformer.resample(self.inputs.time_repetition) - print(self.transformer.data.shape) def _run_interface(self, runtime): self._transform_events() From e68ad8c18c804296ec9c7099f1d31017ddc49ea9 Mon Sep 17 00:00:00 2001 From: tyarkoni Date: Sun, 17 Jul 2016 19:30:37 -0500 Subject: [PATCH 03/10] drop sklearn dependency in orthogonalization --- nipype/algorithms/events.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/nipype/algorithms/events.py b/nipype/algorithms/events.py index 74f68cda12..50bb3b82a2 100644 --- a/nipype/algorithms/events.py +++ b/nipype/algorithms/events.py @@ -19,7 +19,6 @@ # Eventually, move the following inside the interface, or wrap in an import check import pandas as pd import sklearn.preprocessing as preproc -from sklearn.linear_model import LinearRegression def alias(target, append=False): @@ -96,15 +95,16 @@ def not_(): pass @alias(pd.get_dummies, append=True) def factor(): pass - def orthogonalize(self, y_cols, x_cols): - y_cols, x_cols = self._select_cols(y_cols), listify(x_cols) - x = self.data[x_cols].values + def orthogonalize(self, y_cols, X_cols): + ''' Orthogonalize each of the variables in y_cols with respect to all + of the variables in x_cols. 
+ ''' + y_cols, X_cols = self._select_cols(y_cols), self._select_cols(X_cols) + X = self.data[X_cols].values y = self.data[y_cols].values - est = LinearRegression() - est.fit(x, y) - y -= est.predict(x) - if np.linalg.norm(y, 1) > np.exp(-32): - self.data[y_cols] = y + _aX = np.c_[np.ones(len(y)), X] + coefs, resids, rank, s = np.linalg.lstsq(_aX, y) + self.data[y_cols] = y - X.dot(coefs[1:]) def formula(self, f, target=None, replace=False, *args, **kwargs): from patsy import dmatrix @@ -314,7 +314,7 @@ def __init__(self, transformer, inputs, steps, outputs=None): self._validate() def _validate(self): - missing = set(transform['input'] - set(self.transformer.data.columns)): + missing = set(transform['input'] - set(self.transformer.data.columns)) if missing: raise ValueError("Invalid column(s): %s" % missing) From 78f7b707fe09379e2cec242a997a47f226e16fb7 Mon Sep 17 00:00:00 2001 From: tyarkoni Date: Sun, 17 Jul 2016 20:08:23 -0500 Subject: [PATCH 04/10] replace sklearn preproc dependencies --- nipype/algorithms/events.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/nipype/algorithms/events.py b/nipype/algorithms/events.py index 50bb3b82a2..3502602436 100644 --- a/nipype/algorithms/events.py +++ b/nipype/algorithms/events.py @@ -18,7 +18,6 @@ # Eventually, move the following inside the interface, or wrap in an import check import pandas as pd -import sklearn.preprocessing as preproc def alias(target, append=False): @@ -74,15 +73,9 @@ def _select_cols(self, cols): cols = [l for l in self.data.columns.tolist() for m in [patt.search(l)] if m] return cols - @alias(preproc.scale) - def standardize(): pass - @alias(np.log) def log(): pass - @alias(preproc.binarize) - def binarize(): pass - @alias(np.logical_or) def or_(): pass @@ -95,6 +88,19 @@ def not_(): pass @alias(pd.get_dummies, append=True) def factor(): pass + def standardize(self, cols, demean=True, rescale=True, copy=True): + cols = self._select_cols(cols) + X = self.data[cols] + self.data[cols] = (X - X.mean()) / np.std(X, 0) + + def binarize(self, cols, threshold=0.0): + cols = self._select_cols(cols) + X = self.data[cols].values + above = X > threshold + X[above] = 1 + X[~above] = 0 + self.data[cols] = X + def orthogonalize(self, y_cols, X_cols): ''' Orthogonalize each of the variables in y_cols with respect to all of the variables in x_cols. 
From 8b9cd569c5e4bfd1b7ec072939b7eca8365ef56a Mon Sep 17 00:00:00 2001 From: tyarkoni Date: Mon, 1 Aug 2016 09:03:31 -0700 Subject: [PATCH 05/10] renamed interface; minor tweaks --- nipype/algorithms/events.py | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/nipype/algorithms/events.py b/nipype/algorithms/events.py index 3502602436..13f1c8af64 100644 --- a/nipype/algorithms/events.py +++ b/nipype/algorithms/events.py @@ -17,7 +17,10 @@ iflogger = logging.getLogger('interface') # Eventually, move the following inside the interface, or wrap in an import check -import pandas as pd +try: + import pandas as pd +except ImportError: + raise ImportError("The events module requires pandas.") def alias(target, append=False): @@ -88,12 +91,15 @@ def not_(): pass @alias(pd.get_dummies, append=True) def factor(): pass - def standardize(self, cols, demean=True, rescale=True, copy=True): + def _standardize(self, cols, demean=True, rescale=True, copy=True): cols = self._select_cols(cols) X = self.data[cols] self.data[cols] = (X - X.mean()) / np.std(X, 0) - def binarize(self, cols, threshold=0.0): + @alias(_standardize) + def standardize(): pass + + def _binarize(self, cols, threshold=0.0): cols = self._select_cols(cols) X = self.data[cols].values above = X > threshold @@ -101,6 +107,9 @@ def binarize(self, cols, threshold=0.0): X[~above] = 0 self.data[cols] = X + @alias(_binarize) + def binarize(): pass + def orthogonalize(self, y_cols, X_cols): ''' Orthogonalize each of the variables in y_cols with respect to all of the variables in x_cols. @@ -148,8 +157,7 @@ def _to_dense(self): ts = np.zeros((len_ts, n_conditions)) _events = self.events.copy().reset_index() - _events[['onset', 'duration']] = _events[ - ['onset', 'duration']] * targ_hz / orig_hz + _events[['onset', 'duration']] = _events[['onset', 'duration']] * targ_hz / orig_hz cond_index = [conditions.index(c) for c in _events['condition']] ev_end = np.round(_events['onset'] + _events['duration']).astype(int) @@ -284,7 +292,7 @@ def read(self, path, condition=None, subject=None, run=None, rename=None): return pd.concat(dfs, axis=0) -class TransformEventsInputSpec(BaseInterfaceInputSpec): +class SpecifyEventsInputSpec(BaseInterfaceInputSpec): subject_info = InputMultiPath(Bunch, mandatory=True, xor=['subject_info', 'event_files'], desc=("Bunch or List(Bunch) subject specific condition information. " @@ -303,7 +311,7 @@ class TransformEventsInputSpec(BaseInterfaceInputSpec): # desc=("JSON specification of the transformations to perform.")) -class TransformEventsOutputSpec(TraitedSpec): +class SpecifyEventsOutputSpec(TraitedSpec): subject_info = OutputMultiPath(Bunch, mandatory=True, desc=("Bunch or List(Bunch) subject specific condition information. 
" "see :ref:`SpecifyModel` or SpecifyModel.__doc__ for details")) @@ -328,10 +336,10 @@ def run(self): pass -class TransformEvents(BaseInterface): +class SpecifyEvents(BaseInterface): - input_spec = TransformEventsInputSpec - output_spec = TransformEventsOutputSpec + input_spec = SpecifyEventsInputSpec + output_spec = SpecifyEventsOutputSpec def _get_event_data(self): if isdefined(self.inputs.subject_info): From 095154afa296d98db3e155311a4e0551e75671c1 Mon Sep 17 00:00:00 2001 From: tyarkoni Date: Mon, 1 Aug 2016 14:24:50 -0700 Subject: [PATCH 06/10] working data transformations --- nipype/algorithms/events.py | 91 ++++++++++++++++++++++--------------- 1 file changed, 55 insertions(+), 36 deletions(-) diff --git a/nipype/algorithms/events.py b/nipype/algorithms/events.py index 13f1c8af64..98394ac29b 100644 --- a/nipype/algorithms/events.py +++ b/nipype/algorithms/events.py @@ -14,6 +14,7 @@ import re from glob import glob from os.path import basename +import json iflogger = logging.getLogger('interface') # Eventually, move the following inside the interface, or wrap in an import check @@ -25,38 +26,38 @@ def alias(target, append=False): def decorator(func): - def wrapper(self, cols, groupby=None, names=None, *args, **kwargs): + def wrapper(self, cols, groupby=None, output=None, *args, **kwargs): cols = self._select_cols(cols) # groupby can be either a single column, in which case it's # interpreted as a categorical variable to groupby directly, or a - # list of column names, in which case it's interpreted as a set of + # list of column output, in which case it's interpreted as a set of # dummy columns to reconstruct a categorical from. if groupby is not None: groupby = self._select_cols(groupby) if len(groupby) > 1: group_results = [] - names = ['%s_%s' % (cn, gn)for cn in cols for gn in groupby] + output = ['%s_%s' % (cn, gn)for cn in cols for gn in groupby] for i, col in enumerate(groupby): _result = np.zeros((len(self.data), len(cols))) - inds = self.data[col].nonzero() - _result[inds] = target(self.data[cols].iloc[inds], *args, **kwargs) + _result[inds] = target(self, cols, *args, **kwargs) group_results.extend(_result.T) result = np.c_[group_results].squeeze().T - result = pd.DataFrame(result, columns=names) + result = pd.DataFrame(result, columns=output) else: - result = self.data.groupby(groupby).apply(target, *args, **kwargs) + result = self.data.groupby(groupby).apply(target, self, cols, *args, **kwargs) else: - result = target(self.data[cols], *args, **kwargs) + result = target(self, cols, *args, **kwargs) if append: - names = result.columns + output = result.columns - if names is not None: - cols = names - - self.data[cols] = result + if output is not None: + result = pd.DataFrame(result, columns=output) + self.data = self.data.join(result) + else: + self.data[cols] = result return wrapper return decorator @@ -94,7 +95,13 @@ def factor(): pass def _standardize(self, cols, demean=True, rescale=True, copy=True): cols = self._select_cols(cols) X = self.data[cols] - self.data[cols] = (X - X.mean()) / np.std(X, 0) + return (X - X.mean()) / np.std(X, 0) + + def select(self, cols): + # Always retain onsets + if 'onset' not in cols: + cols.insert(0, 'onset') + self.data = self.data[self._select_cols(cols)] @alias(_standardize) def standardize(): pass @@ -105,21 +112,24 @@ def _binarize(self, cols, threshold=0.0): above = X > threshold X[above] = 1 X[~above] = 0 - self.data[cols] = X + return X @alias(_binarize) def binarize(): pass - def orthogonalize(self, y_cols, X_cols): - ''' 
Orthogonalize each of the variables in y_cols with respect to all + def _orthogonalize(self, cols, X_cols): + ''' Orthogonalize each of the variables in cols with respect to all of the variables in x_cols. ''' - y_cols, X_cols = self._select_cols(y_cols), self._select_cols(X_cols) + cols, X_cols = self._select_cols(cols), self._select_cols(X_cols) X = self.data[X_cols].values - y = self.data[y_cols].values + y = self.data[cols].values _aX = np.c_[np.ones(len(y)), X] coefs, resids, rank, s = np.linalg.lstsq(_aX, y) - self.data[y_cols] = y - X.dot(coefs[1:]) + return y - X.dot(coefs[1:]) + + @alias(_orthogonalize) + def orthogonalize(): pass def formula(self, f, target=None, replace=False, *args, **kwargs): from patsy import dmatrix @@ -132,17 +142,19 @@ def formula(self, f, target=None, replace=False, *args, **kwargs): raise ValueError("Either a target column must be passed or replace" " must be True.") - def multiply(self, y_cols, x_cols): + def multiply(self, cols, x_cols): x_cols = self._select_cols(x_cols) - result = self.data[x_cols].apply(lambda x: np.multiply(x, self.data[y_cols])) - names = ['%s_%s' % (y_cols, x) for x in x_cols] - self.data[names] = result + result = self.data[x_cols].apply(lambda x: np.multiply(x, self.data[cols])) + output = ['%s_%s' % (cols, x) for x in x_cols] + self.data[output] = result def query(self, q, *args, **kwargs): self.data = self.data.query(filter) def apply(self, func, *args, **kwargs): - self.data = func(self.data, *args, **kwargs) + if isinstance(func, string_types): + func = getattr(self, func) + func(*args, **kwargs) def _to_dense(self): """ Convert the sparse [onset, duration, amplitude] representation @@ -191,9 +203,9 @@ def __init__(self, columns=None, header='infer', sep=None, run_pattern=None): ''' Args: - columns (list): Optional list of column names to use. If passed, + columns (list): Optional list of column output to use. If passed, number of elements must match number of columns in the text - files to be read. If omitted, column names are inferred by + files to be read. If omitted, column output are inferred by pandas (depending on value of header). header (str): passed to pandas; see pd.read_table docs for details. sep (str): column separator; see pd.read_table docs for details. @@ -202,13 +214,13 @@ def __init__(self, columns=None, header='infer', sep=None, default_amplitude (float): Optional default amplitude to set for all events. Will be ignored if an amplitude column is found. condition_pattern (str): regex with which to capture condition - names from input text file filenames. Only the first captured + output from input text file fileoutput. Only the first captured group will be used. subject_pattern (str): regex with which to capture subject - names from input text file filenames. Only the first captured + output from input text file fileoutput. Only the first captured group will be used. - run_pattern (str): regex with which to capture run names from input - text file filenames. Only the first captured group will be used. + run_pattern (str): regex with which to capture run output from input + text file fileoutput. Only the first captured group will be used. 
''' self.columns = columns @@ -300,15 +312,15 @@ class SpecifyEventsInputSpec(BaseInterfaceInputSpec): event_files = InputMultiPath(traits.List(File(exists=True)), mandatory=True, xor=['subject_info', 'event_files'], desc=('list of event description files in 1, 2, 3, or 4 column format ' - 'corresponding to onsets, durations, amplitudes, and names')) + 'corresponding to onsets, durations, amplitudes, and output')) input_units = traits.Enum('secs', 'scans', mandatory=True, desc=("Units of event onsets and durations (secs or scans). Output " "units are always in secs")) time_repetition = traits.Float(mandatory=True, desc=("Time between the start of one volume to the start of " "the next image volume.")) - # transformations = InputMultiPath(traits.List(File(exists=True)), mandatory=True, - # desc=("JSON specification of the transformations to perform.")) + transformations = traits.File(exists=True, mandatory=False, + desc=("JSON specification of the transformations to perform.")) class SpecifyEventsOutputSpec(TraitedSpec): @@ -353,9 +365,16 @@ def _get_event_data(self): def _transform_events(self): events = self._get_event_data() self.transformer = EventTransformer(events) + if isdefined(self.inputs.transformations): + tf = json.load(open(self.inputs.transformations)) + for t in tf['steps']: + name = t.pop('name') + cols = t.pop('input', None) + # args = t.get('args', {}) + self.transformer.apply(name, cols, **t) + if 'select' in tf: + self.transformer.select(tf['select']) - # Transformation application logic goes here later - # ... self.transformer.resample(self.inputs.time_repetition) def _run_interface(self, runtime): From bb4f6f40a4345770683451d0524f91a34275d58e Mon Sep 17 00:00:00 2001 From: tyarkoni Date: Tue, 2 Aug 2016 09:03:12 -0700 Subject: [PATCH 07/10] moved import exception raising into _run_interface --- nipype/algorithms/events.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/nipype/algorithms/events.py b/nipype/algorithms/events.py index 98394ac29b..62d0d0d8af 100644 --- a/nipype/algorithms/events.py +++ b/nipype/algorithms/events.py @@ -1,27 +1,23 @@ from __future__ import division -from builtins import range -from copy import deepcopy -import os - -from nibabel import load import numpy as np from nipype.external.six import string_types from nipype.interfaces.base import (BaseInterface, TraitedSpec, InputMultiPath, traits, File, Bunch, BaseInterfaceInputSpec, isdefined, OutputMultiPath) -from nipype import config, logging +from nipype import logging import re from glob import glob from os.path import basename import json iflogger = logging.getLogger('interface') -# Eventually, move the following inside the interface, or wrap in an import check + +have_pandas = True try: import pandas as pd -except ImportError: - raise ImportError("The events module requires pandas.") +except: + have_nipy = False def alias(target, append=False): @@ -370,7 +366,6 @@ def _transform_events(self): for t in tf['steps']: name = t.pop('name') cols = t.pop('input', None) - # args = t.get('args', {}) self.transformer.apply(name, cols, **t) if 'select' in tf: self.transformer.select(tf['select']) @@ -378,6 +373,9 @@ def _transform_events(self): self.transformer.resample(self.inputs.time_repetition) def _run_interface(self, runtime): + if not have_pandas: + raise ImportError("The SpecifyEvents interface requires pandas. 
" + "Please make sure that pandas is installed.") self._transform_events() return runtime From 2878f47adb2f94c87191e8266e28d3c03d0c6f1c Mon Sep 17 00:00:00 2001 From: tyarkoni Date: Tue, 2 Aug 2016 09:03:28 -0700 Subject: [PATCH 08/10] added column renaming --- nipype/algorithms/events.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/nipype/algorithms/events.py b/nipype/algorithms/events.py index 62d0d0d8af..ab9399a1b8 100644 --- a/nipype/algorithms/events.py +++ b/nipype/algorithms/events.py @@ -144,6 +144,10 @@ def multiply(self, cols, x_cols): output = ['%s_%s' % (cols, x) for x in x_cols] self.data[output] = result + def rename(self, cols, output): + rename = dict(zip(cols, output)) + self.data = self.data.rename(columns=rename) + def query(self, q, *args, **kwargs): self.data = self.data.query(filter) From c17af770c83cba26e5355f2a88837a8ee2d8ce73 Mon Sep 17 00:00:00 2001 From: tyarkoni Date: Tue, 2 Aug 2016 09:21:56 -0700 Subject: [PATCH 09/10] removed Transformation class --- nipype/algorithms/events.py | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/nipype/algorithms/events.py b/nipype/algorithms/events.py index ab9399a1b8..7022ce900b 100644 --- a/nipype/algorithms/events.py +++ b/nipype/algorithms/events.py @@ -329,25 +329,6 @@ class SpecifyEventsOutputSpec(TraitedSpec): "see :ref:`SpecifyModel` or SpecifyModel.__doc__ for details")) -class Transformation(object): - - def __init__(self, transformer, inputs, steps, outputs=None): - - self.transformer = transformer - self.inputs = inputs - self.steps = steps - self.outputs = outputs - self._validate() - - def _validate(self): - missing = set(transform['input'] - set(self.transformer.data.columns)) - if missing: - raise ValueError("Invalid column(s): %s" % missing) - - def run(self): - pass - - class SpecifyEvents(BaseInterface): input_spec = SpecifyEventsInputSpec From db642fa0461d174879dafe665e471471ead43332 Mon Sep 17 00:00:00 2001 From: tyarkoni Date: Tue, 2 Aug 2016 10:32:52 -0700 Subject: [PATCH 10/10] better approach to transformation mapping --- nipype/algorithms/events.py | 97 ++++++++++++++++++++----------------- 1 file changed, 52 insertions(+), 45 deletions(-) diff --git a/nipype/algorithms/events.py b/nipype/algorithms/events.py index 7022ce900b..180fc3e92f 100644 --- a/nipype/algorithms/events.py +++ b/nipype/algorithms/events.py @@ -20,11 +20,44 @@ have_nipy = False +class Transformations(object): + + @staticmethod + def standardize(data, demean=True, rescale=True): + if demean: + data -= data.mean(0) + if rescale: + data /= data.std(0) + return data + + @staticmethod + def orthogonalize(data, other): + ''' Orthogonalize each of the variables in cols with respect to all + of the variables in x_cols. 
+ ''' + y = data.values + X = other.values + _aX = np.c_[np.ones(len(y)), X] + coefs, resids, rank, s = np.linalg.lstsq(_aX, y) + return y - X.dot(coefs[1:]) + + @staticmethod + def binarize(data, threshold=0.0): + above = data > threshold + data[above] = 1 + data[~above] = 0 + return data + + def alias(target, append=False): def decorator(func): def wrapper(self, cols, groupby=None, output=None, *args, **kwargs): cols = self._select_cols(cols) + data = self.data[cols] + + if 'other' in kwargs: + kwargs['other'] = self.data[kwargs['other']] # groupby can be either a single column, in which case it's # interpreted as a categorical variable to groupby directly, or a @@ -36,15 +69,14 @@ def wrapper(self, cols, groupby=None, output=None, *args, **kwargs): group_results = [] output = ['%s_%s' % (cn, gn)for cn in cols for gn in groupby] for i, col in enumerate(groupby): - _result = np.zeros((len(self.data), len(cols))) - _result[inds] = target(self, cols, *args, **kwargs) + _result = target(data, *args, **kwargs) group_results.extend(_result.T) result = np.c_[group_results].squeeze().T result = pd.DataFrame(result, columns=output) else: - result = self.data.groupby(groupby).apply(target, self, cols, *args, **kwargs) + result = self.data.groupby(groupby).apply(target, data, *args, **kwargs) else: - result = target(self, cols, *args, **kwargs) + result = target(data, *args, **kwargs) if append: output = result.columns @@ -66,13 +98,7 @@ def __init__(self, events, orig_hz=1, target_hz=1000): self.target_hz = target_hz self._to_dense() - def _select_cols(self, cols): - if isinstance(cols, string_types) and '*' in cols: - # if '*' in cols: - patt = re.compile(cols.replace('*', '.*')) - cols = [l for l in self.data.columns.tolist() for m in [patt.search(l)] if m] - return cols - + ### Aliased functions ### @alias(np.log) def log(): pass @@ -88,45 +114,22 @@ def not_(): pass @alias(pd.get_dummies, append=True) def factor(): pass - def _standardize(self, cols, demean=True, rescale=True, copy=True): - cols = self._select_cols(cols) - X = self.data[cols] - return (X - X.mean()) / np.std(X, 0) + @alias(Transformations.standardize) + def standardize(): pass + + @alias(Transformations.binarize) + def binarize(): pass + + @alias(Transformations.orthogonalize) + def orthogonalize(): pass + ### Standard instance methods ### def select(self, cols): # Always retain onsets if 'onset' not in cols: cols.insert(0, 'onset') self.data = self.data[self._select_cols(cols)] - @alias(_standardize) - def standardize(): pass - - def _binarize(self, cols, threshold=0.0): - cols = self._select_cols(cols) - X = self.data[cols].values - above = X > threshold - X[above] = 1 - X[~above] = 0 - return X - - @alias(_binarize) - def binarize(): pass - - def _orthogonalize(self, cols, X_cols): - ''' Orthogonalize each of the variables in cols with respect to all - of the variables in x_cols. 
- ''' - cols, X_cols = self._select_cols(cols), self._select_cols(X_cols) - X = self.data[X_cols].values - y = self.data[cols].values - _aX = np.c_[np.ones(len(y)), X] - coefs, resids, rank, s = np.linalg.lstsq(_aX, y) - return y - X.dot(coefs[1:]) - - @alias(_orthogonalize) - def orthogonalize(): pass - def formula(self, f, target=None, replace=False, *args, **kwargs): from patsy import dmatrix result = dmatrix(f, self.data, return_type='dataframe', *args, **kwargs) @@ -156,6 +159,12 @@ def apply(self, func, *args, **kwargs): func = getattr(self, func) func(*args, **kwargs) + def _select_cols(self, cols): + if isinstance(cols, string_types) and '*' in cols: + patt = re.compile(cols.replace('*', '.*')) + cols = [l for l in self.data.columns.tolist() for m in [patt.search(l)] if m] + return cols + def _to_dense(self): """ Convert the sparse [onset, duration, amplitude] representation typical of event files to a dense matrix where each row represents @@ -352,8 +361,6 @@ def _transform_events(self): name = t.pop('name') cols = t.pop('input', None) self.transformer.apply(name, cols, **t) - if 'select' in tf: - self.transformer.select(tf['select']) self.transformer.resample(self.inputs.time_repetition)