
Add new optional "separator" argument to json_normalize #14891


Closed
wants to merge 52 commits into from
Changes from all commits

Commits (52)
457019b
added 'separator' argument to json_normalize
jowens Dec 15, 2016
c345d6d
test for json_normalize argument 'separator'
jowens Dec 16, 2016
def361d
added new enhancement: json_normalize now takes 'separator' as an opt…
jowens Dec 16, 2016
fac9ac1
rename json_normalize arg separator to sep, simpler test, add version…
jowens Dec 16, 2016
5f777f4
DOC: fixed typo (#14892)
smsaladi Dec 16, 2016
992dfbc
BUG: regression in DataFrame.combine_first with integer columns (GH14…
jorisvandenbossche Dec 16, 2016
2083f0d
DOC: Add documentation about cpplint (#14890)
gfyoung Dec 16, 2016
d1b1720
BLD: swap 3.6-dev and 3.4 builds, reorg build order (#14899)
jreback Dec 16, 2016
e7df751
ENH: merge_asof() has type specializations and can take multiple 'by'…
Dec 16, 2016
2566223
TST: to_json keeps column info with empty dataframe (#7445)
mroeschke Dec 16, 2016
6f4e36a
API: map() on Index returns an Index, not array
nateyoder Dec 16, 2016
dd8cba2
BUG: Patch read_csv NA values behaviour
gfyoung Dec 16, 2016
73bc6cf
Groupby tests restructure
aileronajay Dec 17, 2016
f5c8d54
Catch warning introduced by GH14432 in test case
Dec 17, 2016
e80a2b9
DOC for refactored compression (GH14576) + BUG: bz2-compressed URL wi…
dhimmel Dec 17, 2016
906b51a
TST: Test datetime array assignment with different units (#7492) (#14…
mroeschke Dec 17, 2016
bdbebc4
BUG: Prevent addition overflow with TimedeltaIndex (#14816)
gfyoung Dec 17, 2016
e503d40
Clean up construction of Series with dictionary and datetime index
nateyoder Dec 17, 2016
f3c5a42
BUG: .fillna() for datetime64 with tz is passing thru floats
opensourceworkAR Dec 18, 2016
37b22c7
TST: Test timedelta arithmetic (#9396) (#14906)
mroeschke Dec 18, 2016
a718962
TST: Groupby/transform with grouped NaN (#9941) (#14907)
mroeschke Dec 18, 2016
f1cfe5b
CLN: remove simple _DATELIKE_DTYPES test and replace with is_datetime…
jreback Dec 18, 2016
8b98104
ENH: select_dtypes now allows 'datetimetz' for generically selecting …
jreback Dec 19, 2016
8c798c0
TST:Test to_sparse with nan dataframe (#10079) (#14913)
mroeschke Dec 19, 2016
dc4b070
COMPAT/REF: Use s3fs for s3 IO
TomAugspurger Dec 19, 2016
39efbbc
CLN: move unique1d to algorithms from nanops (#14919)
jreback Dec 19, 2016
0ac3d98
BUG: Don't convert uint64 to object in DataFrame init (#14917)
gfyoung Dec 19, 2016
f11501a
MAINT: Only output errors in C style check (#14924)
gfyoung Dec 19, 2016
8e630b6
BUG: Fixed DataFrame.describe percentiles are ndarray w/ no median
pbreach Dec 19, 2016
3ccb501
CLN: Resubmit of GH14700. Fixes GH14554. Errors other than Indexing…
clham Dec 19, 2016
5faf32a
BUG: Fix to numeric on decimal fields
Dec 20, 2016
b35c689
BUG: Prevent uint64 overflow in Series.unique
gfyoung Dec 20, 2016
0c52813
BUG: Convert uint64 in maybe_convert_objects
gfyoung Dec 20, 2016
3ab0e55
PERF: make all inference routines cpdef bint
jreback Dec 20, 2016
02906ce
TST: Test empty input for read_csv (#14867) (#14920)
jeffcarey Dec 20, 2016
50930a9
API/BUG: Fix inconsistency in Partial String Index with 'second' reso…
ischurov Dec 20, 2016
24fb26d
BUG: bug in Series construction from UTC
jreback Dec 20, 2016
708792a
DOC: cleanup of timeseries.rst
jreback Dec 20, 2016
3ab369c
TST: Groupby.filter dropna=False with empty group (#10780) (#14926)
mroeschke Dec 20, 2016
1678f14
DOC: small edits in timeseries.rst
jreback Dec 21, 2016
4c3d4d4
cache and remove boxing (#14931)
MaximilianR Dec 21, 2016
0a7cd97
DOC: whatsnew 0.20 and timeseries doc fixes
jreback Dec 21, 2016
07c83ee
PERF: fix getitem unique_check / initialization issue
jreback Dec 21, 2016
73e2829
BUG: Properly read Categorical msgpacks (#14918)
gfyoung Dec 21, 2016
f79bc7a
DOC: Pandas Cheat Sheet
Dr-Irv Dec 21, 2016
a06e32a
added 'separator' argument to json_normalize
jowens Dec 15, 2016
dcc4632
test for json_normalize argument 'separator'
jowens Dec 16, 2016
2363314
added new enhancement: json_normalize now takes 'separator' as an opt…
jowens Dec 16, 2016
8e0faa8
rename json_normalize arg separator to sep, simpler test, add version…
jowens Dec 16, 2016
521720d
json_normalize's separator is now sep, also does a check for string_t…
jowens Dec 21, 2016
74c4285
simpler and better tests for json_normalize with separator (default, …
jowens Dec 21, 2016
8b72b12
Merge branch 'json_normalize-separator' of github.com:jowens/pandas i…
jowens Dec 21, 2016
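The feature this PR adds can be sketched as follows. This assumes the final keyword name `sep` (per the rename commits above) and uses the top-level `pd.json_normalize`; at the time of this PR the function lived at `pandas.io.json.json_normalize`, and the example data is hypothetical, not from the PR itself.

```python
import pandas as pd

# Nested records: json_normalize flattens them into columns whose names
# join each level of nesting with the separator (default ".").
data = [
    {"id": 1, "location": {"city": "Davis", "geo": {"lat": 38.5, "lon": -121.7}}},
]

# Passing sep="_" changes the join character in the flattened column names.
flat = pd.json_normalize(data, sep="_")
print(sorted(flat.columns))
# ['id', 'location_city', 'location_geo_lat', 'location_geo_lon']
```

Without `sep`, the same call would produce `location.city`, `location.geo.lat`, and so on.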
115 changes: 58 additions & 57 deletions .travis.yml
@@ -66,19 +66,6 @@ matrix:
apt:
packages:
- python-gtk2
- python: 3.4
env:
- PYTHON_VERSION=3.4
- JOB_NAME: "34_nslow"
- NOSE_ARGS="not slow and not disabled"
- FULL_DEPS=true
- CLIPBOARD=xsel
- CACHE_NAME="34_nslow"
- USE_CACHE=true
addons:
apt:
packages:
- xsel
- python: 3.5
env:
- PYTHON_VERSION=3.5
@@ -93,6 +80,33 @@ matrix:
apt:
packages:
- xsel
- python: 3.6-dev
env:
- PYTHON_VERSION=3.6
- JOB_NAME: "36_dev"
- JOB_TAG=_DEV
- NOSE_ARGS="not slow and not network and not disabled"
- PANDAS_TESTING_MODE="deprecate"
addons:
apt:
packages:
- libatlas-base-dev
- gfortran
# In allow_failures
- python: 2.7
env:
- PYTHON_VERSION=2.7
- JOB_NAME: "27_nslow_nnet_COMPAT"
- NOSE_ARGS="not slow and not network and not disabled"
- LOCALE_OVERRIDE="it_IT.UTF-8"
- INSTALL_TEST=true
- JOB_TAG=_COMPAT
- CACHE_NAME="27_nslow_nnet_COMPAT"
- USE_CACHE=true
addons:
apt:
packages:
- language-pack-it
# In allow_failures
- python: 2.7
env:
@@ -103,45 +117,46 @@
- FULL_DEPS=true
- CACHE_NAME="27_slow"
- USE_CACHE=true
# In allow_failures
- python: 2.7
env:
- PYTHON_VERSION=2.7
- JOB_NAME: "27_build_test_conda"
- JOB_TAG=_BUILD_TEST
- NOSE_ARGS="not slow and not disabled"
- FULL_DEPS=true
- BUILD_TEST=true
- CACHE_NAME="27_build_test_conda"
- USE_CACHE=true
# In allow_failures
- python: 3.4
env:
- PYTHON_VERSION=3.4
- JOB_NAME: "34_slow"
- JOB_TAG=_SLOW
- NOSE_ARGS="slow and not network and not disabled"
- JOB_NAME: "34_nslow"
- NOSE_ARGS="not slow and not disabled"
- FULL_DEPS=true
- CLIPBOARD=xsel
- CACHE_NAME="34_slow"
- CACHE_NAME="34_nslow"
- USE_CACHE=true
addons:
apt:
packages:
- xsel
# In allow_failures
- python: 2.7
- python: 3.4
env:
- PYTHON_VERSION=2.7
- JOB_NAME: "27_build_test_conda"
- JOB_TAG=_BUILD_TEST
- NOSE_ARGS="not slow and not disabled"
- PYTHON_VERSION=3.4
- JOB_NAME: "34_slow"
- JOB_TAG=_SLOW
- NOSE_ARGS="slow and not network and not disabled"
- FULL_DEPS=true
- BUILD_TEST=true
- CACHE_NAME="27_build_test_conda"
- CLIPBOARD=xsel
- CACHE_NAME="34_slow"
- USE_CACHE=true
# In allow_failures
- python: 3.6-dev
env:
- PYTHON_VERSION=3.6
- JOB_NAME: "36_dev"
- JOB_TAG=_DEV
- NOSE_ARGS="not slow and not network and not disabled"
- PANDAS_TESTING_MODE="deprecate"
addons:
apt:
packages:
- libatlas-base-dev
- gfortran
- xsel
# In allow_failures
- python: 3.5
env:
@@ -157,21 +172,6 @@
packages:
- libatlas-base-dev
- gfortran
# In allow_failures
- python: 2.7
env:
- PYTHON_VERSION=2.7
- JOB_NAME: "27_nslow_nnet_COMPAT"
- NOSE_ARGS="not slow and not network and not disabled"
- LOCALE_OVERRIDE="it_IT.UTF-8"
- INSTALL_TEST=true
- JOB_TAG=_COMPAT
- CACHE_NAME="27_nslow_nnet_COMPAT"
- USE_CACHE=true
addons:
apt:
packages:
- language-pack-it
# In allow_failures
- python: 3.5
env:
@@ -226,18 +226,19 @@ matrix:
- BUILD_TEST=true
- CACHE_NAME="27_build_test_conda"
- USE_CACHE=true
- python: 3.6-dev
- python: 3.4
env:
- PYTHON_VERSION=3.6
- JOB_NAME: "36_dev"
- JOB_TAG=_DEV
- NOSE_ARGS="not slow and not network and not disabled"
- PANDAS_TESTING_MODE="deprecate"
- PYTHON_VERSION=3.4
- JOB_NAME: "34_nslow"
- NOSE_ARGS="not slow and not disabled"
- FULL_DEPS=true
- CLIPBOARD=xsel
- CACHE_NAME="34_nslow"
- USE_CACHE=true
addons:
apt:
packages:
- libatlas-base-dev
- gfortran
- xsel
- python: 3.5
env:
- PYTHON_VERSION=3.5
15 changes: 14 additions & 1 deletion asv_bench/benchmarks/algorithms.py
@@ -18,14 +18,17 @@ def setup(self):
self.float = pd.Float64Index(np.random.randn(N).repeat(5))

# Convenience naming.
self.checked_add = pd.core.nanops._checked_add_with_arr
self.checked_add = pd.core.algorithms.checked_add_with_arr

self.arr = np.arange(1000000)
self.arrpos = np.arange(1000000)
self.arrneg = np.arange(-1000000, 0)
self.arrmixed = np.array([1, -1]).repeat(500000)
self.strings = tm.makeStringIndex(100000)

self.arr_nan = np.random.choice([True, False], size=1000000)
self.arrmixed_nan = np.random.choice([True, False], size=1000000)

# match
self.uniques = tm.makeStringIndex(1000).values
self.all = self.uniques.repeat(10)
@@ -69,6 +72,16 @@ def time_add_overflow_neg_arr(self):
def time_add_overflow_mixed_arr(self):
self.checked_add(self.arr, self.arrmixed)

def time_add_overflow_first_arg_nan(self):
self.checked_add(self.arr, self.arrmixed, arr_mask=self.arr_nan)

def time_add_overflow_second_arg_nan(self):
self.checked_add(self.arr, self.arrmixed, b_mask=self.arrmixed_nan)

def time_add_overflow_both_arg_nan(self):
self.checked_add(self.arr, self.arrmixed, arr_mask=self.arr_nan,
b_mask=self.arrmixed_nan)


class Hashing(object):
goal_time = 0.2
7 changes: 7 additions & 0 deletions asv_bench/benchmarks/frame_methods.py
@@ -68,6 +68,8 @@ class Iteration(object):
def setup(self):
self.df = DataFrame(randn(10000, 1000))
self.df2 = DataFrame(np.random.randn(50000, 10))
self.df3 = pd.DataFrame(np.random.randn(1000,5000),
columns=['C'+str(c) for c in range(5000)])

def f(self):
if hasattr(self.df, '_item_cache'):
@@ -85,6 +87,11 @@ def time_iteritems(self):
def time_iteritems_cached(self):
self.g()

def time_iteritems_indexing(self):
df = self.df3
for col in df:
df[col]

def time_itertuples(self):
for row in self.df2.itertuples():
pass
2 changes: 1 addition & 1 deletion asv_bench/benchmarks/io_bench.py
@@ -153,7 +153,7 @@ def setup(self, compression, engine):
# The Python 2 C parser can't read bz2 from open files.
raise NotImplementedError
try:
import boto
import s3fs
except ImportError:
# Skip these benchmarks if `s3fs` is not installed.
raise NotImplementedError
13 changes: 13 additions & 0 deletions asv_bench/benchmarks/join_merge.py
@@ -302,12 +302,19 @@ def setup(self):
self.df1 = self.df1.sort_values('time')
self.df2 = self.df2.sort_values('time')

self.df1['time32'] = np.int32(self.df1.time)
self.df2['time32'] = np.int32(self.df2.time)

self.df1a = self.df1[['time', 'value1']]
self.df2a = self.df2[['time', 'value2']]
self.df1b = self.df1[['time', 'key', 'value1']]
self.df2b = self.df2[['time', 'key', 'value2']]
self.df1c = self.df1[['time', 'key2', 'value1']]
self.df2c = self.df2[['time', 'key2', 'value2']]
self.df1d = self.df1[['time32', 'value1']]
self.df2d = self.df2[['time32', 'value2']]
self.df1e = self.df1[['time', 'key', 'key2', 'value1']]
self.df2e = self.df2[['time', 'key', 'key2', 'value2']]

def time_noby(self):
merge_asof(self.df1a, self.df2a, on='time')
@@ -318,6 +325,12 @@ def time_by_object(self):
def time_by_int(self):
merge_asof(self.df1c, self.df2c, on='time', by='key2')

def time_on_int32(self):
merge_asof(self.df1d, self.df2d, on='time32')

def time_multiby(self):
merge_asof(self.df1e, self.df2e, on='time', by=['key', 'key2'])


#----------------------------------------------------------------------
# data alignment
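The new `time_multiby` benchmark above times `merge_asof` with a list of `by` keys, the capability added in the merge_asof commit in this PR. A minimal sketch of that call on hypothetical toy data (not from the PR):

```python
import pandas as pd

# Both frames must be sorted by the "on" key; matching is exact on the
# "by" keys and as-of (backward, i.e. most recent earlier row) on "time".
left = pd.DataFrame({
    "time": [1, 5, 10],
    "key": ["a", "a", "b"],
    "key2": ["x", "x", "y"],
    "value1": [1, 2, 3],
})
right = pd.DataFrame({
    "time": [0, 4, 9],
    "key": ["a", "a", "b"],
    "key2": ["x", "x", "y"],
    "value2": [10, 20, 30],
})

res = pd.merge_asof(left, right, on="time", by=["key", "key2"])
print(res["value2"].tolist())  # [10, 20, 30]
```

Each left row picks up the latest right row at or before its `time` within the same (`key`, `key2`) group.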
25 changes: 25 changes &amp; 0 deletions asv_bench/benchmarks/period.py
@@ -49,3 +49,28 @@ def time_value_counts_pindex(self):
self.i.value_counts()


class period_standard_indexing(object):
goal_time = 0.2

def setup(self):
self.index = PeriodIndex(start='1985', periods=1000, freq='D')
self.series = Series(range(1000), index=self.index)
self.period = self.index[500]

def time_get_loc(self):
self.index.get_loc(self.period)

def time_shape(self):
self.index.shape

def time_shallow_copy(self):
self.index._shallow_copy()

def time_series_loc(self):
self.series.loc[self.period]

def time_align(self):
pd.DataFrame({'a': self.series, 'b': self.series[:500]})

def time_intersection(self):
self.index[:750].intersection(self.index[250:])
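The indexing paths timed in the benchmark above can be exercised directly. A sketch using `period_range` (the `PeriodIndex(start=..., periods=...)` form in the benchmark was deprecated in later pandas versions):

```python
import pandas as pd

index = pd.period_range(start="1985-01-01", periods=1000, freq="D")
series = pd.Series(range(1000), index=index)
period = index[500]

# get_loc resolves a Period label to its integer position.
assert index.get_loc(period) == 500
# .loc with a Period label goes through the same lookup machinery.
assert series.loc[period] == 500
print(index.shape)  # (1000,)
```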
19 changes: 17 additions & 2 deletions asv_bench/benchmarks/series_methods.py
@@ -8,13 +8,28 @@ def setup(self):
self.dr = pd.date_range(
start=datetime(2015,10,26),
end=datetime(2016,1,1),
freq='10s'
) # ~500k long
freq='50s'
) # ~100k long

def time_series_constructor_no_data_datetime_index(self):
Series(data=None, index=self.dr)


class series_constructor_dict_data_datetime_index(object):
goal_time = 0.2

def setup(self):
self.dr = pd.date_range(
start=datetime(2015, 10, 26),
end=datetime(2016, 1, 1),
freq='50s'
) # ~100k long
self.data = {d: v for d, v in zip(self.dr, range(len(self.dr)))}

def time_series_constructor_no_data_datetime_index(self):
Series(data=self.data, index=self.dr)
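The new benchmark class above constructs a Series from a dict keyed by the same timestamps as the index, the code path cleaned up in the "Clean up construction of Series with dictionary and datetime index" commit. A small sketch of that constructor:

```python
import pandas as pd

dr = pd.date_range(start="2016-01-01", periods=5, freq="D")
# Dict keyed by the timestamps in the index, as in the benchmark's setup.
data = {d: v for d, v in zip(dr, range(len(dr)))}

# Values are looked up from the dict by index label, preserving index order.
s = pd.Series(data=data, index=dr)
print(s.tolist())  # [0, 1, 2, 3, 4]
```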


class series_isin_int64(object):
goal_time = 0.2

6 changes: 3 additions &amp; 3 deletions ci/lint.sh
@@ -7,6 +7,8 @@ source activate pandas
RET=0

if [ "$LINT" ]; then
pip install cpplint

# pandas/rpy is deprecated and will be removed.
# pandas/src is C code, so no need to search there.
echo "Linting *.py"
@@ -43,13 +45,11 @@ if [ "$LINT" ]; then
# from Cython files nor do we want to lint C files that we didn't modify for
# this particular codebase (e.g. src/headers, src/klib, src/msgpack). However,
# we can lint all header files since they aren't "generated" like C files are.
pip install cpplint

echo "Linting *.c and *.h"
for path in '*.h' 'period_helper.c' 'datetime' 'parser' 'ujson'
do
echo "linting -> pandas/src/$path"
cpplint --extensions=c,h --headers=h --filter=-readability/casting,-runtime/int,-build/include_subdir --recursive pandas/src/$path
cpplint --quiet --extensions=c,h --headers=h --filter=-readability/casting,-runtime/int,-build/include_subdir --recursive pandas/src/$path
if [ $? -ne "0" ]; then
RET=1
fi
2 changes: 1 addition &amp; 1 deletion ci/requirements-2.7-64.run
@@ -11,7 +11,7 @@ sqlalchemy
lxml=3.2.1
scipy
xlsxwriter
boto
s3fs
bottleneck
html5lib
beautiful-soup
2 changes: 1 addition &amp; 1 deletion ci/requirements-2.7.run
@@ -11,7 +11,7 @@ sqlalchemy=0.9.6
lxml=3.2.1
scipy
xlsxwriter=0.4.6
boto=2.36.0
s3fs
bottleneck
psycopg2=2.5.2
patsy
2 changes: 1 addition &amp; 1 deletion ci/requirements-2.7_SLOW.run
@@ -13,7 +13,7 @@ numexpr
pytables
sqlalchemy
lxml
boto
s3fs
bottleneck
psycopg2
pymysql
2 changes: 1 addition &amp; 1 deletion ci/requirements-3.5.run
@@ -17,7 +17,7 @@ sqlalchemy
pymysql
psycopg2
xarray
boto
s3fs

# incompat with conda ATM
# beautiful-soup
2 changes: 1 addition & 1 deletion ci/requirements-3.5_OSX.run
@@ -12,7 +12,7 @@ matplotlib
jinja2
bottleneck
xarray
boto
s3fs

# incompat with conda ATM
# beautiful-soup
Binary file added doc/cheatsheet/Pandas_Cheat_Sheet.pdf
Binary file not shown.
Binary file added doc/cheatsheet/Pandas_Cheat_Sheet.pptx
Binary file not shown.