Skip to content

Commit dd443a8

Browse files
authored
Merge branch 'master' into ndarray_tolerance
2 parents fb0a647 + 3c964a4 commit dd443a8

File tree

258 files changed

+11851
-6340
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

258 files changed

+11851
-6340
lines changed

.travis.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,8 @@ script:
121121
- ci/script_single.sh
122122
- ci/script_multi.sh
123123
- ci/lint.sh
124+
- echo "checking imports"
125+
- source activate pandas && python ci/check_imports.py
124126
- echo "script done"
125127

126128
after_success:

MANIFEST.in

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
include MANIFEST.in
22
include LICENSE
33
include RELEASE.md
4-
include README.rst
4+
include README.md
55
include setup.py
66
include pyproject.toml
77

asv_bench/benchmarks/index_object.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,3 +199,23 @@ def time_datetime_level_values_full(self):
199199

200200
def time_datetime_level_values_sliced(self):
201201
self.mi[:10].values
202+
203+
204+
class Range(object):
205+
goal_time = 0.2
206+
207+
def setup(self):
208+
self.idx_inc = RangeIndex(start=0, stop=10**7, step=3)
209+
self.idx_dec = RangeIndex(start=10**7, stop=-1, step=-3)
210+
211+
def time_max(self):
212+
self.idx_inc.max()
213+
214+
def time_max_trivial(self):
215+
self.idx_dec.max()
216+
217+
def time_min(self):
218+
self.idx_dec.min()
219+
220+
def time_min_trivial(self):
221+
self.idx_inc.min()

asv_bench/benchmarks/io_bench.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import os
12
from .pandas_vb_common import *
23
from pandas import concat, Timestamp, compat
34
try:
@@ -192,3 +193,32 @@ def time_read_nrows(self, compression, engine):
192193
ext = ".bz2"
193194
pd.read_csv(self.big_fname + ext, nrows=10,
194195
compression=compression, engine=engine)
196+
197+
198+
class read_json_lines(object):
199+
goal_time = 0.2
200+
fname = "__test__.json"
201+
202+
def setup(self):
203+
self.N = 100000
204+
self.C = 5
205+
self.df = DataFrame(dict([('float{0}'.format(i), randn(self.N)) for i in range(self.C)]))
206+
self.df.to_json(self.fname,orient="records",lines=True)
207+
208+
def teardown(self):
209+
try:
210+
os.remove(self.fname)
211+
except:
212+
pass
213+
214+
def time_read_json_lines(self):
215+
pd.read_json(self.fname, lines=True)
216+
217+
def time_read_json_lines_chunk(self):
218+
pd.concat(pd.read_json(self.fname, lines=True, chunksize=self.N//4))
219+
220+
def peakmem_read_json_lines(self):
221+
pd.read_json(self.fname, lines=True)
222+
223+
def peakmem_read_json_lines_chunk(self):
224+
pd.concat(pd.read_json(self.fname, lines=True, chunksize=self.N//4))

asv_bench/benchmarks/sparse.py

Lines changed: 66 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
1-
from itertools import repeat
1+
import itertools
22

33
from .pandas_vb_common import *
44
import scipy.sparse
5-
from pandas import SparseSeries, SparseDataFrame
5+
from pandas import SparseSeries, SparseDataFrame, SparseArray
66

77

88
class sparse_series_to_frame(object):
@@ -23,6 +23,69 @@ def time_sparse_series_to_frame(self):
2323
SparseDataFrame(self.series)
2424

2525

26+
class sparse_array_constructor(object):
27+
goal_time = 0.2
28+
29+
def setup(self):
30+
np.random.seed(1)
31+
self.int64_10percent = self.make_numeric_array(length=1000000, dense_size=100000, fill_value=0, dtype=np.int64)
32+
self.int64_1percent = self.make_numeric_array(length=1000000, dense_size=10000, fill_value=0, dtype=np.int64)
33+
34+
self.float64_10percent = self.make_numeric_array(length=1000000, dense_size=100000, fill_value=np.nan, dtype=np.float64)
35+
self.float64_1percent = self.make_numeric_array(length=1000000, dense_size=10000, fill_value=np.nan, dtype=np.float64)
36+
37+
self.object_nan_fill_value_10percent = self.make_object_array(length=1000000, dense_size=100000, fill_value=np.nan)
38+
self.object_nan_fill_value_1percent = self.make_object_array(length=1000000, dense_size=10000, fill_value=np.nan)
39+
40+
self.object_non_nan_fill_value_10percent = self.make_object_array(length=1000000, dense_size=100000, fill_value=0)
41+
self.object_non_nan_fill_value_1percent = self.make_object_array(length=1000000, dense_size=10000, fill_value=0)
42+
43+
def make_numeric_array(self, length, dense_size, fill_value, dtype):
44+
arr = np.array([fill_value] * length, dtype=dtype)
45+
indexer = np.unique(np.random.randint(0, length, dense_size))
46+
arr[indexer] = np.random.randint(0, 100, len(indexer))
47+
return (arr, fill_value, dtype)
48+
49+
def make_object_array(self, length, dense_size, fill_value):
50+
elems = np.array(['a', 0.0, False, 1, 2], dtype=np.object)
51+
arr = np.array([fill_value] * length, dtype=np.object)
52+
indexer = np.unique(np.random.randint(0, length, dense_size))
53+
arr[indexer] = np.random.choice(elems, len(indexer))
54+
return (arr, fill_value, np.object)
55+
56+
def time_sparse_array_constructor_int64_10percent(self):
57+
arr, fill_value, dtype = self.int64_10percent
58+
SparseArray(arr, fill_value=fill_value, dtype=dtype)
59+
60+
def time_sparse_array_constructor_int64_1percent(self):
61+
arr, fill_value, dtype = self.int64_1percent
62+
SparseArray(arr, fill_value=fill_value, dtype=dtype)
63+
64+
def time_sparse_array_constructor_float64_10percent(self):
65+
arr, fill_value, dtype = self.float64_10percent
66+
SparseArray(arr, fill_value=fill_value, dtype=dtype)
67+
68+
def time_sparse_array_constructor_float64_1percent(self):
69+
arr, fill_value, dtype = self.float64_1percent
70+
SparseArray(arr, fill_value=fill_value, dtype=dtype)
71+
72+
def time_sparse_array_constructor_object_nan_fill_value_10percent(self):
73+
arr, fill_value, dtype = self.object_nan_fill_value_10percent
74+
SparseArray(arr, fill_value=fill_value, dtype=dtype)
75+
76+
def time_sparse_array_constructor_object_nan_fill_value_1percent(self):
77+
arr, fill_value, dtype = self.object_nan_fill_value_1percent
78+
SparseArray(arr, fill_value=fill_value, dtype=dtype)
79+
80+
def time_sparse_array_constructor_object_non_nan_fill_value_10percent(self):
81+
arr, fill_value, dtype = self.object_non_nan_fill_value_10percent
82+
SparseArray(arr, fill_value=fill_value, dtype=dtype)
83+
84+
def time_sparse_array_constructor_object_non_nan_fill_value_1percent(self):
85+
arr, fill_value, dtype = self.object_non_nan_fill_value_1percent
86+
SparseArray(arr, fill_value=fill_value, dtype=dtype)
87+
88+
2689
class sparse_frame_constructor(object):
2790
goal_time = 0.2
2891

@@ -33,7 +96,7 @@ def time_sparse_from_scipy(self):
3396
SparseDataFrame(scipy.sparse.rand(1000, 1000, 0.005))
3497

3598
def time_sparse_from_dict(self):
36-
SparseDataFrame(dict(zip(range(1000), repeat([0]))))
99+
SparseDataFrame(dict(zip(range(1000), itertools.repeat([0]))))
37100

38101

39102
class sparse_series_from_coo(object):

asv_bench/benchmarks/timeseries.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ def setup(self):
5656
self.no_freq = self.rng7[:50000].append(self.rng7[50002:])
5757
self.d_freq = self.rng7[:50000].append(self.rng7[50000:])
5858

59-
self.rng8 = date_range(start='1/1/1700', freq='B', periods=100000)
59+
self.rng8 = date_range(start='1/1/1700', freq='B', periods=75000)
6060
self.b_freq = self.rng8[:50000].append(self.rng8[50000:])
6161

6262
def time_add_timedelta(self):

asv_bench/benchmarks/timestamp.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,3 +81,9 @@ def time_replace_across_dst(self):
8181

8282
def time_replace_None(self):
8383
self.ts_tz.replace(tzinfo=None)
84+
85+
def time_to_pydatetime(self):
86+
self.ts.to_pydatetime()
87+
88+
def time_to_pydatetime_tz(self):
89+
self.ts_tz.to_pydatetime()

ci/check_imports.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
"""
2+
Check that certain modules are not loaded by `import pandas`
3+
"""
4+
import sys
5+
6+
blacklist = {
7+
'bs4',
8+
'html5lib',
9+
'ipython',
10+
'jinja2',
11+
'lxml',
12+
'matplotlib',
13+
'numexpr',
14+
'openpyxl',
15+
'py',
16+
'pytest',
17+
's3fs',
18+
'scipy',
19+
'tables',
20+
'xlrd',
21+
'xlsxwriter',
22+
'xlwt',
23+
}
24+
25+
26+
def main():
27+
import pandas # noqa
28+
29+
modules = set(x.split('.')[0] for x in sys.modules)
30+
imported = modules & blacklist
31+
if modules & blacklist:
32+
sys.exit("Imported {}".format(imported))
33+
34+
35+
if __name__ == '__main__':
36+
main()

ci/requirements-2.7.run

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ matplotlib
88
openpyxl=1.6.2
99
xlrd=0.9.2
1010
sqlalchemy=0.9.6
11-
lxml=3.2.1
11+
lxml
1212
scipy
1313
xlsxwriter=0.5.2
1414
s3fs

ci/requirements-2.7_LOCALE.run

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,5 +8,5 @@ xlrd=0.9.2
88
bottleneck=1.0.0
99
matplotlib=1.4.3
1010
sqlalchemy=0.8.1
11-
lxml=3.2.1
11+
lxml
1212
scipy

ci/requirements-2.7_SLOW.run

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,4 +16,4 @@ s3fs
1616
psycopg2
1717
pymysql
1818
html5lib
19-
beautiful-soup
19+
beautifulsoup4

ci/requirements-2.7_WIN.run

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,11 @@ matplotlib
88
openpyxl
99
xlrd
1010
sqlalchemy
11-
lxml=3.2.1
11+
lxml
1212
scipy
1313
xlsxwriter
1414
s3fs
1515
bottleneck
1616
html5lib
17-
beautiful-soup
17+
beautifulsoup4
1818
jinja2=2.8

ci/requirements-3.6_NUMPY_DEV.build

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,2 @@
11
python=3.6*
22
pytz
3-
cython

ci/requirements-3.6_NUMPY_DEV.build.sh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,4 +14,7 @@ pip install --pre --upgrade --timeout=60 -f $PRE_WHEELS numpy scipy
1414
# install dateutil from master
1515
pip install -U git+git://github.com/dateutil/dateutil.git
1616

17+
# cython via pip
18+
pip install cython
19+
1720
true

ci/requirements_all.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ xlrd
1313
xlwt
1414
html5lib
1515
patsy
16-
beautiful-soup
16+
beautifulsoup4
1717
numpy
1818
cython
1919
scipy

doc/source/10min.rst

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
np.random.seed(123456)
1212
np.set_printoptions(precision=4, suppress=True)
1313
import matplotlib
14-
matplotlib.style.use('ggplot')
14+
# matplotlib.style.use('default')
1515
pd.options.display.max_rows = 15
1616
1717
#### portions of this were borrowed from the
@@ -95,17 +95,7 @@ will be completed:
9595
df2.append df2.combine_first
9696
df2.apply df2.compound
9797
df2.applymap df2.consolidate
98-
df2.as_blocks df2.convert_objects
99-
df2.asfreq df2.copy
100-
df2.as_matrix df2.corr
101-
df2.astype df2.corrwith
102-
df2.at df2.count
103-
df2.at_time df2.cov
104-
df2.axes df2.cummax
105-
df2.B df2.cummin
106-
df2.between_time df2.cumprod
107-
df2.bfill df2.cumsum
108-
df2.blocks df2.D
98+
df2.D
10999

110100
As you can see, the columns ``A``, ``B``, ``C``, and ``D`` are automatically
111101
tab completed. ``E`` is there as well; the rest of the attributes have been

doc/source/advanced.rst

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -638,9 +638,11 @@ and allows efficient indexing and storage of an index with a large number of dup
638638

639639
.. ipython:: python
640640
641+
from pandas.api.types import CategoricalDtype
642+
641643
df = pd.DataFrame({'A': np.arange(6),
642644
'B': list('aabbca')})
643-
df['B'] = df['B'].astype('category', categories=list('cab'))
645+
df['B'] = df['B'].astype(CategoricalDtype(list('cab')))
644646
df
645647
df.dtypes
646648
df.B.cat.categories
@@ -831,12 +833,21 @@ Of course if you need integer based selection, then use ``iloc``
831833
IntervalIndex
832834
~~~~~~~~~~~~~
833835
836+
:class:`IntervalIndex` together with its own dtype, ``interval`` as well as the
837+
:class:`Interval` scalar type, allow first-class support in pandas for interval
838+
notation.
839+
840+
The ``IntervalIndex`` allows some unique indexing and is also used as a
841+
return type for the categories in :func:`cut` and :func:`qcut`.
842+
834843
.. versionadded:: 0.20.0
835844
836845
.. warning::
837846
838847
These indexing behaviors are provisional and may change in a future version of pandas.
839848
849+
An ``IntervalIndex`` can be used in ``Series`` and in ``DataFrame`` as the index.
850+
840851
.. ipython:: python
841852
842853
df = pd.DataFrame({'A': [1, 2, 3, 4]},
@@ -858,6 +869,20 @@ If you select a label *contained* within an interval, this will also select the
858869
df.loc[2.5]
859870
df.loc[[2.5, 3.5]]
860871
872+
``Interval`` and ``IntervalIndex`` are used by ``cut`` and ``qcut``:
873+
874+
.. ipython:: python
875+
876+
c = pd.cut(range(4), bins=2)
877+
c
878+
c.categories
879+
880+
Furthermore, ``IntervalIndex`` allows one to bin *other* data with these same
881+
bins, with ``NaN`` representing a missing value similar to other dtypes.
882+
883+
.. ipython:: python
884+
885+
pd.cut([0, 3, 5, 1], bins=c.categories)
861886
862887
Miscellaneous indexing FAQ
863888
--------------------------
@@ -984,7 +1009,7 @@ The different indexing operation can potentially change the dtype of a ``Series`
9841009
9851010
series1 = pd.Series([1, 2, 3])
9861011
series1.dtype
987-
res = series1[[0,4]]
1012+
res = series1.reindex([0, 4])
9881013
res.dtype
9891014
res
9901015

0 commit comments

Comments
 (0)