pandas-dev · springcoil · Oct 27, 2014 · Oct 30, 2014 · Oct 30, 2014 · Oct 30, 2014
diff --git a/doc/source/whatsnew/v0.15.2.txt b/doc/source/whatsnew/v0.15.2.txt
@@ -77,6 +77,7 @@ Enhancements
 
 Performance
 ~~~~~~~~~~~
+- Reduce memory usage when ``skiprows`` is an integer in ``read_csv`` (:issue:`8681`)
 
 .. _whatsnew_0152.experimental:
 
@@ -99,6 +100,7 @@ Bug Fixes
 - ``sql_schema`` now generates dialect appropriate ``CREATE TABLE`` statements (:issue:`8697`)
 - ``slice`` string method now takes step into account (:issue:`8754`)
 - Bug in ``BlockManager`` where setting values with different type would break block integrity (:issue:`8850`)
+- Bug in ``DatetimeIndex`` when using ``time`` object as key (:issue:`8667`)
 - Fix negative step support for label-based slices (:issue:`8753`)
 
   Old behavior:
@@ -144,7 +146,7 @@ Bug Fixes
 
 
 
-
+- Bug in ``option_context`` where options were set when the object was created instead of on ``__enter__`` (:issue:`8514`)
 
 
 
@@ -153,8 +155,9 @@ Bug Fixes
 - Bug in `pd.infer_freq`/`DataFrame.inferred_freq` that prevented proper sub-daily frequency inference
   when the index contained DST days (:issue:`8772`).
 - Bug where index name was still used when plotting a series with ``use_index=False`` (:issue:`8558`).
-
 - Bugs when trying to stack multiple columns, when some (or all)
   of the level names are numbers (:issue:`8584`).
 - Bug in ``MultiIndex`` where ``__contains__`` returns wrong result if index is
   not lexically sorted or unique (:issue:`7724`)
+- Bug in CSV parsing with trailing whitespace in skipped rows (:issue:`8679`, :issue:`8661`)
+- Regression in ``Timestamp`` where the 'Z' zone designator for UTC was not parsed (:issue:`8771`)
diff --git a/pandas/core/common.py b/pandas/core/common.py
@@ -404,9 +404,13 @@ def array_equivalent(left, right, strict_nan=False):
 
     Examples
     --------
-    >>> array_equivalent(np.array([1, 2, nan]), np.array([1, 2, nan]))
+    >>> array_equivalent(
+    ...     np.array([1, 2, np.nan]),
+    ...     np.array([1, 2, np.nan]))
     True
-    >>> array_equivalent(np.array([1, nan, 2]), np.array([1, 2, nan]))
+    >>> array_equivalent(
+    ...     np.array([1, np.nan, 2]),
+    ...     np.array([1, 2, np.nan]))
     False
     """
 
@@ -2171,8 +2175,8 @@ def iterpairs(seq):
 
     Examples
     --------
-    >>> iterpairs([1, 2, 3, 4])
-    [(1, 2), (2, 3), (3, 4)
+    >>> list(iterpairs([1, 2, 3, 4]))
+    [(1, 2), (2, 3), (3, 4)]
     """
     # input may not be sliceable
     seq_it = iter(seq)

diff --git a/pandas/core/config.py b/pandas/core/config.py
@@ -51,6 +51,7 @@
 import re
 
 from collections import namedtuple
+from contextlib import contextmanager
 import warnings
 from pandas.compat import map, lmap, u
 import pandas.compat as compat
@@ -384,19 +385,18 @@ def __init__(self, *args):
                 'option_context(pat, val, [(pat, val), ...)).'
             )
 
-        ops = list(zip(args[::2], args[1::2]))
+        self.ops = list(zip(args[::2], args[1::2]))
+
+    def __enter__(self):
         undo = []
-        for pat, val in ops:
+        for pat, val in self.ops:
             undo.append((pat, _get_option(pat, silent=True)))
 
         self.undo = undo
 
-        for pat, val in ops:
+        for pat, val in self.ops:
             _set_option(pat, val, silent=True)
 
-    def __enter__(self):
-        pass
-
     def __exit__(self, *args):
         if self.undo:
             for pat, val in self.undo:
@@ -681,8 +681,6 @@ def pp(name, ks):
 #
 # helpers
 
-from contextlib import contextmanager
-
 
 @contextmanager
 def config_prefix(prefix):

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -3279,7 +3279,7 @@ def update(self, other, join='left', overwrite=True, filter_func=None,
         Parameters
         ----------
         other : DataFrame, or object coercible into a DataFrame
-        join : {'left', 'right', 'outer', 'inner'}, default 'left'
+        join : {'left'}, default 'left'
         overwrite : boolean, default True
             If True then overwrite values for common keys in the calling frame
         filter_func : callable(1d-array) -> 1d-array<boolean>, default None

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -2872,11 +2872,13 @@ def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True,
         GroupBy object
 
         """
-
         from pandas.core.groupby import groupby
+
+        if level is None and by is None:
+            raise TypeError('You have to specify at least one of "by" and "level"')
         axis = self._get_axis_number(axis)
-        return groupby(self, by, axis=axis, level=level, as_index=as_index,
-                       sort=sort, group_keys=group_keys, squeeze=squeeze)
+        return groupby(self, by=by, axis=axis, level=level, as_index=as_index,
+                    sort=sort, group_keys=group_keys, squeeze=squeeze)
 
     def asfreq(self, freq, method=None, how=None, normalize=False):
         """

diff --git a/pandas/index.pyx b/pandas/index.pyx
@@ -545,8 +545,14 @@ cdef class DatetimeEngine(Int64Engine):
                 val = _to_i8(val)
                 return self._get_loc_duplicates(val)
             values = self._get_index_values()
-            conv = _to_i8(val)
-            loc = values.searchsorted(conv, side='left')
+
+            try:
+                conv = _to_i8(val)
+                loc = values.searchsorted(conv, side='left')
+            except TypeError:
+                self._date_check_type(val)
+                raise KeyError(val)
+
             if loc == len(values) or util.get_value_at(values, loc) != conv:
                 raise KeyError(val)
             return loc

diff --git a/pandas/io/tests/test_parsers.py b/pandas/io/tests/test_parsers.py
@@ -3048,6 +3048,29 @@ def test_comment_skiprows(self):
         df = self.read_csv(StringIO(data), comment='#', skiprows=4)
         tm.assert_almost_equal(df.values, expected)
 
+    def test_trailing_spaces(self):
+        data = """skip
+random line with trailing spaces    
+skip
+1,2,3
+1,2.,4.
+random line with trailing tabs\t\t\t
+
+5.,NaN,10.0
+"""
+        expected = pd.DataFrame([[1., 2., 4.],
+                    [5., np.nan, 10.]])
+        # this should ignore six lines, including lines with trailing
+        # whitespace and blank lines (issues 8661, 8679)
+        df = self.read_csv(StringIO(data.replace(',', '  ')), 
+                           header=None, delim_whitespace=True,
+                           skiprows=[0,1,2,3,5,6], skip_blank_lines=True)
+        tm.assert_frame_equal(df, expected)
+        df = self.read_table(StringIO(data.replace(',', '  ')), 
+                             header=None, delim_whitespace=True,
+                             skiprows=[0,1,2,3,5,6], skip_blank_lines=True)
+        tm.assert_frame_equal(df, expected)
+
     def test_comment_header(self):
         data = """# empty
 # second empty line

diff --git a/pandas/parser.pyx b/pandas/parser.pyx
@@ -86,6 +86,7 @@ cdef extern from "parser/tokenizer.h":
         EAT_COMMENT
         EAT_LINE_COMMENT
         WHITESPACE_LINE
+        SKIP_LINE
         FINISHED
 
     enum: ERROR_OVERFLOW
@@ -158,6 +159,7 @@ cdef extern from "parser/tokenizer.h":
         int header_end # header row end
 
         void *skipset
+        int64_t skip_first_N_rows
         int skip_footer
         double (*converter)(const char *, char **, char, char, char, int)
 
@@ -181,6 +183,8 @@ cdef extern from "parser/tokenizer.h":
     void parser_free(parser_t *self) nogil
     int parser_add_skiprow(parser_t *self, int64_t row)
 
+    int parser_set_skipfirstnrows(parser_t *self, int64_t nrows)
+
     void parser_set_default_options(parser_t *self)
 
     int parser_consume_rows(parser_t *self, size_t nrows)
@@ -524,10 +528,10 @@ cdef class TextReader:
 
     cdef _make_skiprow_set(self):
         if isinstance(self.skiprows, (int, np.integer)):
-            self.skiprows = range(self.skiprows)
-
-        for i in self.skiprows:
-            parser_add_skiprow(self.parser, i)
+            parser_set_skipfirstnrows(self.parser, self.skiprows)
+        else:
+            for i in self.skiprows:
+                parser_add_skiprow(self.parser, i)
 
     cdef _setup_parser_source(self, source):
         cdef:

diff --git a/pandas/src/datetime/np_datetime_strings.c b/pandas/src/datetime/np_datetime_strings.c
@@ -363,7 +363,8 @@ convert_datetimestruct_local_to_utc(pandas_datetimestruct *out_dts_utc,
  *           to be cast to the 'unit' parameter.
  *
  * 'out' gets filled with the parsed date-time.
- * 'out_local' gets whether returned value contains timezone. 0 for UTC, 1 for local time.
+ * 'out_local' gets set to 1 if the parsed time contains a timezone,
+ *      to 0 otherwise.
  * 'out_tzoffset' gets set to timezone offset by minutes
  *      if the parsed time was in local time,
  *      to 0 otherwise. The values 'now' and 'today' don't get counted
@@ -785,11 +786,15 @@ parse_iso_8601_datetime(char *str, int len,
 
     /* UTC specifier */
     if (*substr == 'Z') {
-        /* "Z" means not local */
+        /* "Z" should be equivalent to tz offset "+00:00" */
         if (out_local != NULL) {
-            *out_local = 0;
+            *out_local = 1;
         }
 
+        if (out_tzoffset != NULL) {
+            *out_tzoffset = 0;
+         }
+
         if (sublen == 1) {
             goto finish;
         }