From 6bd6ddd02efa729ec302a0d5a36b6877f3a3af02 Mon Sep 17 00:00:00 2001
From: Matthew Kirk <matt@matthewkirk.com>
Date: Mon, 29 Jan 2018 14:06:45 +0700
Subject: [PATCH 01/11] BUG: don't assume series is length > 0

---
 doc/source/whatsnew/v0.23.0.txt           |  1 +
 pandas/_libs/lib.pyx                      |  3 ++-
 pandas/tests/sparse/series/test_series.py | 11 +++++++++++
 3 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt
index 1890636bc8e1a..8dc06b6819b3c 100644
--- a/doc/source/whatsnew/v0.23.0.txt
+++ b/doc/source/whatsnew/v0.23.0.txt
@@ -557,3 +557,4 @@ Other
 ^^^^^
 
 - Improved error message when attempting to use a Python keyword as an identifier in a ``numexpr`` backed query (:issue:`18221`)
+- Bug in `Series.memory_usage` which assumes series will always have more than one element (:issue:`19368`)
diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index c3a654b01022c..b8ace1d2aa50d 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -67,7 +67,8 @@ def memory_usage_of_objects(ndarray[object, ndim=1] arr):
     cdef int64_t s = 0
 
     n = len(arr)
-    for i from 0 <= i < n:
+    # Hrm why was this 0 <= i < n
+    for i from 0 < i < n:
         s += arr[i].__sizeof__()
     return s
 
diff --git a/pandas/tests/sparse/series/test_series.py b/pandas/tests/sparse/series/test_series.py
index 2ea1e63433520..4e997b055f25b 100644
--- a/pandas/tests/sparse/series/test_series.py
+++ b/pandas/tests/sparse/series/test_series.py
@@ -971,6 +971,17 @@ def test_combine_first(self):
         tm.assert_sp_series_equal(result, result2)
         tm.assert_sp_series_equal(result, expected)
 
+    @pytest.mark.parametrize('deep,fill_values', [([True, False],
+                                                   [0, 1, np.nan, None])])
+    def test_memory_usage_deep(self, deep, fill_values):
+        for fv in fill_values:
+            sparse_series = SparseSeries(fill_values, fill_value=fv)
+            dense_series = Series(fill_values)
+            sparse_usage = sparse_series.memory_usage(deep=deep)
+            dense_usage = dense_series.memory_usage(deep=deep)
+
+            assert sparse_usage < dense_usage
+
 
 class TestSparseHandlingMultiIndexes(object):
 

From 7fdd03e946012bb1671a88a250eaf52f55e4ddde Mon Sep 17 00:00:00 2001
From: Matthew Kirk <matt@matthewkirk.com>
Date: Mon, 29 Jan 2018 19:30:32 +0700
Subject: [PATCH 02/11] Take out comment and use product

---
 pandas/_libs/lib.pyx                      | 1 -
 pandas/tests/sparse/series/test_series.py | 5 +++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index b8ace1d2aa50d..8874a1a37c0ed 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -67,7 +67,6 @@ def memory_usage_of_objects(ndarray[object, ndim=1] arr):
     cdef int64_t s = 0
 
     n = len(arr)
-    # Hrm why was this 0 <= i < n
     for i from 0 < i < n:
         s += arr[i].__sizeof__()
     return s
diff --git a/pandas/tests/sparse/series/test_series.py b/pandas/tests/sparse/series/test_series.py
index 4e997b055f25b..4f0aae8d61c04 100644
--- a/pandas/tests/sparse/series/test_series.py
+++ b/pandas/tests/sparse/series/test_series.py
@@ -23,6 +23,7 @@
 from pandas.core.sparse.api import SparseSeries
 from pandas.tests.series.test_api import SharedWithSparse
 
+from itertools import product
 
 def _test_data1():
     # nan-based
@@ -974,8 +975,8 @@ def test_combine_first(self):
     @pytest.mark.parametrize('deep,fill_values', [([True, False],
                                                    [0, 1, np.nan, None])])
     def test_memory_usage_deep(self, deep, fill_values):
-        for fv in fill_values:
-            sparse_series = SparseSeries(fill_values, fill_value=fv)
+        for deep, fill_value in product(deep, fill_values):
+            sparse_series = SparseSeries(fill_values, fill_value=fill_value)
             dense_series = Series(fill_values)
             sparse_usage = sparse_series.memory_usage(deep=deep)
             dense_usage = dense_series.memory_usage(deep=deep)

From e25158713a2acb2132c97f45215ac113dc4d7091 Mon Sep 17 00:00:00 2001
From: Matthew Kirk <matt@matthewkirk.com>
Date: Fri, 2 Feb 2018 15:54:28 +0700
Subject: [PATCH 03/11] Fix failing test with indexing

---
 pandas/_libs/lib.pyx                      | 2 +-
 pandas/tests/sparse/series/test_series.py | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index 87ce382584cf0..0954a74cb4a7b 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -60,7 +60,7 @@ def memory_usage_of_objects(ndarray[object, ndim=1] arr):
     cdef int64_t s = 0
 
     n = len(arr)
-    for i from 0 < i < n:
+    for i in range(n):
         s += arr[i].__sizeof__()
     return s
 
diff --git a/pandas/tests/sparse/series/test_series.py b/pandas/tests/sparse/series/test_series.py
index 4f0aae8d61c04..3f5d5a59cc540 100644
--- a/pandas/tests/sparse/series/test_series.py
+++ b/pandas/tests/sparse/series/test_series.py
@@ -25,6 +25,7 @@
 
 from itertools import product
 
+
 def _test_data1():
     # nan-based
     arr = np.arange(20, dtype=float)

From 5e59e9cbcc3653d0faa4505bad1a7ed8c3f797f8 Mon Sep 17 00:00:00 2001
From: Matthew Kirk <matt@matthewkirk.com>
Date: Fri, 2 Feb 2018 16:07:40 +0700
Subject: [PATCH 04/11] Use range over 0 <= for loops

---
 pandas/_libs/lib.pyx | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index 0954a74cb4a7b..28b1b47e3bbcd 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -131,10 +131,10 @@ def fast_unique_multiple(list arrays):
         dict table = {}
         object val, stub = 0
 
-    for i from 0 <= i < k:
+    for i in range(k):
         buf = arrays[i]
         n = len(buf)
-        for j from 0 <= j < n:
+        for j in range(n):
             val = buf[j]
             if val not in table:
                 table[val] = stub
@@ -158,10 +158,10 @@ def fast_unique_multiple_list(list lists):
         dict table = {}
         object val, stub = 0
 
-    for i from 0 <= i < k:
+    for i in range(k):
         buf = lists[i]
         n = len(buf)
-        for j from 0 <= j < n:
+        for j in range(n):
             val = buf[j]
             if val not in table:
                 table[val] = stub
@@ -200,7 +200,7 @@ def fast_unique_multiple_list_gen(object gen, bint sort=True):
 
     for buf in gen:
         n = len(buf)
-        for j from 0 <= j < n:
+        for j in range(n): 
             val = buf[j]
             if val not in table:
                 table[val] = stub
@@ -830,15 +830,15 @@ def count_level_2d(ndarray[uint8_t, ndim=2, cast=True] mask,
     if axis == 0:
         counts = np.zeros((max_bin, k), dtype='i8')
         with nogil:
-            for i from 0 <= i < n:
-                for j from 0 <= j < k:
+            for i in range(n):
+                for j in range(n):
                     counts[labels[i], j] += mask[i, j]
 
     else:  # axis == 1
         counts = np.zeros((n, max_bin), dtype='i8')
         with nogil:
-            for i from 0 <= i < n:
-                for j from 0 <= j < k:
+            for i in range(n):
+                for j in range(k):
                     counts[i, labels[j]] += mask[i, j]
 
     return counts

From 3f52a44f6ffb3ae52c5fd5b029804228215bbcb2 Mon Sep 17 00:00:00 2001
From: Matthew Kirk <matt@matthewkirk.com>
Date: Fri, 2 Feb 2018 16:23:04 +0700
Subject: [PATCH 05/11] Ah ha I think I got it

---
 pandas/_libs/lib.pyx |  6 ++++++
 pandas/core/base.py  | 14 ++++++++++----
 2 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index 28b1b47e3bbcd..a4b36c880fe9f 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -59,6 +59,12 @@ def memory_usage_of_objects(ndarray[object, ndim=1] arr):
     cdef Py_ssize_t i, n
     cdef int64_t s = 0
 
+    # The problem here is that... 
+    # A SparseArray of size 1 that has fill_value = the only value
+    # will cause this
+
+    # n = 1
+    #   
     n = len(arr)
     for i in range(n):
         s += arr[i].__sizeof__()
diff --git a/pandas/core/base.py b/pandas/core/base.py
index 54d25a16a10a3..8ee9aaa71f75f 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -1069,12 +1069,18 @@ def memory_usage(self, deep=False):
         --------
         numpy.ndarray.nbytes
         """
-        if hasattr(self.values, 'memory_usage'):
-            return self.values.memory_usage(deep=deep)
+        # Use sparse values if they exist for memory consumption
+        if hasattr(self.values, 'sp_values'):
+            values = self.values.sp_values
+        else:
+            values = self.values
+
+        if hasattr(values, 'memory_usage'):
+            return values.memory_usage(deep=deep)
 
-        v = self.values.nbytes
+        v = values.nbytes
         if deep and is_object_dtype(self) and not PYPY:
-            v += lib.memory_usage_of_objects(self.values)
+            v += lib.memory_usage_of_objects(values)
         return v
 
     def factorize(self, sort=False, na_sentinel=-1):

From 21ae14707dfbadc365ce65769f3849dd62dee598 Mon Sep 17 00:00:00 2001
From: Matthew Kirk <matt@matthewkirk.com>
Date: Mon, 5 Feb 2018 10:19:16 +0700
Subject: [PATCH 06/11] FIX: revert change to lib.pyx

---
 pandas/_libs/lib.pyx | 26 ++++++++++----------------
 1 file changed, 10 insertions(+), 16 deletions(-)

diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index a4b36c880fe9f..e1d59f807a7fd 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -59,14 +59,8 @@ def memory_usage_of_objects(ndarray[object, ndim=1] arr):
     cdef Py_ssize_t i, n
     cdef int64_t s = 0
 
-    # The problem here is that... 
-    # A SparseArray of size 1 that has fill_value = the only value
-    # will cause this
-
-    # n = 1
-    #   
     n = len(arr)
-    for i in range(n):
+    for i from 0 <= i < n:
         s += arr[i].__sizeof__()
     return s
 
@@ -137,10 +131,10 @@ def fast_unique_multiple(list arrays):
         dict table = {}
         object val, stub = 0
 
-    for i in range(k):
+    for i from 0 <= i < k:
         buf = arrays[i]
         n = len(buf)
-        for j in range(n):
+        for j from 0 <= j < n:
             val = buf[j]
             if val not in table:
                 table[val] = stub
@@ -164,10 +158,10 @@ def fast_unique_multiple_list(list lists):
         dict table = {}
         object val, stub = 0
 
-    for i in range(k):
+    for i from 0 <= i < k:
         buf = lists[i]
         n = len(buf)
-        for j in range(n):
+        for j from 0 <= j < n:
             val = buf[j]
             if val not in table:
                 table[val] = stub
@@ -206,7 +200,7 @@ def fast_unique_multiple_list_gen(object gen, bint sort=True):
 
     for buf in gen:
         n = len(buf)
-        for j in range(n): 
+        for j from 0 <= j < n:
             val = buf[j]
             if val not in table:
                 table[val] = stub
@@ -836,15 +830,15 @@ def count_level_2d(ndarray[uint8_t, ndim=2, cast=True] mask,
     if axis == 0:
         counts = np.zeros((max_bin, k), dtype='i8')
         with nogil:
-            for i in range(n):
-                for j in range(n):
+            for i from 0 <= i < n:
+                for j from 0 <= j < k:
                     counts[labels[i], j] += mask[i, j]
 
     else:  # axis == 1
         counts = np.zeros((n, max_bin), dtype='i8')
         with nogil:
-            for i in range(n):
-                for j in range(k):
+            for i from 0 <= i < n:
+                for j from 0 <= j < k:
                     counts[i, labels[j]] += mask[i, j]
 
     return counts

From 207bc74d2e4825102b88bde86cecdd11c589adb4 Mon Sep 17 00:00:00 2001
From: Matthew Kirk <matt@matthewkirk.com>
Date: Mon, 5 Feb 2018 11:30:18 +0700
Subject: [PATCH 07/11] Define memory_usage on SparseArray

---
 pandas/core/base.py         |  8 ++++----
 pandas/core/sparse/array.py | 38 ++++++++++++++++++++++++++++++++++++-
 2 files changed, 41 insertions(+), 5 deletions(-)

diff --git a/pandas/core/base.py b/pandas/core/base.py
index 8ee9aaa71f75f..ebbc50795a3db 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -1070,10 +1070,10 @@ def memory_usage(self, deep=False):
         numpy.ndarray.nbytes
         """
         # Use sparse values if they exist for memory consumption
-        if hasattr(self.values, 'sp_values'):
-            values = self.values.sp_values
-        else:
-            values = self.values
+        # if hasattr(self.values, 'sp_values'):
+        #    values = self.values.sp_values
+        # else:
+        values = self.values
 
         if hasattr(values, 'memory_usage'):
             return values.memory_usage(deep=deep)
diff --git a/pandas/core/sparse/array.py b/pandas/core/sparse/array.py
index fa07400a0706e..b3a2a0ac7a502 100644
--- a/pandas/core/sparse/array.py
+++ b/pandas/core/sparse/array.py
@@ -11,7 +11,7 @@
 from pandas.core.base import PandasObject
 
 from pandas import compat
-from pandas.compat import range
+from pandas.compat import range, PYPY
 from pandas.compat.numpy import function as nv
 
 from pandas.core.dtypes.generic import ABCSparseSeries
@@ -30,6 +30,7 @@
 from pandas.core.dtypes.missing import isna, notna, na_value_for_dtype
 
 import pandas._libs.sparse as splib
+import pandas._libs.lib as lib
 from pandas._libs.sparse import SparseIndex, BlockIndex, IntIndex
 from pandas._libs import index as libindex
 import pandas.core.algorithms as algos
@@ -238,6 +239,41 @@ def kind(self):
         elif isinstance(self.sp_index, IntIndex):
             return 'integer'
 
+    def memory_usage(self, deep=False):
+        """Memory usage of SparseArray
+
+        Parameters
+        ----------
+        deep : bool
+            Introspect the data deeply, interrogate
+            `object` dtypes for system-level memory consumption
+
+        Returns
+        -------
+        scalar bytes of memory consumed
+
+        Notes
+        -----
+        Memory usage does not include memory of empty cells filled by
+        fill_value. And it does not include memory consumed by 
+        elements that are not components of the array if deep=False
+
+        See also
+        --------
+        Series.memory_usage
+        """
+
+        values = self.sp_values
+        if hasattr(values, 'memory_usage'):
+            return values.memory_usage(deep=deep)
+
+        v = values.nbytes
+
+        if deep and is_object_dtype(self) and not PYPY:
+            v += lib.memory_usage_of_objects(values)
+
+        return v
+
     def __array_wrap__(self, out_arr, context=None):
         """
         NumPy calls this method when ufunc is applied

From cdd4141e4137375053d68eba6ccab1eecb5c464e Mon Sep 17 00:00:00 2001
From: Matthew Kirk <matt@matthewkirk.com>
Date: Mon, 5 Feb 2018 11:31:14 +0700
Subject: [PATCH 08/11] Fix linting error

---
 pandas/core/sparse/array.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/sparse/array.py b/pandas/core/sparse/array.py
index b3a2a0ac7a502..8d5a5e1dbfe1e 100644
--- a/pandas/core/sparse/array.py
+++ b/pandas/core/sparse/array.py
@@ -255,7 +255,7 @@ def memory_usage(self, deep=False):
         Notes
         -----
         Memory usage does not include memory of empty cells filled by
-        fill_value. And it does not include memory consumed by 
+        fill_value. And it does not include memory consumed by
         elements that are not components of the array if deep=False
 
         See also

From ae9f74d588fd34db80feacd21b630eba0599617b Mon Sep 17 00:00:00 2001
From: Matthew Kirk <matt@matthewkirk.com>
Date: Mon, 5 Feb 2018 11:33:43 +0700
Subject: [PATCH 09/11] Revert base.py

---
 pandas/core/base.py | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/pandas/core/base.py b/pandas/core/base.py
index ebbc50795a3db..54d25a16a10a3 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -1069,18 +1069,12 @@ def memory_usage(self, deep=False):
         --------
         numpy.ndarray.nbytes
         """
-        # Use sparse values if they exist for memory consumption
-        # if hasattr(self.values, 'sp_values'):
-        #    values = self.values.sp_values
-        # else:
-        values = self.values
+        if hasattr(self.values, 'memory_usage'):
+            return self.values.memory_usage(deep=deep)
 
-        if hasattr(values, 'memory_usage'):
-            return values.memory_usage(deep=deep)
-
-        v = values.nbytes
+        v = self.values.nbytes
         if deep and is_object_dtype(self) and not PYPY:
-            v += lib.memory_usage_of_objects(values)
+            v += lib.memory_usage_of_objects(self.values)
         return v
 
     def factorize(self, sort=False, na_sentinel=-1):

From 4ead141c0422200638a1729bf8a109c77411c645 Mon Sep 17 00:00:00 2001
From: Matthew Kirk <matt@matthewkirk.com>
Date: Mon, 5 Feb 2018 11:37:30 +0700
Subject: [PATCH 10/11] Move whatsnew doc to Sparse

---
 doc/source/whatsnew/v0.23.0.txt | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt
index 8579d16bcb140..e8d2ec5eb0d9e 100644
--- a/doc/source/whatsnew/v0.23.0.txt
+++ b/doc/source/whatsnew/v0.23.0.txt
@@ -555,7 +555,7 @@ Sparse
 
 - Bug in which creating a ``SparseDataFrame`` from a dense ``Series`` or an unsupported type raised an uncontrolled exception (:issue:`19374`)
 - Bug in :class:`SparseDataFrame.to_csv` causing exception (:issue:`19384`)
--
+- Bug in :class:`SparseArray.memory_usage` which caused segfault by accessing non sparse elements (:issue:`19368`)
 
 Reshaping
 ^^^^^^^^^
@@ -591,4 +591,3 @@ Other
 ^^^^^
 
 - Improved error message when attempting to use a Python keyword as an identifier in a ``numexpr`` backed query (:issue:`18221`)
-- Bug in `Series.memory_usage` which assumes series will always have more than one element (:issue:`19368`)

From f9433d844ccdda495bc17aad2da23673020aa8dc Mon Sep 17 00:00:00 2001
From: Matthew Kirk <matt@matthewkirk.com>
Date: Mon, 5 Feb 2018 19:38:53 +0700
Subject: [PATCH 11/11] Use shared docstring and get rid of if condition

---
 pandas/core/base.py         |  2 +-
 pandas/core/sparse/array.py | 28 ++--------------------------
 2 files changed, 3 insertions(+), 27 deletions(-)

diff --git a/pandas/core/base.py b/pandas/core/base.py
index 54d25a16a10a3..d5b204dba063e 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -1048,7 +1048,7 @@ def is_monotonic_decreasing(self):
 
     def memory_usage(self, deep=False):
         """
-        Memory usage of my values
+        Memory usage of the values
 
         Parameters
         ----------
diff --git a/pandas/core/sparse/array.py b/pandas/core/sparse/array.py
index 8d5a5e1dbfe1e..65aefd9fb8c0a 100644
--- a/pandas/core/sparse/array.py
+++ b/pandas/core/sparse/array.py
@@ -8,7 +8,7 @@
 import warnings
 
 import pandas as pd
-from pandas.core.base import PandasObject
+from pandas.core.base import PandasObject, IndexOpsMixin
 
 from pandas import compat
 from pandas.compat import range, PYPY
@@ -239,33 +239,9 @@ def kind(self):
         elif isinstance(self.sp_index, IntIndex):
             return 'integer'
 
+    @Appender(IndexOpsMixin.memory_usage.__doc__)
     def memory_usage(self, deep=False):
-        """Memory usage of SparseArray
-
-        Parameters
-        ----------
-        deep : bool
-            Introspect the data deeply, interrogate
-            `object` dtypes for system-level memory consumption
-
-        Returns
-        -------
-        scalar bytes of memory consumed
-
-        Notes
-        -----
-        Memory usage does not include memory of empty cells filled by
-        fill_value. And it does not include memory consumed by
-        elements that are not components of the array if deep=False
-
-        See also
-        --------
-        Series.memory_usage
-        """
-
         values = self.sp_values
-        if hasattr(values, 'memory_usage'):
-            return values.memory_usage(deep=deep)
 
         v = values.nbytes