From 053c66923b84af19524097f092b0ea61073f105c Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Thu, 15 Mar 2018 13:50:50 -0500
Subject: [PATCH 1/6] REF: Base class for array

---
 cyberpandas/__init__.py |   1 -
 cyberpandas/base.py     | 132 +++++++++++++
 cyberpandas/ip_array.py | 423 +++++++++++++---------------------------
 3 files changed, 267 insertions(+), 289 deletions(-)
 create mode 100644 cyberpandas/base.py

diff --git a/cyberpandas/__init__.py b/cyberpandas/__init__.py
index c9f2712..c617d35 100644
--- a/cyberpandas/__init__.py
+++ b/cyberpandas/__init__.py
@@ -4,7 +4,6 @@
     IPType,
     IPArray,
     IPAccessor,
-    IPAddressIndex,
 )
 from .parser import to_ipaddress  # noqa
 
diff --git a/cyberpandas/base.py b/cyberpandas/base.py
new file mode 100644
index 0000000..429cb96
--- /dev/null
+++ b/cyberpandas/base.py
@@ -0,0 +1,132 @@
+import operator
+
+import numpy as np
+
+import pandas as pd
+from pandas.core.arrays import ExtensionArray
+
+from ._utils import refactorize
+
+
+class NumPyBackedExtensionArrayMixin(ExtensionArray):
+    @property
+    def dtype(self):
+        """The dtype for this extension array."""
+        return self._dtype
+
+    @classmethod
+    def _constructor_from_sequence(cls, scalars):
+        return cls(scalars)
+
+    @property
+    def shape(self):
+        return (len(self.data),)
+
+    def __len__(self):
+        return len(self.data)
+
+    def __getitem__(self, *args):
+        result = operator.getitem(self.data, *args)
+        if isinstance(result, tuple):
+            return self._box_scalar(result)
+        elif result.ndim == 0:
+            return self._box_scalar(result.item())
+        else:
+            return type(self)(result)
+
+    def setitem(self, indexer, value):
+        """Set the 'value' inplace.
+        """
+        # Having a method separate from __setitem__ is useful,
+        # since we have to return self here, but __setitem__ doesn't.
+        self[indexer] = value
+        return self
+
+    @property
+    def nbytes(self):
+        return self._itemsize * len(self)
+
+    def _formatting_values(self):
+        return np.array(self._format_values(), dtype='object')
+
+    def copy(self, deep=False):
+        return type(self)(self.data.copy())
+
+    @classmethod
+    def _concat_same_type(cls, to_concat):
+        return cls(np.concatenate([array.data for array in to_concat]))
+
+    def tolist(self):
+        return self.data.tolist()
+
+    def argsort(self, axis=-1, kind='quicksort', order=None):
+        return self.data.argsort()
+
+    def unique(self):
+        # type: () -> ExtensionArray
+        # https://github.com/pandas-dev/pandas/pull/19869
+        _, indices = np.unique(self.data, return_index=True)
+        data = self.data.take(np.sort(indices))
+        return self._from_ndarray(data)
+
+    def factorize(self, na_sentinel=-1):
+        """Factorize an IPArray into integer labels and unique values.
+
+        Calling :meth:`pandas.Series.factorize` or :meth:`pandas.factorize`
+        will dispatch to this method.
+
+        Parameters
+        ----------
+        na_sentinel : int, default -1
+            The value in `labels` to use for indicating missing values in
+            `self`.
+
+        Returns
+        -------
+        labels : ndarray
+            An integer-type ndarray the same length as `self`. Each newly-
+            observed value in `self` will be assigned the next integer.
+            Missing values in self are assigned `na_sentinel`.
+        uniques : IPArray
+            The unique values in `self` in order of appearance, not including
+            the missing value ``IPv4Address('0.0.0.0')``.
+
+        See Also
+        --------
+        pandas.factorize, pandas.Series.factorize
+
+        Examples
+        --------
+        >>> arr = IPArray([2, 2, 0, 1, 2, 2**64 + 1])
+        >>> arr
+        IPArray(['0.0.0.2', '0.0.0.2', '0.0.0.0', '0.0.0.1',
+                 '0.0.0.2', '::1:0:0:0:1'])
+
+        >>> labels, uniques = arr.factorize()
+        >>> labels
+        array([ 0,  0, -1,  1,  0,  2])
+
+        Notice that `uniques` does not include the missing value.
+        >>> uniques
+        IPArray(['0.0.0.2', '0.0.0.1', '::1:0:0:0:1'])
+        """
+        # OK, so here's the plan.
+        # Start with factorizing `self.data`, which has two unfortunate issues
+        # 1. Requires casting to object.
+        # 2. Gets the NA logic wrong, since (0, 0) isn't NA to pandas.
+        # For now, we can't help with 1. Maybe someday.
+        # For 2, we can "fix" things with a little post-factorization cleanup.
+        l, u = pd.factorize(self.data)
+        mask = self.isna()
+        any_na = mask.any()
+
+        if any_na:
+            first_na = mask.argmax()
+            refactorize(l, first_na, na_sentinel=na_sentinel)  # inplace op
+
+        # u is an ndarray of tuples. Go to our record type, then an IPArray
+        u2 = type(self)((u.astype(self.dtype._record_type)))
+        # May have a missing value.
+        if any_na:
+            u2 = u2[~u2.isna()]
+        return l, u2
diff --git a/cyberpandas/ip_array.py b/cyberpandas/ip_array.py
index f37e54c..baf592e 100644
--- a/cyberpandas/ip_array.py
+++ b/cyberpandas/ip_array.py
@@ -7,12 +7,12 @@
 import numpy as np
 import pandas as pd
 # TODO: public API
-from pandas.core.arrays import ExtensionArray
 from pandas.core.dtypes.dtypes import ExtensionDtype
 
 from ._accessor import (DelegatedMethod, DelegatedProperty,
                         delegated_method)
 from ._utils import combine, pack, unpack, refactorize
+from .base import NumPyBackedExtensionArrayMixin
 from .common import _U8_MAX, _IPv4_MAX
 from .parser import _to_ipaddress_pyint, _as_ip_object
 
@@ -51,7 +51,7 @@ def construct_from_string(cls, string):
 # -----------------------------------------------------------------------------
 
 
-class IPArray(ExtensionArray):
+class IPArray(NumPyBackedExtensionArrayMixin):
     """Holder for IP Addresses."""
     # A note on the internal data layout. IPv6 addresses require 128 bits,
     # which is more than a uint64 can store. So we use a NumPy structured array
@@ -60,6 +60,7 @@ class IPArray(ExtensionArray):
     # all IP traffic is big-endian.
     __array_priority__ = 1000
     _dtype = IPType()
+    _itemsize = 16
     ndim = 1
     can_hold_na = True
 
@@ -69,123 +70,6 @@ def __init__(self, values):
         values = _to_ip_array(values)  # TODO: avoid potential copy
         self.data = values
 
-    @classmethod
-    def _constructor_from_sequence(cls, scalars):
-        return cls(scalars)
-
-    # -------------------------------------------------------------------------
-    # Pandas Interface
-    # -------------------------------------------------------------------------
-    @property
-    def dtype(self):
-        """The dtype for this extension array, IPType"""
-        return self._dtype
-
-    @property
-    def shape(self):
-        """A length-tuple with the length of the array."""
-        return (len(self.data),)
-
-    @property
-    def nbytes(self):
-        """The number of bytes taken to store this array.
-
-        It takes 16 bytes to store each addresses.
-        """
-        return 16 * len(self)
-
-    def take(self, indexer, allow_fill=True, fill_value=None):
-        mask = indexer == -1
-        result = self.data.take(indexer)
-        result[mask] = unpack(pack(int(self.na_value)))
-        return type(self)(result)
-
-    def _formatting_values(self):
-        return np.array(self._format_values(), dtype='object')
-
-    @classmethod
-    def _concat_same_type(cls, to_concat):
-        return cls(np.concatenate([array.data for array in to_concat]))
-
-    def take_nd(self, indexer, allow_fill=True, fill_value=None):
-        return self.take(indexer, allow_fill=allow_fill, fill_value=fill_value)
-
-    def copy(self, deep=False):
-        return type(self)(self.data.copy())
-
-    # -------------------------------------------------------------------------
-    # Iterator / Sequence interface
-    # -------------------------------------------------------------------------
-    def __len__(self):
-        return len(self.data)
-
-    def __getitem__(self, *args):
-        result = operator.getitem(self.data, *args)
-        if isinstance(result, tuple):
-            return ipaddress.ip_address(combine(*result))
-        elif isinstance(result, np.void):
-            result = result.item()
-            return ipaddress.ip_address(combine(*result))
-        else:
-            return type(self)(result)
-
-    def __setitem__(self, key, value):
-        from .parser import to_ipaddress
-
-        value = to_ipaddress(value).data
-        self.data[key] = value
-
-    def __iter__(self):
-        return iter(self.to_pyipaddress())
-
-    @property
-    def na_value(self):
-        return self.dtype.na_value
-
-    def to_pyipaddress(self):
-        import ipaddress
-        return [ipaddress.ip_address(x) for x in self._format_values()]
-
-    def to_pyints(self):
-        return [combine(*map(int, x)) for x in self.data]
-
-    def to_bytes(self):
-        """Serialize the IPArray as a Python bytestring.
-
-        Examples
-        --------
-        >>> arr = IPArray([10, 20])
-        >>> arr.to_bytes()
-        b'\x00\x00\...x00\x02'
-        """
-        return self.data.tobytes()
-
-    def __repr__(self):
-        formatted = self._format_values()
-        return "IPArray({!r})".format(formatted)
-
-    def _format_values(self):
-        formatted = []
-        # TODO: perf
-        for i in range(len(self)):
-            hi, lo = self.data[i]
-            if lo == -1:
-                formatted.append("NA")
-            elif hi == 0 and lo <= _IPv4_MAX:
-                formatted.append(ipaddress.IPv4Address._string_from_ip_int(
-                    int(lo)))
-            elif hi == 0:
-                formatted.append(ipaddress.IPv6Address._string_from_ip_int(
-                    int(lo)))
-            else:
-                # TODO:
-                formatted.append(ipaddress.IPv6Address._string_from_ip_int(
-                    (int(hi) << 64) + int(lo)))
-        return formatted
-
-    def tolist(self):
-        return self.data.tolist()
-
     @classmethod
     def from_pyints(cls, values):
         # type: T.Sequence[int]) -> 'IPArray'
@@ -244,6 +128,90 @@ def _from_ndarray(cls, data, copy=False):
         new.data = data
         return new
 
+    # -------------------------------------------------------------------------
+    # Properties
+    # -------------------------------------------------------------------------
+    @property
+    def na_value(self):
+        return self.dtype.na_value
+
+    def take(self, indexer, allow_fill=True, fill_value=None):
+        mask = indexer == -1
+        result = self.data.take(indexer)
+        result[mask] = unpack(pack(int(self.na_value)))
+        return type(self)(result)  # TODO: check for copy
+
+    # -------------------------------------------------------------------------
+    # Interfaces
+    # -------------------------------------------------------------------------
+
+    def __repr__(self):
+        formatted = self._format_values()
+        return "IPArray({!r})".format(formatted)
+
+    def _format_values(self):
+        formatted = []
+        # TODO: perf
+        for i in range(len(self)):
+            hi, lo = self.data[i]
+            if lo == -1:
+                formatted.append("NA")
+            elif hi == 0 and lo <= _IPv4_MAX:
+                formatted.append(ipaddress.IPv4Address._string_from_ip_int(
+                    int(lo)))
+            elif hi == 0:
+                formatted.append(ipaddress.IPv6Address._string_from_ip_int(
+                    int(lo)))
+            else:
+                # TODO:
+                formatted.append(ipaddress.IPv6Address._string_from_ip_int(
+                    (int(hi) << 64) + int(lo)))
+        return formatted
+
+    @staticmethod
+    def _box_scalar(scalar):
+        return ipaddress.ip_address(combine(*scalar))
+
+    @property
+    def _parser(self):
+        from .parser import to_ipaddress
+        return to_ipaddress
+
+    def __setitem__(self, key, value):
+        from .parser import to_ipaddress
+
+        value = to_ipaddress(value).data
+        self.data[key] = value
+
+    def __iter__(self):
+        return iter(self.to_pyipaddress())
+
+    # ------------------------------------------------------------------------
+    # Serialization / Export
+    # ------------------------------------------------------------------------
+
+    def to_pyipaddress(self):
+        import ipaddress
+        return [ipaddress.ip_address(x) for x in self._format_values()]
+
+    def to_pyints(self):
+        return [combine(*map(int, x)) for x in self.data]
+
+    def to_bytes(self):
+        """Serialize the IPArray as a Python bytestring.
+
+        Examples
+        --------
+        >>> arr = IPArray([10, 20])
+        >>> arr.to_bytes()
+        b'\x00\x00\...x00\x02'
+        """
+        return self.data.tobytes()
+
+    # ------------------------------------------------------------------------
+    # Ops
+    # ------------------------------------------------------------------------
+
     def __eq__(self, other):
         # TDOO: scalar ipaddress
         if not isinstance(other, IPArray):
@@ -293,69 +261,6 @@ def isna(self):
         ips = self.data
         return (ips['lo'] == 0) & (ips['hi'] == 0)
 
-    def argsort(self, axis=-1, kind='quicksort', order=None):
-        return self.data.argsort()
-
-    @property
-    def is_ipv4(self):
-        # TODO: NA should be NA
-        ips = self.data
-        return (ips['hi'] == 0) & (ips['lo'] < _U8_MAX)
-
-    @property
-    def is_ipv6(self):
-        ips = self.data
-        return (ips['hi'] > 0) | (ips['lo'] > _U8_MAX)
-
-    @property
-    def version(self):
-        return np.where(self.is_ipv4, 4, 6)
-
-    @property
-    def is_multicast(self):
-        pyips = self.to_pyipaddress()
-        return np.array([ip.is_multicast for ip in pyips])
-
-    @property
-    def is_private(self):
-        pyips = self.to_pyipaddress()
-        return np.array([ip.is_private for ip in pyips])
-
-    @property
-    def is_global(self):
-        pyips = self.to_pyipaddress()
-        return np.array([ip.is_global for ip in pyips])
-
-    @property
-    def is_unspecified(self):
-        pyips = self.to_pyipaddress()
-        return np.array([ip.is_unspecified for ip in pyips])
-
-    @property
-    def is_reserved(self):
-        pyips = self.to_pyipaddress()
-        return np.array([ip.is_reserved for ip in pyips])
-
-    @property
-    def is_loopback(self):
-        pyips = self.to_pyipaddress()
-        return np.array([ip.is_loopback for ip in pyips])
-
-    @property
-    def is_link_local(self):
-        pyips = self.to_pyipaddress()
-        return np.array([ip.is_link_local for ip in pyips])
-
-    @property
-    def packed(self):
-        """Bytestring of the IP addresses
-
-        Each address takes 16 bytes. IPv4 addresses are prefixed
-        by zeros.
-        """
-        # TODO: I wonder if that should be post-fixed by 0s.
-        return self.data.tobytes()
-
     def isin(self, other):
         """Check whether elements of 'self' are in 'other'.
 
@@ -445,127 +350,69 @@ def _isin_addresses(self, other):
         # TODO(factorize): replace this
         return isin(self, other)
 
-    def setitem(self, indexer, value):
-        """Set the 'value' inplace.
-        """
-        # I think having a separate than __setitem__ is good
-        # since we have to return here, but __setitem__ doesn't.
-        self[indexer] = value
-        return self
+    # ------------------------------------------------------------------------
+    # IP Specific
+    # ------------------------------------------------------------------------
 
     @property
-    def index_type(self):
-        return IPAddressIndex
-
-    def unique(self):
-        # type: () -> ExtensionArray
-        # https://github.com/pandas-dev/pandas/pull/19869
-        _, indices = np.unique(self.data, return_index=True)
-        data = self.data.take(np.sort(indices))
-        return self._from_ndarray(data)
-
-    def factorize(self, na_sentinel=-1):
-        """Factorize an IPArray into integer labels and unique values.
-
-        Calling :meth:`pandas.Series.factorize` or :meth:`pandas.factorize`
-        will dispatch to this method.
+    def is_ipv4(self):
+        # TODO: NA should be NA
+        ips = self.data
+        return (ips['hi'] == 0) & (ips['lo'] < _U8_MAX)
 
-        Parameters
-        ----------
-        na_sentinel : int, default -1
-            The value in `labels` to use for indicating missing values in
-            `self`.
+    @property
+    def is_ipv6(self):
+        ips = self.data
+        return (ips['hi'] > 0) | (ips['lo'] > _U8_MAX)
 
-        Returns
-        -------
-        labels : ndarray
-            An integer-type ndarray the same length as `self`. Each newly-
-            observed value in `self` will be assigned the next integer.
-            Missing values in self are assigned `na_sentinel`.
-        uniques : IPArray
-            The unique values in `self` in order of appereance, not including
-            the missing value ``IPv4Address('0.0.0.0')``.
+    @property
+    def version(self):
+        return np.where(self.is_ipv4, 4, 6)
 
-        See Also
-        --------
-        pandas.factorize, pandas.Series.factorize
+    @property
+    def is_multicast(self):
+        pyips = self.to_pyipaddress()
+        return np.array([ip.is_multicast for ip in pyips])
 
-        Examples
-        --------
-        >>> arr = IPArray([2, 2, 0, 1, 2, 2**64 + 1])
-        >>> arr
-        IPArray(['0.0.0.2', '0.0.0.2', '0.0.0.0', '0.0.0.1',
-                 '0.0.0.2', '::1:0:0:0:1'])
-
-        >>> labels, uniques = arr.factorize()
-        >>> labels
-        array([ 0,  0, -1,  1,  0,  2])
-
-        Notice that `uniques` does not include the missing value.
-        >>> uniques
-        IPArray(['0.0.0.2', '0.0.0.1', '::1:0:0:0:1'])
-        """
-        # OK, so here's the plan.
-        # Start with factorizing `self.data`, which has two unfortunate issues
-        # 1. Requires casting to object.
-        # 2. Gets the NA logic wrong, since (0, 0) isn't NA to pandas.
-        # For now, we can't help with 1. Maybe someday.
-        # For 2, we can "fix" things with a little post-factorization cleanup.
-        l, u = pd.factorize(self.data)
-        mask = self.isna()
-        any_na = mask.any()
-
-        if any_na:
-            first_na = mask.argmax()
-            refactorize(l, first_na, na_sentinel=na_sentinel)  # inplace op
-
-        # u is an ndarray of tuples. Go to our record type, then an IPArray
-        u2 = type(self)((u.astype(self.dtype._record_type)))
-        # May have a missing value.
-        if any_na:
-            u2 = u2[~u2.isna()]
-        return l, u2
-
-
-# -----
-# Index
-# -----
-
-class IPAddressIndex(pd.Index):
-    _typ = 'ipaddressindex'
-    _attributes = ['name']
-    _holder = IPArray
-
-    def __new__(cls, data=None, name=None):
-        from .parser import _to_ip_array
+    @property
+    def is_private(self):
+        pyips = self.to_pyipaddress()
+        return np.array([ip.is_private for ip in pyips])
 
-        if data is None:
-            data = []
+    @property
+    def is_global(self):
+        pyips = self.to_pyipaddress()
+        return np.array([ip.is_global for ip in pyips])
 
-        data = _to_ip_array(data)
-        return cls._simple_new(data, name=name)
+    @property
+    def is_unspecified(self):
+        pyips = self.to_pyipaddress()
+        return np.array([ip.is_unspecified for ip in pyips])
 
-    @classmethod
-    def _simple_new(cls, data, name=None):
-        result = object.__new__(cls)
-        values = cls._holder(data)
-        result._data = values
-        result._name = name
-        result._reset_identity()
-        return result
+    @property
+    def is_reserved(self):
+        pyips = self.to_pyipaddress()
+        return np.array([ip.is_reserved for ip in pyips])
 
-    def __repr__(self):
-        tpl = 'IPAddressIndex({})'
-        return tpl.format(self._data._format_values())
+    @property
+    def is_loopback(self):
+        pyips = self.to_pyipaddress()
+        return np.array([ip.is_loopback for ip in pyips])
 
     @property
-    def inferred_type(self):
-        return self._typ
+    def is_link_local(self):
+        pyips = self.to_pyipaddress()
+        return np.array([ip.is_link_local for ip in pyips])
 
     @property
-    def values(self):
-        return self._data
+    def packed(self):
+        """Bytestring of the IP addresses
 
+        Each address takes 16 bytes. IPv4 addresses are prefixed
+        by zeros.
+        """
+        # TODO: I wonder if that should be post-fixed by 0s.
+        return self.data.tobytes()
 
 # -----------------------------------------------------------------------------
 # Accessor

From a7a4c6f9d8629043b250434089d0cd94cf91490f Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Thu, 15 Mar 2018 13:53:30 -0500
Subject: [PATCH 2/6] TST: Reorganize module

---
 tests/ip/__init__.py                          |  0
 {cyberpandas => tests/ip}/test_dtypes.py      |  0
 {cyberpandas => tests/ip}/test_interface.py   |  0
 {cyberpandas => tests/ip}/test_ip.py          |  0
 {cyberpandas => tests/ip}/test_ip_pandas.py   | 26 -------------------
 .../ip}/test_pandas_methods.py                |  0
 {cyberpandas => tests/ip}/test_parser.py      |  0
 7 files changed, 26 deletions(-)
 create mode 100644 tests/ip/__init__.py
 rename {cyberpandas => tests/ip}/test_dtypes.py (100%)
 rename {cyberpandas => tests/ip}/test_interface.py (100%)
 rename {cyberpandas => tests/ip}/test_ip.py (100%)
 rename {cyberpandas => tests/ip}/test_ip_pandas.py (82%)
 rename {cyberpandas => tests/ip}/test_pandas_methods.py (100%)
 rename {cyberpandas => tests/ip}/test_parser.py (100%)

diff --git a/tests/ip/__init__.py b/tests/ip/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/cyberpandas/test_dtypes.py b/tests/ip/test_dtypes.py
similarity index 100%
rename from cyberpandas/test_dtypes.py
rename to tests/ip/test_dtypes.py
diff --git a/cyberpandas/test_interface.py b/tests/ip/test_interface.py
similarity index 100%
rename from cyberpandas/test_interface.py
rename to tests/ip/test_interface.py
diff --git a/cyberpandas/test_ip.py b/tests/ip/test_ip.py
similarity index 100%
rename from cyberpandas/test_ip.py
rename to tests/ip/test_ip.py
diff --git a/cyberpandas/test_ip_pandas.py b/tests/ip/test_ip_pandas.py
similarity index 82%
rename from cyberpandas/test_ip_pandas.py
rename to tests/ip/test_ip_pandas.py
index 9065f65..757fba5 100644
--- a/cyberpandas/test_ip_pandas.py
+++ b/tests/ip/test_ip_pandas.py
@@ -66,24 +66,6 @@ def test_dataframe_from_series():
     assert isinstance(result.dtypes['A'], ip.IPType)
 
 
-def test_index_constructor():
-    result = ip.IPAddressIndex([0, 1, 2])
-    assert isinstance(result, ip.IPAddressIndex)
-    assert result._data.equals(ip.IPArray([0, 1, 2]))
-    if six.PY2:
-        assert repr(result) == ("IPAddressIndex([u'0.0.0.0', u'0.0.0.1', "
-                                "u'0.0.0.2'])")
-    else:
-        assert repr(result) == ("IPAddressIndex(['0.0.0.0', '0.0.0.1', "
-                                "'0.0.0.2'])")
-
-
-@pytest.mark.xfail(reason="ExtensionIndex not implemented")
-def test_series_with_index():
-    ser = pd.Series([1, 2, 3], index=ip.IPAddressIndex([0, 1, 2]))
-    repr(ser)
-
-
 def test_getitem_scalar():
     ser = pd.Series(ip.IPArray([0, 1, 2]))
     result = ser[1]
@@ -109,14 +91,6 @@ def test_setitem_scalar():
 # --------------
 
 
-@pytest.mark.xfail(reason="upstream")
-def test_value_counts():
-    result = pd.Series(ip.IPArray([1, 1, 2, 3, 3, 3])).value_counts()
-    expected = pd.Series([3, 2, 1],
-                         index=ip.IPAddressIndex([3, 1, 2]))
-    tm.assert_series_equal(result, expected)
-
-
 @given(lists(integers(min_value=1, max_value=2**128 - 1)))
 def test_argsort(ints):
     pass
diff --git a/cyberpandas/test_pandas_methods.py b/tests/ip/test_pandas_methods.py
similarity index 100%
rename from cyberpandas/test_pandas_methods.py
rename to tests/ip/test_pandas_methods.py
diff --git a/cyberpandas/test_parser.py b/tests/ip/test_parser.py
similarity index 100%
rename from cyberpandas/test_parser.py
rename to tests/ip/test_parser.py

From 21fbcfffd43add66f6aa61e109006176cc0fde76 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Thu, 15 Mar 2018 13:53:50 -0500
Subject: [PATCH 3/6] ENH: Added MAC address

---
 cyberpandas/mac_array.py    | 126 ++++++++++++++++++++++++++++++++++++
 tests/__init__.py           |   0
 tests/mac/__init__.py       |   0
 tests/mac/test_interface.py |  97 +++++++++++++++++++++++++++
 4 files changed, 223 insertions(+)
 create mode 100644 cyberpandas/mac_array.py
 create mode 100644 tests/__init__.py
 create mode 100644 tests/mac/__init__.py
 create mode 100644 tests/mac/test_interface.py

diff --git a/cyberpandas/mac_array.py b/cyberpandas/mac_array.py
new file mode 100644
index 0000000..3f4da37
--- /dev/null
+++ b/cyberpandas/mac_array.py
@@ -0,0 +1,126 @@
+import numpy as np
+
+from pandas.core.dtypes.dtypes import ExtensionDtype
+
+from .base import NumPyBackedExtensionArrayMixin
+
+
+class MACType(ExtensionDtype):
+    """Dtype for MAC Address Data."""
+    name = 'mac'
+    type = int
+    kind = 'u'
+    na_value = 0  # TODO: Check this.
+
+    @classmethod
+    def construct_from_string(cls, string):
+        if string == cls.name:
+            return cls()
+        else:
+            raise TypeError("Cannot construct a '{}' from "
+                            "'{}'".format(cls, string))
+
+
+class MACArray(NumPyBackedExtensionArrayMixin):
+    """Array for MAC Address data.
+
+    * https://en.wikipedia.org/wiki/MAC_address
+    * https://tools.ietf.org/html/rfc5342
+    """
+    # What type(s) do we support?
+    # MAC-48 or EUI-64?
+    _dtype = MACType()
+    _itemsize = 8
+    ndim = 1
+    can_hold_na = True
+
+    def __init__(self, values, copy=True):
+        # TODO: parse hex / strings
+        self.data = np.array(values, dtype='uint64', copy=copy)
+
+    @classmethod
+    def _from_ndarray(cls, data, copy=False):
+        return cls(data, copy=copy)
+
+    @property
+    def na_value(self):
+        return self.dtype.na_value
+
+    def __repr__(self):
+        formatted = self._format_values()
+        return "MACArray({!r})".format(formatted)
+
+    def _format_values(self):
+        return [_format(x) for x in self.data]
+
+    @staticmethod
+    def _box_scalar(scalar):
+        return scalar
+
+    def __setitem__(self, key, value):
+        # Values are plain integers here; the IP parser must not be used.
+        if isinstance(value, type(self)):
+            value = value.data  # unwrap to the backing uint64 ndarray
+        self.data[key] = value
+
+    def __iter__(self):
+        return iter(self.data.tolist())
+
+    def __lt__(self, other):
+        return self.data < other
+
+    def __le__(self, other):
+        return self.data <= other
+
+    def __eq__(self, other):
+        return self.data == other
+
+    def __ge__(self, other):
+        return self.data >= other  # not `other <= self`: it recurses
+
+    def __gt__(self, other):
+        return self.data > other  # not `other < self`: it recurses
+
+    def equals(self, other):
+        if not isinstance(other, type(self)):
+            raise TypeError
+        return (self.data == other.data).all()
+
+    def isna(self):
+        return (self.data == 0)
+
+    @property
+    def _parser(self):
+        return lambda x: x
+
+    def take(self, indexer, allow_fill=True, fill_value=None):
+        mask = indexer == -1
+        result = self.data.take(indexer)
+        result[mask] = self.dtype.na_value
+        return type(self)(result, copy=False)
+
+    def _formatting_values(self):
+        return np.array(self._format_values(), dtype='object')
+
+    @classmethod
+    def _concat_same_type(cls, to_concat):
+        return cls(np.concatenate([array.data for array in to_concat]))
+
+    def take_nd(self, indexer, allow_fill=True, fill_value=None):
+        return self.take(indexer, allow_fill=allow_fill, fill_value=fill_value)
+
+    def copy(self, deep=False):
+        return type(self)(self.data.copy())
+
+
+def _format(mac):
+    # https://stackoverflow.com/a/36883363/1889400
+    mac_hex = "{:012x}".format(mac)
+    mac_str = ":".join(mac_hex[i:i+2] for i in range(0, len(mac_hex), 2))
+    return mac_str
+
+
+def _parse(mac):
+    # https://stackoverflow.com/a/36883363/1889400
+    mac_int = int(mac.replace(":", "").replace("-", ""), 16)
+    return mac_int
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/mac/__init__.py b/tests/mac/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/mac/test_interface.py b/tests/mac/test_interface.py
new file mode 100644
index 0000000..d3e2892
--- /dev/null
+++ b/tests/mac/test_interface.py
@@ -0,0 +1,97 @@
+import pytest
+
+from pandas.tests.extension import base
+
+from cyberpandas.mac_array import MACArray, MACType
+
+
+
+@pytest.fixture
+def dtype():
+    return MACType()
+
+
+@pytest.fixture
+def data():
+    return MACArray(list(range(100)))
+
+
+@pytest.fixture
+def data_missing():
+    return MACArray([0, 1])
+
+
+@pytest.fixture(params=['data', 'data_missing'])
+def all_data(request, data, data_missing):
+    """Parametrized fixture giving 'data' and 'data_missing'"""
+    if request.param == 'data':
+        return data
+    elif request.param == 'data_missing':
+        return data_missing
+
+
+@pytest.fixture
+def data_for_sorting():
+    return MACArray([10, 2 ** 64 + 1, 1])
+
+
+@pytest.fixture
+def data_missing_for_sorting():
+    return MACArray([2 ** 64 + 1, 0, 1])
+
+
+@pytest.fixture
+def data_for_grouping():
+    b = 1
+    a = 2 ** 32 + 1
+    c = 2 ** 32 + 10
+    return MACArray([
+        b, b, 0, 0, a, a, b, c
+    ])
+
+
+@pytest.fixture
+def na_cmp():
+    """Binary operator for comparing NA values.
+
+    Should return a function of two arguments that returns
+    True if both arguments are (scalar) NA for your type.
+
+    By default, uses ``operator.or``
+    """
+    return lambda x, y: int(x) == int(y) == 0
+
+
+@pytest.fixture
+def na_value():
+    return MACType.na_value
+
+
+class TestDtype(base.BaseDtypeTests):
+    pass
+
+
+class TestInterface(base.BaseInterfaceTests):
+    pass
+
+
+class TestConstructors(base.BaseConstructorsTests):
+    pass
+
+
+class TestReshaping(base.BaseReshapingTests):
+    pass
+
+
+class TestGetitem(base.BaseGetitemTests):
+    pass
+
+
+class TestMissing(base.BaseMissingTests):
+    pass
+
+
+class TestMethods(base.BaseMethodsTests):
+    @pytest.mark.xfail(reason='upstream')
+    def test_value_counts(data, dropna):
+        pass

From d23fe9006ac69618662f2d250d463c6af3ca148f Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Thu, 15 Mar 2018 13:55:07 -0500
Subject: [PATCH 4/6] Linting

---
 cyberpandas/ip_array.py     | 4 ++--
 tests/ip/test_ip_pandas.py  | 1 -
 tests/mac/test_interface.py | 1 -
 3 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/cyberpandas/ip_array.py b/cyberpandas/ip_array.py
index baf592e..682c5dc 100644
--- a/cyberpandas/ip_array.py
+++ b/cyberpandas/ip_array.py
@@ -1,7 +1,6 @@
 import abc
 import collections
 import ipaddress
-import operator
 
 import six
 import numpy as np
@@ -11,7 +10,7 @@
 
 from ._accessor import (DelegatedMethod, DelegatedProperty,
                         delegated_method)
-from ._utils import combine, pack, unpack, refactorize
+from ._utils import combine, pack, unpack
 from .base import NumPyBackedExtensionArrayMixin
 from .common import _U8_MAX, _IPv4_MAX
 from .parser import _to_ipaddress_pyint, _as_ip_object
@@ -414,6 +413,7 @@ def packed(self):
         # TODO: I wonder if that should be post-fixed by 0s.
         return self.data.tobytes()
 
+
 # -----------------------------------------------------------------------------
 # Accessor
 # -----------------------------------------------------------------------------
diff --git a/tests/ip/test_ip_pandas.py b/tests/ip/test_ip_pandas.py
index 757fba5..16bbb87 100644
--- a/tests/ip/test_ip_pandas.py
+++ b/tests/ip/test_ip_pandas.py
@@ -2,7 +2,6 @@
 """
 import ipaddress
 
-import six
 import pytest
 import numpy as np
 from hypothesis.strategies import integers, lists
diff --git a/tests/mac/test_interface.py b/tests/mac/test_interface.py
index d3e2892..8211a32 100644
--- a/tests/mac/test_interface.py
+++ b/tests/mac/test_interface.py
@@ -5,7 +5,6 @@
 from cyberpandas.mac_array import MACArray, MACType
 
 
-
 @pytest.fixture
 def dtype():
     return MACType()

From 9be182636f3ea8ba17d945826bc923a47bc28011 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Thu, 15 Mar 2018 14:16:37 -0500
Subject: [PATCH 5/6] Update test commands

---
 .travis.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 61c3cbf..3cb3a5b 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -21,8 +21,8 @@ install:
 script:
   - echo "script start"
   - source activate test-environment
-  - pytest cyberpandas
-  - flake8 cyberpandas
+  - pytest
+  - flake8
   - source ./ci/build.sh
 
 after_success:

From 9ca0f3395ddeeec1ff818f06a4f29f5bac0f1542 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Fri, 16 Mar 2018 11:28:46 -0500
Subject: [PATCH 6/6] COMPAT: long for py2

---
 cyberpandas/mac_array.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/cyberpandas/mac_array.py b/cyberpandas/mac_array.py
index 3f4da37..d753839 100644
--- a/cyberpandas/mac_array.py
+++ b/cyberpandas/mac_array.py
@@ -1,4 +1,5 @@
 import numpy as np
+import six
 
 from pandas.core.dtypes.dtypes import ExtensionDtype
 
@@ -8,7 +9,8 @@
 class MACType(ExtensionDtype):
     """Dtype for MAC Address Data."""
     name = 'mac'
-    type = int
+    # type is long for Py2 and int for py3
+    type = six.integer_types[-1]
     kind = 'u'
     na_value = 0  # TODO: Check this.