Skip to content

Commit fa420ad

Browse files
committed
Fixed factorize for MACArray
Relies on pandas-dev/pandas#19957
1 parent 468644b commit fa420ad

File tree

4 files changed

+20
-71
lines changed

4 files changed

+20
-71
lines changed

cyberpandas/base.py

Lines changed: 4 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,8 @@
22

33
import numpy as np
44

5-
import pandas as pd
65
from pandas.core.arrays import ExtensionArray
76

8-
from ._utils import refactorize
9-
107

118
class NumPyBackedExtensionArrayMixin(ExtensionArray):
129
@property
@@ -18,6 +15,10 @@ def dtype(self):
1815
def _constructor_from_sequence(cls, scalars):
1916
return cls(scalars)
2017

18+
@classmethod
19+
def _from_factorized(cls, values, original):
20+
return cls(values)
21+
2122
@property
2223
def shape(self):
2324
return (len(self.data),)
@@ -68,65 +69,3 @@ def unique(self):
6869
_, indices = np.unique(self.data, return_index=True)
6970
data = self.data.take(np.sort(indices))
7071
return self._from_ndarray(data)
71-
72-
def factorize(self, na_sentinel=-1):
73-
"""Factorize an IPArray into integer labels and unique values.
74-
75-
Calling :meth:`pandas.Series.factorize` or :meth:`pandas.factorize`
76-
will dispatch to this method.
77-
78-
Parameters
79-
----------
80-
na_sentinel : int, default -1
81-
The value in `labels` to use for indicating missing values in
82-
`self`.
83-
84-
Returns
85-
-------
86-
labels : ndarray
87-
An integer-type ndarray the same length as `self`. Each newly-
88-
observed value in `self` will be assigned the next integer.
89-
Missing values in self are assigned `na_sentinel`.
90-
uniques : IPArray
91-
The unique values in `self` in order of appearance, not including
92-
the missing value ``IPv4Address('0.0.0.0')``.
93-
94-
See Also
95-
--------
96-
pandas.factorize, pandas.Series.factorize
97-
98-
Examples
99-
--------
100-
>>> arr = IPArray([2, 2, 0, 1, 2, 2**64 + 1])
101-
>>> arr
102-
IPArray(['0.0.0.2', '0.0.0.2', '0.0.0.0', '0.0.0.1',
103-
'0.0.0.2', '::1:0:0:0:1'])
104-
105-
>>> labels, uniques = arr.factorize()
106-
>>> labels
107-
array([ 0, 0, -1, 1, 0, 2])
108-
109-
Notice that `uniques` does not include the missing value.
110-
>>> uniques
111-
IPArray(['0.0.0.2', '0.0.0.1', '::1:0:0:0:1'])
112-
"""
113-
# OK, so here's the plan.
114-
# Start with factorizing `self.data`, which has two unfortunate issues
115-
# 1. Requires casting to object.
116-
# 2. Gets the NA logic wrong, since (0, 0) isn't NA to pandas.
117-
# For now, we can't help with 1. Maybe someday.
118-
# For 2, we can "fix" things with a little post-factorization cleanup.
119-
l, u = pd.factorize(self.data)
120-
mask = self.isna()
121-
any_na = mask.any()
122-
123-
if any_na:
124-
first_na = mask.argmax()
125-
refactorize(l, first_na, na_sentinel=na_sentinel) # inplace op
126-
127-
# u is an ndarray of tuples. Go to our record type, then an IPArray
128-
u2 = type(self)((u.astype(self.dtype._record_type)))
129-
# May have a missing value.
130-
if any_na:
131-
u2 = u2[~u2.isna()]
132-
return l, u2

cyberpandas/mac_array.py

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
from collections import Iterable
2+
13
import numpy as np
24
import six
35

@@ -60,9 +62,7 @@ def _box_scalar(scalar):
6062
return scalar
6163

6264
def __setitem__(self, key, value):
63-
from .parser import to_ipaddress
64-
65-
value = to_ipaddress(value).data
65+
value = to_macaddress(value)
6666
self.data[key] = value
6767

6868
def __iter__(self):
@@ -126,3 +126,13 @@ def _parse(mac):
126126
# https://stackoverflow.com/a/36883363/1889400
127127
mac_int = int(mac.replace(":", "").replace("-", ""), 16)
128128
return mac_int
129+
130+
131+
def to_macaddress(addresses):
132+
if (isinstance(addresses, six.string_types) or
133+
not isinstance(addresses, Iterable)):
134+
addresses = [addresses]
135+
136+
addresses = [_parse(mac) if isinstance(mac, six.string_types) else mac
137+
for mac in addresses]
138+
return np.array(addresses, dtype='u8')

cyberpandas/parser.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ def _to_int_pairs(values):
5757
if isinstance(values, (str, bytes, int)):
5858
values = ipaddress.ip_address(values)._ip
5959
return unpack(pack(values))
60-
elif isinstance(values, np.ndarray):
60+
elif isinstance(values, np.ndarray) and values.dtype != object:
6161
if values.ndim != 2:
6262
raise ValueError("'values' should be a 2-D when passing a "
6363
"NumPy array.")

tests/mac/test_interface.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,12 +31,12 @@ def all_data(request, data, data_missing):
3131

3232
@pytest.fixture
3333
def data_for_sorting():
34-
return MACArray([10, 2 ** 64 + 1, 1])
34+
return MACArray([10, 2 ** 64 - 1, 1])
3535

3636

3737
@pytest.fixture
3838
def data_missing_for_sorting():
39-
return MACArray([2 ** 64 + 1, 0, 1])
39+
return MACArray([2 ** 64 - 1, 0, 1])
4040

4141

4242
@pytest.fixture

0 commit comments

Comments
 (0)