diff --git a/cyberpandas/_accessor.py b/cyberpandas/_accessor.py index 8f5f281..646267d 100644 --- a/cyberpandas/_accessor.py +++ b/cyberpandas/_accessor.py @@ -2,7 +2,7 @@ def delegated_method(method, index, name, *args, **kwargs): - return pd.Series(method(*args, **kwargs), index, name) + return pd.Series(method(*args, **kwargs), index, name=name) class Delegated: @@ -16,7 +16,7 @@ def __get__(self, obj, type=None): index = object.__getattribute__(obj, '_index') name = object.__getattribute__(obj, '_name') result = self._get_result(obj) - return pd.Series(result, index, name) + return pd.Series(result, index, name=name) class DelegatedProperty(Delegated): diff --git a/cyberpandas/ip_array.py b/cyberpandas/ip_array.py index 59ab389..f3a64ea 100644 --- a/cyberpandas/ip_array.py +++ b/cyberpandas/ip_array.py @@ -52,7 +52,7 @@ def construct_from_string(cls, string): class IPArray(ExtensionArray): - """Holder for things""" + """Holder for IP Addresses.""" # A note on the internal data layout. IPv6 addresses require 128 bits, # which is more than a uint64 can store. So we use a NumPy structured array # with two fields, 'hi', 'lo' to store the data. Each field is a uint64. @@ -74,18 +74,21 @@ def __init__(self, values): # ------------------------------------------------------------------------- @property def dtype(self): + """The dtype for this extension array, IPType""" return self._dtype @property def shape(self): + """A length-1 tuple with the length of the array.""" return (len(self.data),) @property def nbytes(self): - return 2 * 64 * len(self) + """The number of bytes taken to store this array. - def view(self, dtype=None): - return self.data.view() + It takes 16 bytes to store each address. 
+ """ + return 16 * len(self) def take(self, indexer, allow_fill=True, fill_value=None): mask = indexer == -1 diff --git a/cyberpandas/test_ip_pandas.py b/cyberpandas/test_ip_pandas.py index c902884..d475a4d 100644 --- a/cyberpandas/test_ip_pandas.py +++ b/cyberpandas/test_ip_pandas.py @@ -143,6 +143,12 @@ def test_accessor_works(): s.ip.is_ipv4 +def test_accessor_frame(): + s = pd.DataFrame({"A": ip.IPArray([0, 1, 2, 3])}) + s['A'].ip.is_ipv4 + + + # --------- # Factorize # --------- diff --git a/docs/source/api.rst b/docs/source/api.rst new file mode 100644 index 0000000..fb4c5dc --- /dev/null +++ b/docs/source/api.rst @@ -0,0 +1,7 @@ +API +=== + +.. currentmodule:: cyberpandas + + +.. autoclass:: IPArray diff --git a/docs/source/conf.py b/docs/source/conf.py index 9dc3fd7..450cf87 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -33,9 +33,12 @@ # ones. extensions = [ 'sphinx.ext.autodoc', + 'sphinx.ext.autosummary', 'sphinx.ext.doctest', 'sphinx.ext.intersphinx', 'numpydoc', + 'IPython.sphinxext.ipython_console_highlighting', + 'IPython.sphinxext.ipython_directive', ] # Add any paths that contain templates here, relative to this directory. @@ -160,4 +163,14 @@ # Example configuration for intersphinx: refer to the Python standard library. -intersphinx_mapping = {'https://docs.python.org/': None} +intersphinx_mapping = { + 'python': ('https://docs.python.org/', None), + 'pandas': ('http://pandas-docs.github.io/pandas-docs-travis/', None), +} + + +ipython_execlines = [ + "import ipaddress", + "import pandas as pd", + "from cyberpandas import IPArray, to_ipaddress", +] diff --git a/docs/source/index.rst b/docs/source/index.rst index 1005881..18b9394 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -9,21 +9,6 @@ cyberpandas cyberpandas is a library for working with arrays of IP Addresses. It's specifically designed to work well with pandas. -Install -======= - -With conda - -.. 
code-block:: none - - conda install -c conda-forge cyberpandas - -Or pip - -.. code-block:: none - - pip install cyberpandas - Key Concepts ============ @@ -43,13 +28,24 @@ This is the container for your IPAddress data. Usage ----- -.. code-block:: python +.. ipython:: python - >>> from cyberpandas import IPArray - >>> import pandas as pd + from cyberpandas import IPArray + import pandas as pd - >>> arr = IPArray([0, 1, 2])0000 - >>> arr + arr = IPArray(['192.168.1.1', + '2001:0db8:85a3:0000:0000:8a2e:0370:7334']) + arr + +``IPArray`` is a container for both IPv4 and IPv6 addresses. It can in turn be +stored in pandas' containers: + +.. ipython:: python + + pd.Series(arr) + pd.DataFrame({"addresses": arr}) + +See :ref:`usage` for more. API === @@ -61,6 +57,11 @@ API :maxdepth: 2 :caption: Contents: + install.rst + usage.rst + api.rst + + Indices and tables diff --git a/docs/source/install.rst b/docs/source/install.rst new file mode 100644 index 0000000..4e761a7 --- /dev/null +++ b/docs/source/install.rst @@ -0,0 +1,10 @@ +Install +======= + +cyberpandas requires pandas 0.23 or newer, which is currently unreleased. + +Once pandas is installed, cyberpandas can be installed from PyPI + +.. code-block:: none + + pip install cyberpandas diff --git a/docs/source/usage.rst b/docs/source/usage.rst new file mode 100644 index 0000000..7d0eedf --- /dev/null +++ b/docs/source/usage.rst @@ -0,0 +1,135 @@ +.. _usage: + +Usage +===== + +This document describes how to use the methods and classes provided by +``cyberpandas``. + +We'll assume that the following imports have been performed. + +.. ipython:: python + + import ipaddress + import pandas as pd + from cyberpandas import IPArray, to_ipaddress + +Parsing +------- + +First, you'll need some IP Address data. Much like pandas' +:func:`pandas.to_datetime`, ``cyberpandas`` provides :func:`to_ipaddress` for +converting sequences of anything to a specialized array, :class:`IPArray` in +this case. 
+ +From Strings +"""""""""""" + +:func:`to_ipaddress` can parse a sequence of strings where each element represents +an IP address. + +.. ipython:: python + + to_ipaddress([ + '192.168.1.1', + '2001:0db8:85a3:0000:0000:8a2e:0370:7334', + ]) + +You can also parse a *container* of bytes (Python 2 parlance). + +.. ipython:: python + + to_ipaddress([ + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc0\xa8\x01\x01', + b' \x01\r\xb8\x85\xa3\x00\x00\x00\x00\x8a.\x03ps4', + ]) + +If you have a buffer / bytestring, see :ref:`from_bytes`. + +From Integers +""""""""""""" + +IP Addresses are just integers, and :func:`to_ipaddress` can parse a sequence of +them. + +.. ipython:: python + + to_ipaddress([ + 3232235777, + 42540766452641154071740215577757643572 + ]) + +There's also the :meth:`IPArray.from_pyints` method that does the same thing. + +.. _from_bytes: + +From Bytes +"""""""""" + +If you have a correctly structured buffer of bytes or bytestring, you can +directly construct an ``IPArray`` without any intermediate copies. + +.. ipython:: python + + stream = (b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc0\xa8\x01' + b'\x01 \x01\r\xb8\x85\xa3\x00\x00\x00\x00\x8a.\x03ps4') + IPArray.from_bytes(stream) + +``stream`` is expected to be a sequence of bytes representing IP Addresses (note +that it's just a bytestring that's been split across two lines for readability). +Each IP Address should be 128 bits, left padded with 0s for IPv4 addresses. +In particular, :meth:`IPArray.to_bytes` produces such a sequence of bytes. + +Pandas Integration +------------------ + +``IPArray`` satisfies pandas' extension array interface, which means that it can +safely be stored inside pandas' Series and DataFrame. + +.. ipython:: python + + values = to_ipaddress([ + 0, + 3232235777, + 42540766452641154071740215577757643572 + ]) + values + + ser = pd.Series(values) + ser + df = pd.DataFrame({"addresses": values}) + df + +Most pandas methods that make sense should work. 
The following section will call +out points of interest. + +Indexing +"""""""" + +If your selection returns a scalar, you get back an +:class:`ipaddress.IPv4Address` or :class:`ipaddress.IPv6Address`. + +.. ipython:: python + + ser[0] + df.loc[2, 'addresses'] + +Missing Data +"""""""""""" + +The address 0 (``0.0.0.0``) is used to represent missing values. + +.. ipython:: python + + ser.isna() + ser.dropna() + +IP Accessor +----------- + +``cyberpandas`` offers an accessor for IP-specific methods. + +.. ipython:: python + + ser.ip.isna + df['addresses'].ip.is_ipv6