Skip to content

Commit 4fb25a2

Browse files
authored
Add Result.to_df to export records as pandas DataFrame (#663)
The method has an `expand` option which will, when `True`, make the driver flatten nodes, relationships, lists, and dicts into multiple columns of the DataFrame.
1 parent 52450a4 commit 4fb25a2

File tree

9 files changed

+772
-22
lines changed

9 files changed

+772
-22
lines changed

docs/source/api.rst

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -804,14 +804,14 @@ A :class:`neo4j.Result` is attached to an active connection, through a :class:`n
804804

805805
.. automethod:: graph
806806

807-
**This is experimental.** (See :ref:`filter-warnings-ref`)
808-
809807
.. automethod:: value
810808

811809
.. automethod:: values
812810

813811
.. automethod:: data
814812

813+
.. automethod:: to_df
814+
815815
.. automethod:: closed
816816

817817
See https://neo4j.com/docs/python-manual/current/cypher-workflow/#python-driver-type-mapping for more about type mapping.
@@ -987,7 +987,7 @@ Path :class:`neo4j.graph.Path`
987987
Node
988988
====
989989

990-
.. autoclass:: neo4j.graph.Node()
990+
.. autoclass:: neo4j.graph.Node
991991

992992
.. describe:: node == other
993993

@@ -1022,6 +1022,8 @@ Node
10221022

10231023
.. autoattribute:: id
10241024

1025+
.. autoattribute:: element_id
1026+
10251027
.. autoattribute:: labels
10261028

10271029
.. automethod:: get
@@ -1036,7 +1038,7 @@ Node
10361038
Relationship
10371039
============
10381040

1039-
.. autoclass:: neo4j.graph.Relationship()
1041+
.. autoclass:: neo4j.graph.Relationship
10401042

10411043
.. describe:: relationship == other
10421044

@@ -1076,6 +1078,8 @@ Relationship
10761078

10771079
.. autoattribute:: id
10781080

1081+
.. autoattribute:: element_id
1082+
10791083
.. autoattribute:: nodes
10801084

10811085
.. autoattribute:: start_node
@@ -1097,7 +1101,7 @@ Relationship
10971101
Path
10981102
====
10991103

1100-
.. autoclass:: neo4j.graph.Path()
1104+
.. autoclass:: neo4j.graph.Path
11011105

11021106
.. describe:: path == other
11031107

docs/source/async_api.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -511,14 +511,14 @@ A :class:`neo4j.AsyncResult` is attached to an active connection, through a :cla
511511

512512
.. automethod:: graph
513513

514-
**This is experimental.** (See :ref:`filter-warnings-ref`)
515-
516514
.. automethod:: value
517515

518516
.. automethod:: values
519517

520518
.. automethod:: data
521519

520+
.. automethod:: to_df
521+
522522
.. automethod:: closed
523523

524524
See https://neo4j.com/docs/python-manual/current/cypher-workflow/#python-driver-type-mapping for more about type mapping.

neo4j/_async/work/result.py

Lines changed: 128 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,15 @@
2020
from warnings import warn
2121

2222
from ..._async_compat.util import AsyncUtil
23-
from ...data import DataDehydrator
23+
from ...data import (
24+
DataDehydrator,
25+
RecordTableRowExporter,
26+
)
2427
from ...exceptions import (
2528
ResultConsumedError,
2629
ResultNotSingleError,
2730
)
31+
from ...meta import experimental
2832
from ...work import ResultSummary
2933
from ..io import ConnectionErrorHandler
3034

@@ -455,6 +459,8 @@ async def graph(self):
455459
was obtained has been closed or the Result has been explicitly
456460
consumed.
457461
462+
**This is experimental.** (See :ref:`filter-warnings-ref`)
463+
458464
.. versionchanged:: 5.0
459465
Can raise :exc:`ResultConsumedError`.
460466
"""
@@ -519,6 +525,127 @@ async def data(self, *keys):
519525
"""
520526
return [record.data(*keys) async for record in self]
521527

528+
@experimental("pandas support is experimental and might be changed or "
              "removed in future versions")
async def to_df(self, expand=False):
    r"""Convert (the rest of) the result to a pandas DataFrame.

    This method is only available if the `pandas` library is installed.

    ::

        res = await tx.run("UNWIND range(1, 10) AS n RETURN n, n+1 as m")
        df = await res.to_df()

    for instance will return a DataFrame with two columns: ``n`` and ``m``
    and 10 rows.

    :param expand: if :const:`True`, some structures in the result will be
        recursively expanded (flattened out into multiple columns) like so
        (everything inside ``<...>`` is a placeholder):

        * :class:`.Node` objects under any variable ``<n>`` will be
          expanded into columns (the recursion stops here)

          * ``<n>().prop.<property_name>`` (any) for each property of the
            node.
          * ``<n>().element_id`` (str) the node's element id.
            See :attr:`.Node.element_id`.
          * ``<n>().labels`` (frozenset of str) the node's labels.
            See :attr:`.Node.labels`.

        * :class:`.Relationship` objects under any variable ``<r>``
          will be expanded into columns (the recursion stops here)

          * ``<r>->.prop.<property_name>`` (any) for each property of the
            relationship.
          * ``<r>->.element_id`` (str) the relationship's element id.
            See :attr:`.Relationship.element_id`.
          * ``<r>->.start.element_id`` (str) the relationship's
            start node's element id.
            See :attr:`.Relationship.start_node`.
          * ``<r>->.end.element_id`` (str) the relationship's
            end node's element id.
            See :attr:`.Relationship.end_node`.
          * ``<r>->.type`` (str) the relationship's type.
            See :attr:`.Relationship.type`.

        * :const:`list` objects under any variable ``<l>`` will be expanded
          into

          * ``<l>[].0`` (any) the 1st list element
          * ``<l>[].1`` (any) the 2nd list element
          * ...

        * :const:`dict` objects under any variable ``<d>`` will be expanded
          into

          * ``<d>{}.<key1>`` (any) the 1st key of the dict
          * ``<d>{}.<key2>`` (any) the 2nd key of the dict
          * ...

        * :const:`list` and :const:`dict` objects are expanded recursively.
          Example::

            variable x: [{"foo": "bar", "baz": [42, 0]}, "foobar"]

          will be expanded to::

            {
                "x[].0{}.foo": "bar",
                "x[].0{}.baz[].0": 42,
                "x[].0{}.baz[].1": 0,
                "x[].1": "foobar"
            }

        * Everything else (including :class:`.Path` objects) will not
          be flattened.

        :const:`dict` keys and variable names that contain ``.`` or ``\``
        will be escaped with a backslash (``\.`` and ``\\`` respectively).
    :type expand: bool

    :rtype: :py:class:`pandas.DataFrame`
    :raises ImportError: if `pandas` library is not available.
    :raises ResultConsumedError: if the transaction from which this result
        was obtained has been closed or the Result has been explicitly
        consumed.

    **This is experimental.**
    ``pandas`` support might be changed or removed in future versions
    without warning. (See :ref:`filter-warnings-ref`)
    """
    # Imported lazily so that pandas stays an optional dependency.
    import pandas as pd

    if not expand:
        return pd.DataFrame(await self.values(), columns=self._keys)

    # ``df_keys`` is a three-state marker:
    #   None  -> no record has been seen yet
    #   list  -> every record so far expanded to exactly these columns,
    #            in this order; ``rows`` holds plain lists of values
    #   False -> records disagreed on their columns; ``rows`` holds dicts
    #            and pandas aligns the columns by key
    df_keys = None
    rows = []
    async for record in self:
        row = RecordTableRowExporter().transform(dict(record.items()))
        if df_keys is False:
            rows.append(row)
            continue
        # Compare as lists: dict key views compare like sets, which would
        # treat the same columns in a different order as equal and then
        # misalign the positional values below.
        row_keys = list(row.keys())
        if df_keys is None:
            df_keys = row_keys
            rows.append(list(row.values()))
        elif df_keys == row_keys:
            rows.append(list(row.values()))
        else:
            # The rows have different keys. We need to pass a list
            # of dicts to pandas
            rows = [dict(zip(df_keys, r)) for r in rows]
            df_keys = False
            rows.append(row)

    if df_keys is False:
        return pd.DataFrame(rows)

    # Without any expanded row to take the columns from, fall back to the
    # escaped record keys. Backslashes must be escaped before dots,
    # otherwise the backslash inserted for a dot gets doubled again.
    columns = df_keys or [
        k.replace("\\", "\\\\").replace(".", "\\.")
        for k in self._keys
    ]
    return pd.DataFrame(rows, columns=columns)
648+
522649
def closed(self):
523650
"""Return True if the result has been closed.
524651

neo4j/_sync/work/result.py

Lines changed: 128 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,15 @@
2020
from warnings import warn
2121

2222
from ..._async_compat.util import Util
23-
from ...data import DataDehydrator
23+
from ...data import (
24+
DataDehydrator,
25+
RecordTableRowExporter,
26+
)
2427
from ...exceptions import (
2528
ResultConsumedError,
2629
ResultNotSingleError,
2730
)
31+
from ...meta import experimental
2832
from ...work import ResultSummary
2933
from ..io import ConnectionErrorHandler
3034

@@ -455,6 +459,8 @@ def graph(self):
455459
was obtained has been closed or the Result has been explicitly
456460
consumed.
457461
462+
**This is experimental.** (See :ref:`filter-warnings-ref`)
463+
458464
.. versionchanged:: 5.0
459465
Can raise :exc:`ResultConsumedError`.
460466
"""
@@ -519,6 +525,127 @@ def data(self, *keys):
519525
"""
520526
return [record.data(*keys) for record in self]
521527

528+
@experimental("pandas support is experimental and might be changed or "
              "removed in future versions")
def to_df(self, expand=False):
    r"""Convert (the rest of) the result to a pandas DataFrame.

    This method is only available if the `pandas` library is installed.

    ::

        res = tx.run("UNWIND range(1, 10) AS n RETURN n, n+1 as m")
        df = res.to_df()

    for instance will return a DataFrame with two columns: ``n`` and ``m``
    and 10 rows.

    :param expand: if :const:`True`, some structures in the result will be
        recursively expanded (flattened out into multiple columns) like so
        (everything inside ``<...>`` is a placeholder):

        * :class:`.Node` objects under any variable ``<n>`` will be
          expanded into columns (the recursion stops here)

          * ``<n>().prop.<property_name>`` (any) for each property of the
            node.
          * ``<n>().element_id`` (str) the node's element id.
            See :attr:`.Node.element_id`.
          * ``<n>().labels`` (frozenset of str) the node's labels.
            See :attr:`.Node.labels`.

        * :class:`.Relationship` objects under any variable ``<r>``
          will be expanded into columns (the recursion stops here)

          * ``<r>->.prop.<property_name>`` (any) for each property of the
            relationship.
          * ``<r>->.element_id`` (str) the relationship's element id.
            See :attr:`.Relationship.element_id`.
          * ``<r>->.start.element_id`` (str) the relationship's
            start node's element id.
            See :attr:`.Relationship.start_node`.
          * ``<r>->.end.element_id`` (str) the relationship's
            end node's element id.
            See :attr:`.Relationship.end_node`.
          * ``<r>->.type`` (str) the relationship's type.
            See :attr:`.Relationship.type`.

        * :const:`list` objects under any variable ``<l>`` will be expanded
          into

          * ``<l>[].0`` (any) the 1st list element
          * ``<l>[].1`` (any) the 2nd list element
          * ...

        * :const:`dict` objects under any variable ``<d>`` will be expanded
          into

          * ``<d>{}.<key1>`` (any) the 1st key of the dict
          * ``<d>{}.<key2>`` (any) the 2nd key of the dict
          * ...

        * :const:`list` and :const:`dict` objects are expanded recursively.
          Example::

            variable x: [{"foo": "bar", "baz": [42, 0]}, "foobar"]

          will be expanded to::

            {
                "x[].0{}.foo": "bar",
                "x[].0{}.baz[].0": 42,
                "x[].0{}.baz[].1": 0,
                "x[].1": "foobar"
            }

        * Everything else (including :class:`.Path` objects) will not
          be flattened.

        :const:`dict` keys and variable names that contain ``.`` or ``\``
        will be escaped with a backslash (``\.`` and ``\\`` respectively).
    :type expand: bool

    :rtype: :py:class:`pandas.DataFrame`
    :raises ImportError: if `pandas` library is not available.
    :raises ResultConsumedError: if the transaction from which this result
        was obtained has been closed or the Result has been explicitly
        consumed.

    **This is experimental.**
    ``pandas`` support might be changed or removed in future versions
    without warning. (See :ref:`filter-warnings-ref`)
    """
    # Imported lazily so that pandas stays an optional dependency.
    import pandas as pd

    if not expand:
        return pd.DataFrame(self.values(), columns=self._keys)

    # ``df_keys`` is a three-state marker:
    #   None  -> no record has been seen yet
    #   list  -> every record so far expanded to exactly these columns,
    #            in this order; ``rows`` holds plain lists of values
    #   False -> records disagreed on their columns; ``rows`` holds dicts
    #            and pandas aligns the columns by key
    df_keys = None
    rows = []
    for record in self:
        row = RecordTableRowExporter().transform(dict(record.items()))
        if df_keys is False:
            rows.append(row)
            continue
        # Compare as lists: dict key views compare like sets, which would
        # treat the same columns in a different order as equal and then
        # misalign the positional values below.
        row_keys = list(row.keys())
        if df_keys is None:
            df_keys = row_keys
            rows.append(list(row.values()))
        elif df_keys == row_keys:
            rows.append(list(row.values()))
        else:
            # The rows have different keys. We need to pass a list
            # of dicts to pandas
            rows = [dict(zip(df_keys, r)) for r in rows]
            df_keys = False
            rows.append(row)

    if df_keys is False:
        return pd.DataFrame(rows)

    # Without any expanded row to take the columns from, fall back to the
    # escaped record keys. Backslashes must be escaped before dots,
    # otherwise the backslash inserted for a dot gets doubled again.
    columns = df_keys or [
        k.replace("\\", "\\\\").replace(".", "\\.")
        for k in self._keys
    ]
    return pd.DataFrame(rows, columns=columns)
648+
522649
def closed(self):
523650
"""Return True if the result has been closed.
524651

0 commit comments

Comments
 (0)