Skip to content

Commit ec33e56

Browse files
author
Christopher Doris
committed
adds pytable
1 parent bfe7166 commit ec33e56

File tree

3 files changed

+83
-72
lines changed

3 files changed

+83
-72
lines changed

src/PythonCall.jl

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
module PythonCall
22

3+
using Tables: rows
34
module External
45
import Conda
56
end
@@ -78,6 +79,7 @@ include("compat/serialization.jl")
7879
include("compat/gui.jl")
7980
include("compat/matplotlib.jl")
8081
include("compat/ipython.jl")
82+
include("compat/tables.jl")
8183

8284
function __init__()
8385
C.with_gil() do
@@ -105,6 +107,7 @@ function __init__()
105107
init_gui()
106108
init_matplotlib()
107109
init_ipython()
110+
init_tables()
108111
end
109112
end
110113

src/compat/tables.jl

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
# Return `x` as a vector-like object: abstract vectors are passed through
# untouched (no copy), anything else is materialised with `collect`.
function asvector(x::AbstractVector)
    return x
end

function asvector(x)
    return collect(x)
end
3+
4+
"""
5+
pytable(src, format=:pandas; ...)
6+
7+
Construct a Python table from the Tables.jl-compatible table `src`.
8+
9+
The `format` controls the type of the resulting table, and is one of:
10+
- `:pandas`: A `pandas.DataFrame`. Keyword arguments are passed to the `DataFrame` constructor.
11+
- `:columns`: A `dict` mapping column names to columns.
12+
- `:rows`: A `list` of rows, which are `namedtuple`s.
13+
- `:rowdicts`: A `list` of rows, which are `dict`s.
14+
"""
15+
function pytable(src, format=:pandas; opts...)
    # Normalise so that `format` may be given as a string or a symbol.
    format = Symbol(format)
    # Keyword arguments are forwarded unchanged to the format-specific converter.
    if format == :pandas
        return _pytable_pandas(src; opts...)
    elseif format == :columns
        return _pytable_columns(src; opts...)
    elseif format == :rows
        return _pytable_rows(src; opts...)
    elseif format == :rowdicts
        return _pytable_rowdicts(src; opts...)
    else
        # Name the offending value and the accepted ones so the caller can fix the call.
        error(
            "invalid format: $(repr(format)) " *
            "(expected one of :pandas, :columns, :rows, :rowdicts)",
        )
    end
end
29+
export pytable
30+
31+
# Convert a table to a Python `dict` mapping each column name (as a Python `str`)
# to that column. `cols` defaults to `Tables.columns(src)`.
function _pytable_columns(src, cols=Tables.columns(src))
    # `string(n)` (rather than `String(n)`) matches the other converters in this
    # file and also works for names that are not `Symbol`s or strings.
    return pydict(
        pystr(string(n)) => asvector(Tables.getcolumn(cols, n))
        for n in Tables.columnnames(cols)
    )
end
37+
38+
# Return `(names, rows)` for the row iterator `rows`, where `names` are the column
# names and the returned `rows` yields every row of the input.
#
# `Tables.columnnames` is defined for rows and columns objects, not for row
# iterators, so the names are taken from the schema when it is known and from the
# first row otherwise. In the latter case the first row is stitched back in front
# of the remaining rows, so nothing is lost even if the iterator is stateful.
function _pytable_names_and_rows(rows)
    sch = Tables.schema(rows)
    sch === nothing || return (sch.names, rows)
    peeked = iterate(rows)
    # Empty table with unknown schema: there are no names to discover.
    peeked === nothing && return ((), rows)
    row, state = peeked
    remaining = Iterators.rest(rows, state)
    return (Tables.columnnames(row), Iterators.flatten(((row,), remaining)))
end

# Convert a table to a Python `list` of rows, each an instance of a
# `collections.namedtuple` type called "Row" whose fields are the column names.
# `rows` defaults to `Tables.rows(src)`.
function _pytable_rows(src, rows=Tables.rows(src))
    names, allrows = _pytable_names_and_rows(rows)
    fieldnames = pylist(pystr(string(n)) for n in names)
    rowtype = pyimport("collections" => "namedtuple")("Row", fieldnames)
    return pylist(
        rowtype(map(n -> Tables.getcolumn(row, n), names)...)
        for row in allrows
    )
end

# Convert a table to a Python `list` of rows, each a `dict` mapping column name
# (as a Python `str`) to the value in that row. `rows` defaults to
# `Tables.rows(src)`.
function _pytable_rowdicts(src, rows=Tables.rows(src))
    names, allrows = _pytable_names_and_rows(rows)
    # Build the Python key strings once and reuse them for every row.
    pynames = [pystr(string(n)) for n in names]
    return pylist(
        pydict(
            p => Tables.getcolumn(row, n)
            for (n, p) in zip(names, pynames)
        )
        for row in allrows
    )
end
58+
59+
# Convert a column into the value placed in the dict handed to `pandas.DataFrame`.
# This generic method simply defers to `asvector`; more specific methods may be
# added for particular column types.
function aspandasvector(x)
    return asvector(x)
end
60+
61+
# Convert a table to a `pandas.DataFrame`. Each column is passed through
# `aspandasvector` and keyed by its name as a Python `str`; keyword arguments
# are forwarded to the `DataFrame` constructor. `cols` defaults to
# `Tables.columns(src)`.
function _pytable_pandas(src, cols=Tables.columns(src); opts...)
    frametype = pyimport("pandas").DataFrame
    data = pydict(
        pystr(string(name)) => aspandasvector(Tables.getcolumn(cols, name))
        for name in Tables.columnnames(cols)
    )
    return frametype(data; opts...)
end
70+
71+
# Register optional table integrations. When CategoricalArrays is loaded, add an
# `aspandasvector` method that turns a `CategoricalArray` into a
# `pandas.Categorical` with the same levels and orderedness.
function init_tables()
    @require CategoricalArrays="324d7699-5711-5eae-9e2f-1d82baa6b597" @eval begin
        function aspandasvector(x::CategoricalArrays.CategoricalArray)
            # Shift the level codes down by one for pandas; missing entries get
            # code -1 (the `from_codes` convention for a missing value).
            codes = map(x) do v
                v === missing ? -1 : Int(CategoricalArrays.levelcode(v)) - 1
            end
            cats = CategoricalArrays.levels(x)
            # Use the public accessor rather than reaching into `x.pool`.
            ordered = CategoricalArrays.isordered(x)
            # Import pandas the same way `_pytable_pandas` does.
            pyimport("pandas").Categorical.from_codes(codes, cats, ordered=ordered)
        end
    end
end

src/old/PyPandasDataFrame.jl

Lines changed: 0 additions & 72 deletions
This file was deleted.

0 commit comments

Comments
 (0)