|
"""
    asvector(x)

Return `x` as a vector. An `AbstractVector` is returned unchanged (no copy);
anything else is materialised with `collect`.
"""
function asvector(x::AbstractVector)
    return x
end

function asvector(x)
    return collect(x)
end
| 3 | + |
"""
    pytable(src, format=:pandas; ...)

Construct a Python table from the Tables.jl-compatible table `src`.

The `format` controls the type of the resulting table, and is one of:
- `:pandas`: A `pandas.DataFrame`. Keyword arguments are passed to the `DataFrame` constructor.
- `:columns`: A `dict` mapping column names to columns.
- `:rows`: A `list` of rows, which are `namedtuple`s.
- `:rowdicts`: A `list` of rows, which are `dict`s.

`format` may be anything accepted by `Symbol(format)`, e.g. `"pandas"`.
Any other value raises an error naming the rejected format.
"""
function pytable(src, format=:pandas; opts...)
    # Normalise so that strings such as "pandas" are accepted too.
    format = Symbol(format)
    if format == :pandas
        return _pytable_pandas(src; opts...)
    elseif format == :columns
        return _pytable_columns(src; opts...)
    elseif format == :rows
        return _pytable_rows(src; opts...)
    elseif format == :rowdicts
        return _pytable_rowdicts(src; opts...)
    else
        # Keep the "invalid format" prefix, but tell the caller what was
        # rejected and which values are accepted.
        error(
            "invalid format: $(repr(format)) " *
            "(expected one of :pandas, :columns, :rows, :rowdicts)",
        )
    end
end
| 29 | +export pytable |
| 30 | + |
# Build a Python `dict` mapping each column name (as a Python `str`) to the
# corresponding column of `cols`, passed through `asvector`.
function _pytable_columns(src, cols=Tables.columns(src))
    colnames = Tables.columnnames(cols)
    # Lazy generator of name => column pairs, consumed by `pydict`.
    entries = (
        pystr(String(name)) => asvector(Tables.getcolumn(cols, name))
        for name in colnames
    )
    return pydict(entries)
end
| 37 | + |
# Build a Python `list` with one `collections.namedtuple` instance (type name
# "Row") per row of `rows`, whose fields are the column names.
function _pytable_rows(src, rows=Tables.rows(src))
    # NOTE(review): column names are queried on the row iterator itself, not
    # on an individual row - confirm every supported source implements this.
    colnames = Tables.columnnames(rows)
    namedtuple = pyimport("collections" => "namedtuple")
    fields = pylist(pystr(string(name)) for name in colnames)
    rowtype = namedtuple("Row", fields)
    # Convert one Tables.jl row into an instance of the Python row type.
    makerow(row) = rowtype(map(name -> Tables.getcolumn(row, name), colnames)...)
    return pylist(makerow(row) for row in rows)
end
| 46 | + |
# Build a Python `list` with one `dict` per row of `rows`, keyed by the column
# names as Python `str`s.
function _pytable_rowdicts(src, rows=Tables.rows(src))
    # NOTE(review): column names are queried on the row iterator itself, not
    # on an individual row - confirm every supported source implements this.
    colnames = Tables.columnnames(rows)
    # Convert each name to a Python string once, up front, and reuse it for
    # every row.
    keyed = [(name, pystr(string(name))) for name in colnames]
    rowdict(row) = pydict(key => Tables.getcolumn(row, name) for (name, key) in keyed)
    return pylist(rowdict(row) for row in rows)
end
| 58 | + |
# Convert a column into the value handed to pandas. The generic fallback just
# defers to `asvector`; more specific methods may be added for other types.
function aspandasvector(x)
    return asvector(x)
end
| 60 | + |
# Build a `pandas.DataFrame` from the columns of `cols`. Each column is
# converted with `aspandasvector`; keyword arguments are forwarded to the
# `DataFrame` constructor.
function _pytable_pandas(src, cols=Tables.columns(src); opts...)
    # Resolve the constructor first so that an import failure surfaces before
    # any column is converted.
    dataframe = pyimport("pandas").DataFrame
    colnames = Tables.columnnames(cols)
    data = pydict(
        pystr(string(name)) => aspandasvector(Tables.getcolumn(cols, name))
        for name in colnames
    )
    return dataframe(data; opts...)
end
| 70 | + |
# Register optional table integrations. Inside the `@require` block for
# CategoricalArrays, a specialised `aspandasvector` method is defined that turns
# a `CategoricalArray` into a `pandas.Categorical`.
function init_tables()
    @require CategoricalArrays="324d7699-5711-5eae-9e2f-1d82baa6b597" @eval begin
        aspandasvector(x::CategoricalArrays.CategoricalArray) = begin
            # `levelcode` is one-based, so subtract one to get the zero-based
            # codes that `from_codes` expects, where -1 denotes a missing value.
            codes = map(
                v -> v === missing ? -1 : Int(CategoricalArrays.levelcode(v)) - 1,
                x,
            )
            cats = CategoricalArrays.levels(x)
            # Use the public accessor rather than the internal `pool.ordered` field.
            ordered = CategoricalArrays.isordered(x)
            pypandasmodule().Categorical.from_codes(codes, cats, ordered=ordered)
        end
    end
end
0 commit comments